xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision e32f2f54e699d0aa6e733466c00da7e34666fe5e)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
4c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
549b5e25fSSatish Balay #include "petscbt.h"
67c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h"
7f3da1532SBarry Smith #include "petscblaslapack.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
155d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
165d0c19d7SBarry Smith   const PetscInt *idx;
17521d7252SBarry Smith   PetscBT        table,table0;
18d94109b8SHong Zhang 
19d94109b8SHong Zhang   PetscFunctionBegin;
20*e32f2f54SBarry Smith   if (ov < 0)  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
21d94109b8SHong Zhang   mbs = a->mbs;
22d94109b8SHong Zhang   ai  = a->i;
23d94109b8SHong Zhang   aj  = a->j;
24d0f46423SBarry Smith   bs  = A->rmap->bs;
25d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2613f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
27d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
28d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
29d94109b8SHong Zhang 
30d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
31d94109b8SHong Zhang     isz  = 0;
32d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
33d94109b8SHong Zhang 
34d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
35d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37d94109b8SHong Zhang 
38d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
39dbe03f88SHong Zhang     bcol_max = 0;
40d94109b8SHong Zhang     for (j=0; j<n ; ++j){
41d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
42*e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
44dbe03f88SHong Zhang         nidx[isz++] = brow;
45dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
46dbe03f88SHong Zhang       }
47d94109b8SHong Zhang     }
48d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
49d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
50d94109b8SHong Zhang 
51d94109b8SHong Zhang     k = 0;
52d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
53d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
54d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
55efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
56d94109b8SHong Zhang 
57d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
58d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
59d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
60d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
61d94109b8SHong Zhang           for (l = start; l<end ; l++){
62d94109b8SHong Zhang             bcol = aj[l];
63d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
64d94109b8SHong Zhang           }
65d94109b8SHong Zhang           k++;
66d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
67d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
68d94109b8SHong Zhang           for (l = start; l<end ; l++){
69d94109b8SHong Zhang             bcol = aj[l];
70dbe03f88SHong Zhang             if (bcol > bcol_max) break;
71d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
72d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
73d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
74d94109b8SHong Zhang             }
75d94109b8SHong Zhang           }
76d94109b8SHong Zhang         }
77d94109b8SHong Zhang       }
78d94109b8SHong Zhang     } /* for each overlap */
79d94109b8SHong Zhang 
80d94109b8SHong Zhang     /* expand the Index Set */
81d94109b8SHong Zhang     for (j=0; j<isz; j++) {
82d94109b8SHong Zhang       for (k=0; k<bs; k++)
83d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
84d94109b8SHong Zhang     }
85d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
86d94109b8SHong Zhang   }
87d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
915eee224dSHong Zhang   PetscFunctionReturn(0);
9249b5e25fSSatish Balay }
9349b5e25fSSatish Balay 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
964aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9749b5e25fSSatish Balay {
9849b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
996849ba73SBarry Smith   PetscErrorCode ierr;
10013f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10113f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1025d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1035d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10449b5e25fSSatish Balay   MatScalar      *mat_a;
10549b5e25fSSatish Balay   Mat            C;
10614ca34e6SBarry Smith   PetscTruth     flag,sorted;
10749b5e25fSSatish Balay 
10849b5e25fSSatish Balay   PetscFunctionBegin;
109*e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
11014ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
111*e32f2f54SBarry Smith   if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11249b5e25fSSatish Balay 
11349b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11449b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11549b5e25fSSatish Balay 
11674ed9c26SBarry Smith   ierr  = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11774ed9c26SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11849b5e25fSSatish Balay   ssmap = smap;
11913f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
12049b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12149b5e25fSSatish Balay   /* determine lens of each row */
12249b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12349b5e25fSSatish Balay     kstart  = ai[irow[i]];
12449b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12549b5e25fSSatish Balay     lens[i] = 0;
12649b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12749b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12849b5e25fSSatish Balay           lens[i]++;
12949b5e25fSSatish Balay         }
13049b5e25fSSatish Balay       }
13149b5e25fSSatish Balay     }
13249b5e25fSSatish Balay   /* Create and fill new matrix */
13349b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13449b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13549b5e25fSSatish Balay 
136*e32f2f54SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13713f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
138abc0a331SBarry Smith     if (!flag) {
139*e32f2f54SBarry Smith       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
14049b5e25fSSatish Balay     }
14113f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14249b5e25fSSatish Balay     C = *B;
14349b5e25fSSatish Balay   } else {
1447adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
145f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1467adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
147ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14849b5e25fSSatish Balay   }
14949b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
15049b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15149b5e25fSSatish Balay     row    = irow[i];
15249b5e25fSSatish Balay     kstart = ai[row];
15349b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15449b5e25fSSatish Balay     mat_i  = c->i[i];
15549b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15649b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15749b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15849b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15949b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
16049b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16149b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16249b5e25fSSatish Balay         mat_a   += bs2;
16349b5e25fSSatish Balay         (*mat_ilen)++;
16449b5e25fSSatish Balay       }
16549b5e25fSSatish Balay     }
16649b5e25fSSatish Balay   }
16749b5e25fSSatish Balay 
16849b5e25fSSatish Balay   /* Free work space */
16949b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17349b5e25fSSatish Balay 
17449b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17549b5e25fSSatish Balay   *B = C;
17649b5e25fSSatish Balay   PetscFunctionReturn(0);
17749b5e25fSSatish Balay }
17849b5e25fSSatish Balay 
1794a2ae208SSatish Balay #undef __FUNCT__
1804a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1814aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
18249b5e25fSSatish Balay {
18349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18449b5e25fSSatish Balay   IS             is1;
1856849ba73SBarry Smith   PetscErrorCode ierr;
1865d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1875d0c19d7SBarry Smith   const PetscInt *irow;
18849b5e25fSSatish Balay 
18949b5e25fSSatish Balay   PetscFunctionBegin;
190*e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
19149b5e25fSSatish Balay 
19249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19449b5e25fSSatish Balay 
19549b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19649b5e25fSSatish Balay    and form the IS with compressed IS */
19774ed9c26SBarry Smith   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
19874ed9c26SBarry Smith   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
19949b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
20049b5e25fSSatish Balay 
20149b5e25fSSatish Balay   count = 0;
20249b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
203*e32f2f54SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20449b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20549b5e25fSSatish Balay   }
20649b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20774ed9c26SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20874ed9c26SBarry Smith   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
20949b5e25fSSatish Balay 
2104aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
21174ed9c26SBarry Smith   ierr = ISDestroy(is1);CHKERRQ(ierr);
21249b5e25fSSatish Balay   PetscFunctionReturn(0);
21349b5e25fSSatish Balay }
21449b5e25fSSatish Balay 
2154a2ae208SSatish Balay #undef __FUNCT__
2164a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21713f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21849b5e25fSSatish Balay {
2196849ba73SBarry Smith   PetscErrorCode ierr;
22013f74950SBarry Smith   PetscInt       i;
22149b5e25fSSatish Balay 
22249b5e25fSSatish Balay   PetscFunctionBegin;
22349b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22482502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22549b5e25fSSatish Balay   }
22649b5e25fSSatish Balay 
22749b5e25fSSatish Balay   for (i=0; i<n; i++) {
2284aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
22949b5e25fSSatish Balay   }
23049b5e25fSSatish Balay   PetscFunctionReturn(0);
23149b5e25fSSatish Balay }
23249b5e25fSSatish Balay 
23349b5e25fSSatish Balay /* -------------------------------------------------------*/
23449b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23549b5e25fSSatish Balay /* -------------------------------------------------------*/
23649b5e25fSSatish Balay 
2374a2ae208SSatish Balay #undef __FUNCT__
2384a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
239dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
24049b5e25fSSatish Balay {
24149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
24349b5e25fSSatish Balay   MatScalar      *v;
2446849ba73SBarry Smith   PetscErrorCode ierr;
24513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24749b5e25fSSatish Balay 
24849b5e25fSSatish Balay   PetscFunctionBegin;
2492dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25249b5e25fSSatish Balay 
25349b5e25fSSatish Balay   v     = a->a;
25449b5e25fSSatish Balay   xb = x;
25549b5e25fSSatish Balay 
25649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
25849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
25949b5e25fSSatish Balay     ib = aj + *ai;
260831a3094SHong Zhang     jmin = 0;
26198c9bda7SSatish Balay     nonzerorow += (n>0);
2627fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
26349b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
26449b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
265831a3094SHong Zhang       v += 4; jmin++;
2667fbae186SHong Zhang     }
267831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
26949b5e25fSSatish Balay       cval       = ib[j]*2;
27049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
27149b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
27249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
27349b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
27449b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
27549b5e25fSSatish Balay       v  += 4;
27649b5e25fSSatish Balay     }
27749b5e25fSSatish Balay     xb +=2; ai++;
27849b5e25fSSatish Balay   }
27949b5e25fSSatish Balay 
2801ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2811ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
282dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
28349b5e25fSSatish Balay   PetscFunctionReturn(0);
28449b5e25fSSatish Balay }
28549b5e25fSSatish Balay 
2864a2ae208SSatish Balay #undef __FUNCT__
2874a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
288dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
28949b5e25fSSatish Balay {
29049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
29187828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
29249b5e25fSSatish Balay   MatScalar      *v;
2936849ba73SBarry Smith   PetscErrorCode ierr;
29413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29649b5e25fSSatish Balay 
29749b5e25fSSatish Balay   PetscFunctionBegin;
2982dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2991ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
30149b5e25fSSatish Balay 
30249b5e25fSSatish Balay   v    = a->a;
30349b5e25fSSatish Balay   xb   = x;
30449b5e25fSSatish Balay 
30549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
30849b5e25fSSatish Balay     ib = aj + *ai;
309831a3094SHong Zhang     jmin = 0;
31098c9bda7SSatish Balay     nonzerorow += (n>0);
3117fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
31249b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
31349b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
31449b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
315831a3094SHong Zhang       v += 9; jmin++;
3167fbae186SHong Zhang     }
317831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31949b5e25fSSatish Balay       cval       = ib[j]*3;
32049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
32149b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
32249b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
32349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32449b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
32549b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
32649b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
32749b5e25fSSatish Balay       v  += 9;
32849b5e25fSSatish Balay     }
32949b5e25fSSatish Balay     xb +=3; ai++;
33049b5e25fSSatish Balay   }
33149b5e25fSSatish Balay 
3321ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3331ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
334dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33549b5e25fSSatish Balay   PetscFunctionReturn(0);
33649b5e25fSSatish Balay }
33749b5e25fSSatish Balay 
3384a2ae208SSatish Balay #undef __FUNCT__
3394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
340dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
34149b5e25fSSatish Balay {
34249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
34387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
34449b5e25fSSatish Balay   MatScalar      *v;
3456849ba73SBarry Smith   PetscErrorCode ierr;
34613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
34849b5e25fSSatish Balay 
34949b5e25fSSatish Balay   PetscFunctionBegin;
3502dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3511ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3521ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35349b5e25fSSatish Balay 
35449b5e25fSSatish Balay   v     = a->a;
35549b5e25fSSatish Balay   xb = x;
35649b5e25fSSatish Balay 
35749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
35849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
35949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
36049b5e25fSSatish Balay     ib = aj + *ai;
361831a3094SHong Zhang     jmin = 0;
36298c9bda7SSatish Balay     nonzerorow += (n>0);
3637fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
36449b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36549b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
36649b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
36749b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
368831a3094SHong Zhang       v += 16; jmin++;
3697fbae186SHong Zhang     }
370831a3094SHong Zhang     for (j=jmin; j<n; j++) {
37149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37249b5e25fSSatish Balay       cval       = ib[j]*4;
37349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
37449b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
37549b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
37649b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
37749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
37849b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
37949b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
38049b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
38149b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38249b5e25fSSatish Balay       v  += 16;
38349b5e25fSSatish Balay     }
38449b5e25fSSatish Balay     xb +=4; ai++;
38549b5e25fSSatish Balay   }
38649b5e25fSSatish Balay 
3871ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3881ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
389dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
39049b5e25fSSatish Balay   PetscFunctionReturn(0);
39149b5e25fSSatish Balay }
39249b5e25fSSatish Balay 
3934a2ae208SSatish Balay #undef __FUNCT__
3944a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
395dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
39649b5e25fSSatish Balay {
39749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
39887828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
39949b5e25fSSatish Balay   MatScalar      *v;
4006849ba73SBarry Smith   PetscErrorCode ierr;
40113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
40298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
40349b5e25fSSatish Balay 
40449b5e25fSSatish Balay   PetscFunctionBegin;
4052dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4061ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4071ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
40849b5e25fSSatish Balay 
40949b5e25fSSatish Balay   v     = a->a;
41049b5e25fSSatish Balay   xb = x;
41149b5e25fSSatish Balay 
41249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
41449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
41549b5e25fSSatish Balay     ib = aj + *ai;
416831a3094SHong Zhang     jmin = 0;
41798c9bda7SSatish Balay     nonzerorow += (n>0);
4187fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
41949b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
42049b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
42149b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
42249b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42349b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
424831a3094SHong Zhang       v += 25; jmin++;
4257fbae186SHong Zhang     }
426831a3094SHong Zhang     for (j=jmin; j<n; j++) {
42749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
42849b5e25fSSatish Balay       cval       = ib[j]*5;
42949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
43049b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
43149b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
43249b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
43349b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
43449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
43549b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
43649b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
43749b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
43849b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
43949b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
44049b5e25fSSatish Balay       v  += 25;
44149b5e25fSSatish Balay     }
44249b5e25fSSatish Balay     xb +=5; ai++;
44349b5e25fSSatish Balay   }
44449b5e25fSSatish Balay 
4451ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4461ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
447dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
44849b5e25fSSatish Balay   PetscFunctionReturn(0);
44949b5e25fSSatish Balay }
45049b5e25fSSatish Balay 
45149b5e25fSSatish Balay 
4524a2ae208SSatish Balay #undef __FUNCT__
4534a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
454dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
45549b5e25fSSatish Balay {
45649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
45787828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
45849b5e25fSSatish Balay   MatScalar      *v;
4596849ba73SBarry Smith   PetscErrorCode ierr;
46013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
46198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
46249b5e25fSSatish Balay 
46349b5e25fSSatish Balay   PetscFunctionBegin;
4642dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4651ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4661ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
46749b5e25fSSatish Balay 
46849b5e25fSSatish Balay   v     = a->a;
46949b5e25fSSatish Balay   xb = x;
47049b5e25fSSatish Balay 
47149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
47349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
47449b5e25fSSatish Balay     ib = aj + *ai;
475831a3094SHong Zhang     jmin = 0;
47698c9bda7SSatish Balay     nonzerorow += (n>0);
4777fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
47849b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
47949b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
48049b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
48149b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
48249b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
48349b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
484831a3094SHong Zhang       v += 36; jmin++;
4857fbae186SHong Zhang     }
486831a3094SHong Zhang     for (j=jmin; j<n; j++) {
48749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
48849b5e25fSSatish Balay       cval       = ib[j]*6;
48949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
49049b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
49149b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
49249b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
49349b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
49449b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
49549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
49649b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
49749b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
49849b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
49949b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
50049b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
50149b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
50249b5e25fSSatish Balay       v  += 36;
50349b5e25fSSatish Balay     }
50449b5e25fSSatish Balay     xb +=6; ai++;
50549b5e25fSSatish Balay   }
50649b5e25fSSatish Balay 
5071ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5081ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
509dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
51049b5e25fSSatish Balay   PetscFunctionReturn(0);
51149b5e25fSSatish Balay }
5124a2ae208SSatish Balay #undef __FUNCT__
5134a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
514dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
51549b5e25fSSatish Balay {
51649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
51787828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
51849b5e25fSSatish Balay   MatScalar      *v;
5196849ba73SBarry Smith   PetscErrorCode ierr;
52013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
52198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
52249b5e25fSSatish Balay 
52349b5e25fSSatish Balay   PetscFunctionBegin;
5242dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5251ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5261ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
52749b5e25fSSatish Balay 
52849b5e25fSSatish Balay   v     = a->a;
52949b5e25fSSatish Balay   xb = x;
53049b5e25fSSatish Balay 
53149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
53249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
53349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
53449b5e25fSSatish Balay     ib = aj + *ai;
535831a3094SHong Zhang     jmin = 0;
53698c9bda7SSatish Balay     nonzerorow += (n>0);
5377fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
53849b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
53949b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
54049b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
54149b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
54249b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
54349b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
54449b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
545831a3094SHong Zhang       v += 49; jmin++;
5467fbae186SHong Zhang     }
547831a3094SHong Zhang     for (j=jmin; j<n; j++) {
54849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
54949b5e25fSSatish Balay       cval       = ib[j]*7;
55049b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
55149b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
55249b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
55349b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
55449b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
55549b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
55649b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
55749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
55849b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
55949b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
56049b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
56149b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
56249b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
56349b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
56449b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
56549b5e25fSSatish Balay       v  += 49;
56649b5e25fSSatish Balay     }
56749b5e25fSSatish Balay     xb +=7; ai++;
56849b5e25fSSatish Balay   }
5691ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5701ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
571dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
57249b5e25fSSatish Balay   PetscFunctionReturn(0);
57349b5e25fSSatish Balay }
57449b5e25fSSatish Balay 
57549b5e25fSSatish Balay /*
57649b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
57749b5e25fSSatish Balay */
5784a2ae208SSatish Balay #undef __FUNCT__
5794a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
580dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
58149b5e25fSSatish Balay {
58249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
58387828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5840b60a74dSHong Zhang   MatScalar      *v;
585dfbe8321SBarry Smith   PetscErrorCode ierr;
586d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
58798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
58849b5e25fSSatish Balay 
58949b5e25fSSatish Balay   PetscFunctionBegin;
5902dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5911ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
5921ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
59349b5e25fSSatish Balay 
59449b5e25fSSatish Balay   aj   = a->j;
59549b5e25fSSatish Balay   v    = a->a;
59649b5e25fSSatish Balay   ii   = a->i;
59749b5e25fSSatish Balay 
59849b5e25fSSatish Balay   if (!a->mult_work) {
599d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
60049b5e25fSSatish Balay   }
60149b5e25fSSatish Balay   work = a->mult_work;
60249b5e25fSSatish Balay 
60349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
60449b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
60549b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
60698c9bda7SSatish Balay     nonzerorow += (n>0);
60749b5e25fSSatish Balay 
60849b5e25fSSatish Balay     /* upper triangular part */
60949b5e25fSSatish Balay     for (j=0; j<n; j++) {
61049b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
61149b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
61249b5e25fSSatish Balay       workt += bs;
61349b5e25fSSatish Balay     }
61449b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
61549b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
61649b5e25fSSatish Balay 
61749b5e25fSSatish Balay     /* strict lower triangular part */
618831a3094SHong Zhang     idx = aj+ii[0];
619831a3094SHong Zhang     if (*idx == i){
62096b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
621831a3094SHong Zhang     }
62296b9376eSHong Zhang 
62349b5e25fSSatish Balay     if (ncols > 0){
62449b5e25fSSatish Balay       workt = work;
62587828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
626831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
627831a3094SHong Zhang       for (j=0; j<n; j++) {
628831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
62949b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
63049b5e25fSSatish Balay         workt += bs;
63149b5e25fSSatish Balay       }
63249b5e25fSSatish Balay     }
63349b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
63449b5e25fSSatish Balay   }
63549b5e25fSSatish Balay 
6361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6371ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
638dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
63949b5e25fSSatish Balay   PetscFunctionReturn(0);
64049b5e25fSSatish Balay }
64149b5e25fSSatish Balay 
6426a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6434a2ae208SSatish Balay #undef __FUNCT__
6444a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
645dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
64649b5e25fSSatish Balay {
64749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
648bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
64949b5e25fSSatish Balay   MatScalar      *v;
6506849ba73SBarry Smith   PetscErrorCode ierr;
65113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
65298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
65349b5e25fSSatish Balay 
65449b5e25fSSatish Balay   PetscFunctionBegin;
6556a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6561ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6571ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
65849b5e25fSSatish Balay   v  = a->a;
65949b5e25fSSatish Balay   xb = x;
66049b5e25fSSatish Balay 
66149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
66249b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
66349b5e25fSSatish Balay     x1 = xb[0];
66449b5e25fSSatish Balay     ib = aj + *ai;
665831a3094SHong Zhang     jmin = 0;
66698c9bda7SSatish Balay     nonzerorow += (n>0);
667831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
668831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
669831a3094SHong Zhang     }
670831a3094SHong Zhang     for (j=jmin; j<n; j++) {
67149b5e25fSSatish Balay       cval    = *ib;
67249b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
67349b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
67449b5e25fSSatish Balay     }
67549b5e25fSSatish Balay     xb++; ai++;
67649b5e25fSSatish Balay   }
67749b5e25fSSatish Balay 
6781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
679bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68049b5e25fSSatish Balay 
681dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
68249b5e25fSSatish Balay   PetscFunctionReturn(0);
68349b5e25fSSatish Balay }
68449b5e25fSSatish Balay 
6854a2ae208SSatish Balay #undef __FUNCT__
6864a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
687dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
68849b5e25fSSatish Balay {
68949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
690bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
69149b5e25fSSatish Balay   MatScalar      *v;
6926849ba73SBarry Smith   PetscErrorCode ierr;
69313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
69498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
69549b5e25fSSatish Balay 
69649b5e25fSSatish Balay   PetscFunctionBegin;
6976a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70049b5e25fSSatish Balay 
70149b5e25fSSatish Balay   v  = a->a;
70249b5e25fSSatish Balay   xb = x;
70349b5e25fSSatish Balay 
70449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
70649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
70749b5e25fSSatish Balay     ib = aj + *ai;
708831a3094SHong Zhang     jmin = 0;
70998c9bda7SSatish Balay     nonzerorow += (n>0);
7107fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
71149b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
71249b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
713831a3094SHong Zhang       v += 4; jmin++;
7147fbae186SHong Zhang     }
715831a3094SHong Zhang     for (j=jmin; j<n; j++) {
71649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
71749b5e25fSSatish Balay       cval       = ib[j]*2;
71849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
71949b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
72049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
72149b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
72249b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
72349b5e25fSSatish Balay       v  += 4;
72449b5e25fSSatish Balay     }
72549b5e25fSSatish Balay     xb +=2; ai++;
72649b5e25fSSatish Balay   }
7271ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
728bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
72949b5e25fSSatish Balay 
730dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
73149b5e25fSSatish Balay   PetscFunctionReturn(0);
73249b5e25fSSatish Balay }
73349b5e25fSSatish Balay 
7344a2ae208SSatish Balay #undef __FUNCT__
7354a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
736dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
73749b5e25fSSatish Balay {
73849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
739bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
74049b5e25fSSatish Balay   MatScalar      *v;
7416849ba73SBarry Smith   PetscErrorCode ierr;
74213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
74398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
74449b5e25fSSatish Balay 
74549b5e25fSSatish Balay   PetscFunctionBegin;
7466a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7471ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
74949b5e25fSSatish Balay 
75049b5e25fSSatish Balay   v     = a->a;
75149b5e25fSSatish Balay   xb = x;
75249b5e25fSSatish Balay 
75349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
75449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
75549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
75649b5e25fSSatish Balay     ib = aj + *ai;
757831a3094SHong Zhang     jmin = 0;
75898c9bda7SSatish Balay     nonzerorow += (n>0);
7597fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
76049b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
76149b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
76249b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
763831a3094SHong Zhang      v += 9; jmin++;
7647fbae186SHong Zhang     }
765831a3094SHong Zhang     for (j=jmin; j<n; j++) {
76649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
76749b5e25fSSatish Balay       cval       = ib[j]*3;
76849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
76949b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
77049b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
77149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
77249b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
77349b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
77449b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
77549b5e25fSSatish Balay       v  += 9;
77649b5e25fSSatish Balay     }
77749b5e25fSSatish Balay     xb +=3; ai++;
77849b5e25fSSatish Balay   }
77949b5e25fSSatish Balay 
7801ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
781bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
78249b5e25fSSatish Balay 
783dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
78449b5e25fSSatish Balay   PetscFunctionReturn(0);
78549b5e25fSSatish Balay }
78649b5e25fSSatish Balay 
7874a2ae208SSatish Balay #undef __FUNCT__
7884a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
789dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
79049b5e25fSSatish Balay {
79149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
792bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
79349b5e25fSSatish Balay   MatScalar      *v;
7946849ba73SBarry Smith   PetscErrorCode ierr;
79513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
79698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
79749b5e25fSSatish Balay 
79849b5e25fSSatish Balay   PetscFunctionBegin;
7996a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8001ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8011ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
80249b5e25fSSatish Balay 
80349b5e25fSSatish Balay   v     = a->a;
80449b5e25fSSatish Balay   xb = x;
80549b5e25fSSatish Balay 
80649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
80749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
80849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
80949b5e25fSSatish Balay     ib = aj + *ai;
810831a3094SHong Zhang     jmin = 0;
81198c9bda7SSatish Balay     nonzerorow += (n>0);
8127fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
81349b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
81449b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
81549b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
81649b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
817831a3094SHong Zhang       v += 16; jmin++;
8187fbae186SHong Zhang     }
819831a3094SHong Zhang     for (j=jmin; j<n; j++) {
82049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
82149b5e25fSSatish Balay       cval       = ib[j]*4;
82249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
82349b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
82449b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
82549b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
82649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
82749b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
82849b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
82949b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
83049b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
83149b5e25fSSatish Balay       v  += 16;
83249b5e25fSSatish Balay     }
83349b5e25fSSatish Balay     xb +=4; ai++;
83449b5e25fSSatish Balay   }
83549b5e25fSSatish Balay 
8361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
837bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
83849b5e25fSSatish Balay 
839dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
84049b5e25fSSatish Balay   PetscFunctionReturn(0);
84149b5e25fSSatish Balay }
84249b5e25fSSatish Balay 
8434a2ae208SSatish Balay #undef __FUNCT__
8444a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
845dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
84649b5e25fSSatish Balay {
84749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
848bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
84949b5e25fSSatish Balay   MatScalar      *v;
8506849ba73SBarry Smith   PetscErrorCode ierr;
85113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
85298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
85349b5e25fSSatish Balay 
85449b5e25fSSatish Balay   PetscFunctionBegin;
8556a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8561ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8571ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
85849b5e25fSSatish Balay 
85949b5e25fSSatish Balay   v     = a->a;
86049b5e25fSSatish Balay   xb = x;
86149b5e25fSSatish Balay 
86249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
86349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
86449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
86549b5e25fSSatish Balay     ib = aj + *ai;
866831a3094SHong Zhang     jmin = 0;
86798c9bda7SSatish Balay     nonzerorow += (n>0);
8687fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
86949b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
87049b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
87149b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
87249b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
87349b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
874831a3094SHong Zhang       v += 25; jmin++;
8757fbae186SHong Zhang     }
876831a3094SHong Zhang     for (j=jmin; j<n; j++) {
87749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
87849b5e25fSSatish Balay       cval       = ib[j]*5;
87949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
88049b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
88149b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
88249b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
88349b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
88449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
88549b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
88649b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
88749b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
88849b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
88949b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
89049b5e25fSSatish Balay       v  += 25;
89149b5e25fSSatish Balay     }
89249b5e25fSSatish Balay     xb +=5; ai++;
89349b5e25fSSatish Balay   }
89449b5e25fSSatish Balay 
8951ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
896bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
89749b5e25fSSatish Balay 
898dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
89949b5e25fSSatish Balay   PetscFunctionReturn(0);
90049b5e25fSSatish Balay }
9014a2ae208SSatish Balay #undef __FUNCT__
9024a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
903dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
90449b5e25fSSatish Balay {
90549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
906bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
90749b5e25fSSatish Balay   MatScalar      *v;
9086849ba73SBarry Smith   PetscErrorCode ierr;
90913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
91098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
91149b5e25fSSatish Balay 
91249b5e25fSSatish Balay   PetscFunctionBegin;
9136a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9141ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9151ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
91649b5e25fSSatish Balay 
91749b5e25fSSatish Balay   v     = a->a;
91849b5e25fSSatish Balay   xb = x;
91949b5e25fSSatish Balay 
92049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
92149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
92249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
92349b5e25fSSatish Balay     ib = aj + *ai;
924831a3094SHong Zhang     jmin = 0;
92598c9bda7SSatish Balay     nonzerorow += (n>0);
9267fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
92749b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
92849b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
92949b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
93049b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
93149b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
93249b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
933831a3094SHong Zhang       v += 36; jmin++;
9347fbae186SHong Zhang     }
935831a3094SHong Zhang     for (j=jmin; j<n; j++) {
93649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
93749b5e25fSSatish Balay       cval       = ib[j]*6;
93849b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
93949b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
94049b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
94149b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
94249b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
94349b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
94449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
94549b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
94649b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
94749b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
94849b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
94949b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
95049b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
95149b5e25fSSatish Balay       v  += 36;
95249b5e25fSSatish Balay     }
95349b5e25fSSatish Balay     xb +=6; ai++;
95449b5e25fSSatish Balay   }
95549b5e25fSSatish Balay 
9561ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
957bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
95849b5e25fSSatish Balay 
959dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
96049b5e25fSSatish Balay   PetscFunctionReturn(0);
96149b5e25fSSatish Balay }
96249b5e25fSSatish Balay 
9634a2ae208SSatish Balay #undef __FUNCT__
9644a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
965dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
96649b5e25fSSatish Balay {
96749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
968bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
96949b5e25fSSatish Balay   MatScalar      *v;
9706849ba73SBarry Smith   PetscErrorCode ierr;
97113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
97298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
97349b5e25fSSatish Balay 
97449b5e25fSSatish Balay   PetscFunctionBegin;
9756a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9761ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9771ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
97849b5e25fSSatish Balay 
97949b5e25fSSatish Balay   v     = a->a;
98049b5e25fSSatish Balay   xb = x;
98149b5e25fSSatish Balay 
98249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
98349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
98449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
98549b5e25fSSatish Balay     ib = aj + *ai;
986831a3094SHong Zhang     jmin = 0;
98798c9bda7SSatish Balay     nonzerorow += (n>0);
9887fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
98949b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
99049b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
99149b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
99249b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
99349b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
99449b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
99549b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
996831a3094SHong Zhang       v += 49; jmin++;
9977fbae186SHong Zhang     }
998831a3094SHong Zhang     for (j=jmin; j<n; j++) {
99949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
100049b5e25fSSatish Balay       cval       = ib[j]*7;
100149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
100249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
100349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
100449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
100549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
100649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
100749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
100849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
100949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
101049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
101149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
101249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
101349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
101449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
101549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
101649b5e25fSSatish Balay       v  += 49;
101749b5e25fSSatish Balay     }
101849b5e25fSSatish Balay     xb +=7; ai++;
101949b5e25fSSatish Balay   }
102049b5e25fSSatish Balay 
10211ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1022bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
102349b5e25fSSatish Balay 
1024dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
102549b5e25fSSatish Balay   PetscFunctionReturn(0);
102649b5e25fSSatish Balay }
102749b5e25fSSatish Balay 
10284a2ae208SSatish Balay #undef __FUNCT__
10294a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1030dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
103149b5e25fSSatish Balay {
103249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1033bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1034066653e3SSatish Balay   MatScalar      *v;
1035dfbe8321SBarry Smith   PetscErrorCode ierr;
1036d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
103798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
103849b5e25fSSatish Balay 
103949b5e25fSSatish Balay   PetscFunctionBegin;
10406a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10411ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10421ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
104349b5e25fSSatish Balay 
104449b5e25fSSatish Balay   aj   = a->j;
104549b5e25fSSatish Balay   v    = a->a;
104649b5e25fSSatish Balay   ii   = a->i;
104749b5e25fSSatish Balay 
104849b5e25fSSatish Balay   if (!a->mult_work) {
1049d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
105049b5e25fSSatish Balay   }
105149b5e25fSSatish Balay   work = a->mult_work;
105249b5e25fSSatish Balay 
105349b5e25fSSatish Balay 
105449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
105549b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
105649b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
105798c9bda7SSatish Balay     nonzerorow += (n>0);
105849b5e25fSSatish Balay 
105949b5e25fSSatish Balay     /* upper triangular part */
106049b5e25fSSatish Balay     for (j=0; j<n; j++) {
106149b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
106249b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
106349b5e25fSSatish Balay       workt += bs;
106449b5e25fSSatish Balay     }
106549b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
106649b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
106749b5e25fSSatish Balay 
106849b5e25fSSatish Balay     /* strict lower triangular part */
1069831a3094SHong Zhang     idx = aj+ii[0];
1070831a3094SHong Zhang     if (*idx == i){
107196b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1072831a3094SHong Zhang     }
107349b5e25fSSatish Balay     if (ncols > 0){
107449b5e25fSSatish Balay       workt = work;
107587828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1076831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1077831a3094SHong Zhang       for (j=0; j<n; j++) {
1078831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
107949b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
108049b5e25fSSatish Balay         workt += bs;
108149b5e25fSSatish Balay       }
108249b5e25fSSatish Balay     }
108349b5e25fSSatish Balay 
108449b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
108549b5e25fSSatish Balay   }
108649b5e25fSSatish Balay 
10871ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1088bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
108949b5e25fSSatish Balay 
1090dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
109149b5e25fSSatish Balay   PetscFunctionReturn(0);
109249b5e25fSSatish Balay }
109349b5e25fSSatish Balay 
10944a2ae208SSatish Balay #undef __FUNCT__
10954a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1096f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
109749b5e25fSSatish Balay {
109849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1099f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1100efee365bSSatish Balay   PetscErrorCode ierr;
11010805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
110249b5e25fSSatish Balay 
110349b5e25fSSatish Balay   PetscFunctionBegin;
1104f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1105efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
110649b5e25fSSatish Balay   PetscFunctionReturn(0);
110749b5e25fSSatish Balay }
110849b5e25fSSatish Balay 
11094a2ae208SSatish Balay #undef __FUNCT__
11104a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1111dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
111249b5e25fSSatish Balay {
111349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111449b5e25fSSatish Balay   MatScalar      *v = a->a;
111549b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1116d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11176849ba73SBarry Smith   PetscErrorCode ierr;
111813f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
111949b5e25fSSatish Balay 
112049b5e25fSSatish Balay   PetscFunctionBegin;
112149b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
112249b5e25fSSatish Balay     for (k=0; k<mbs; k++){
112349b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1124831a3094SHong Zhang       col  = aj + jmin;
1125831a3094SHong Zhang       if (*col == k){         /* diagonal block */
112649b5e25fSSatish Balay         for (i=0; i<bs2; i++){
112749b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
112849b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
112949b5e25fSSatish Balay #else
113049b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
113149b5e25fSSatish Balay #endif
113249b5e25fSSatish Balay         }
1133831a3094SHong Zhang         jmin++;
1134831a3094SHong Zhang       }
1135831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
113649b5e25fSSatish Balay         for (i=0; i<bs2; i++){
113749b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
113849b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
113949b5e25fSSatish Balay #else
114049b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
114149b5e25fSSatish Balay #endif
114249b5e25fSSatish Balay         }
114349b5e25fSSatish Balay       }
114449b5e25fSSatish Balay     }
114549b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11460b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
114774ed9c26SBarry Smith     ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
11480b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11490b8dc8d2SHong Zhang     il[0] = 0;
115049b5e25fSSatish Balay 
115149b5e25fSSatish Balay     *norm = 0.0;
115249b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
115349b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
115449b5e25fSSatish Balay       /*-- col sum --*/
115549b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1156831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1157831a3094SHong Zhang                   at step k */
115849b5e25fSSatish Balay       while (i<mbs){
115949b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
116049b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
116149b5e25fSSatish Balay         for (j=0; j<bs; j++){
116249b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
116349b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
116449b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
116549b5e25fSSatish Balay           }
116649b5e25fSSatish Balay         }
116749b5e25fSSatish Balay         /* update il, jl */
1168831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1169831a3094SHong Zhang         jmax = a->i[i+1];
117049b5e25fSSatish Balay         if (jmin < jmax){
117149b5e25fSSatish Balay           il[i] = jmin;
117249b5e25fSSatish Balay           j   = a->j[jmin];
117349b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
117449b5e25fSSatish Balay         }
117549b5e25fSSatish Balay         i = nexti;
117649b5e25fSSatish Balay       }
117749b5e25fSSatish Balay       /*-- row sum --*/
117849b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
117949b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
118049b5e25fSSatish Balay         for (j=0; j<bs; j++){
118149b5e25fSSatish Balay           v = a->a + i*bs2 + j;
118249b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
11830b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
118449b5e25fSSatish Balay           }
118549b5e25fSSatish Balay         }
118649b5e25fSSatish Balay       }
118749b5e25fSSatish Balay       /* add k_th block row to il, jl */
1188831a3094SHong Zhang       col = aj+jmin;
1189831a3094SHong Zhang       if (*col == k) jmin++;
119049b5e25fSSatish Balay       if (jmin < jmax){
119149b5e25fSSatish Balay         il[k] = jmin;
11920b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
119349b5e25fSSatish Balay       }
119449b5e25fSSatish Balay       for (j=0; j<bs; j++){
119549b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
119649b5e25fSSatish Balay       }
119749b5e25fSSatish Balay     }
119874ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
119949b5e25fSSatish Balay   } else {
1200*e32f2f54SBarry Smith     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
120149b5e25fSSatish Balay   }
120249b5e25fSSatish Balay   PetscFunctionReturn(0);
120349b5e25fSSatish Balay }
120449b5e25fSSatish Balay 
12054a2ae208SSatish Balay #undef __FUNCT__
12064a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1207dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
120849b5e25fSSatish Balay {
120949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1210dfbe8321SBarry Smith   PetscErrorCode ierr;
121149b5e25fSSatish Balay 
121249b5e25fSSatish Balay   PetscFunctionBegin;
121349b5e25fSSatish Balay 
121449b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1215d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1216ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1217ef511fbeSHong Zhang     PetscFunctionReturn(0);
121849b5e25fSSatish Balay   }
121949b5e25fSSatish Balay 
122049b5e25fSSatish Balay   /* if the a->i are the same */
122113f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1222abc0a331SBarry Smith   if (!*flg) {
122349b5e25fSSatish Balay     PetscFunctionReturn(0);
122449b5e25fSSatish Balay   }
122549b5e25fSSatish Balay 
122649b5e25fSSatish Balay   /* if a->j are the same */
122713f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1228abc0a331SBarry Smith   if (!*flg) {
122949b5e25fSSatish Balay     PetscFunctionReturn(0);
123049b5e25fSSatish Balay   }
123149b5e25fSSatish Balay   /* if a->a are the same */
1232d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1233935af2e7SHong Zhang   PetscFunctionReturn(0);
123449b5e25fSSatish Balay }
123549b5e25fSSatish Balay 
12364a2ae208SSatish Balay #undef __FUNCT__
12374a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1238dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
123949b5e25fSSatish Balay {
124049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1241dfbe8321SBarry Smith   PetscErrorCode ierr;
124213f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
124387828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
124449b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
124549b5e25fSSatish Balay 
124649b5e25fSSatish Balay   PetscFunctionBegin;
1247d0f46423SBarry Smith   bs   = A->rmap->bs;
1248*e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
124982799104SHong Zhang 
125049b5e25fSSatish Balay   aa   = a->a;
125149b5e25fSSatish Balay   ai   = a->i;
125249b5e25fSSatish Balay   aj   = a->j;
125349b5e25fSSatish Balay   ambs = a->mbs;
125449b5e25fSSatish Balay   bs2  = a->bs2;
125549b5e25fSSatish Balay 
12562dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12571ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
125849b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1259*e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
126049b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
126149b5e25fSSatish Balay     j=ai[i];
126249b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
126349b5e25fSSatish Balay       row  = i*bs;
126449b5e25fSSatish Balay       aa_j = aa + j*bs2;
1265d5f3da31SBarry Smith       if (A->factortype && bs==1){
126682799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
126782799104SHong Zhang       } else {
126849b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
126949b5e25fSSatish Balay       }
127049b5e25fSSatish Balay     }
127182799104SHong Zhang   }
127282799104SHong Zhang 
12731ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
127449b5e25fSSatish Balay   PetscFunctionReturn(0);
127549b5e25fSSatish Balay }
127649b5e25fSSatish Balay 
12774a2ae208SSatish Balay #undef __FUNCT__
12784a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1279dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
128049b5e25fSSatish Balay {
128149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
12825e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
128349b5e25fSSatish Balay   MatScalar      *aa,*v;
1284dfbe8321SBarry Smith   PetscErrorCode ierr;
12855e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1286b3bf805bSHong Zhang   PetscTruth     flg;
128749b5e25fSSatish Balay 
128849b5e25fSSatish Balay   PetscFunctionBegin;
1289b3bf805bSHong Zhang   if (ll != rr){
1290b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1291b3bf805bSHong Zhang     if (!flg)
1292*e32f2f54SBarry Smith       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1293b3bf805bSHong Zhang   }
1294b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
129549b5e25fSSatish Balay   ai  = a->i;
129649b5e25fSSatish Balay   aj  = a->j;
129749b5e25fSSatish Balay   aa  = a->a;
1298d0f46423SBarry Smith   m   = A->rmap->N;
1299d0f46423SBarry Smith   bs  = A->rmap->bs;
130049b5e25fSSatish Balay   mbs = a->mbs;
130149b5e25fSSatish Balay   bs2 = a->bs2;
130249b5e25fSSatish Balay 
13031ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
130449b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1305*e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
130649b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
130749b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
130849b5e25fSSatish Balay     li = l + i*bs;
130949b5e25fSSatish Balay     v  = aa + bs2*ai[i];
131049b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
131149b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13125e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
131349b5e25fSSatish Balay         x = ri[k];
131449b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
131549b5e25fSSatish Balay       }
131649b5e25fSSatish Balay     }
131749b5e25fSSatish Balay   }
13181ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1319dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
132049b5e25fSSatish Balay   PetscFunctionReturn(0);
132149b5e25fSSatish Balay }
132249b5e25fSSatish Balay 
13234a2ae208SSatish Balay #undef __FUNCT__
13244a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1325dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
132649b5e25fSSatish Balay {
132749b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
132849b5e25fSSatish Balay 
132949b5e25fSSatish Balay   PetscFunctionBegin;
133049b5e25fSSatish Balay   info->block_size     = a->bs2;
13316c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13326c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
133349b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
133449b5e25fSSatish Balay   info->assemblies   = A->num_ass;
133549b5e25fSSatish Balay   info->mallocs      = a->reallocs;
13367adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1337d5f3da31SBarry Smith   if (A->factortype) {
133849b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
133949b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
134049b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
134149b5e25fSSatish Balay   } else {
134249b5e25fSSatish Balay     info->fill_ratio_given  = 0;
134349b5e25fSSatish Balay     info->fill_ratio_needed = 0;
134449b5e25fSSatish Balay     info->factor_mallocs    = 0;
134549b5e25fSSatish Balay   }
134649b5e25fSSatish Balay   PetscFunctionReturn(0);
134749b5e25fSSatish Balay }
134849b5e25fSSatish Balay 
134949b5e25fSSatish Balay 
13504a2ae208SSatish Balay #undef __FUNCT__
13514a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1352dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
135349b5e25fSSatish Balay {
135449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1355dfbe8321SBarry Smith   PetscErrorCode ierr;
135649b5e25fSSatish Balay 
135749b5e25fSSatish Balay   PetscFunctionBegin;
135849b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
135949b5e25fSSatish Balay   PetscFunctionReturn(0);
136049b5e25fSSatish Balay }
1361dc354874SHong Zhang 
13624a2ae208SSatish Balay #undef __FUNCT__
1363985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1364985db425SBarry Smith /*
1365985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1366985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1367985db425SBarry Smith */
1368985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1369dc354874SHong Zhang {
1370dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1371dfbe8321SBarry Smith   PetscErrorCode ierr;
137213f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1373c3fca9a7SHong Zhang   PetscReal      atmp;
1374273d9f13SBarry Smith   MatScalar      *aa;
1375985db425SBarry Smith   PetscScalar    *x;
137613f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1377dc354874SHong Zhang 
1378dc354874SHong Zhang   PetscFunctionBegin;
1379*e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1380*e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1381d0f46423SBarry Smith   bs   = A->rmap->bs;
1382dc354874SHong Zhang   aa   = a->a;
1383dc354874SHong Zhang   ai   = a->i;
1384dc354874SHong Zhang   aj   = a->j;
138544117c81SHong Zhang   mbs = a->mbs;
1386dc354874SHong Zhang 
1387985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
13881ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1389dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1390*e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
139144117c81SHong Zhang   for (i=0; i<mbs; i++) {
1392d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1393d0f6400bSHong Zhang     brow  = bs*i;
139444117c81SHong Zhang     for (j=0; j<ncols; j++){
1395d0f6400bSHong Zhang       bcol = bs*(*aj);
139644117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1397d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
139844117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1399d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1400d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1401c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1402c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
140344117c81SHong Zhang         }
140444117c81SHong Zhang       }
1405d0f6400bSHong Zhang       aj++;
1406dc354874SHong Zhang     }
1407dc354874SHong Zhang   }
14081ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1409dc354874SHong Zhang   PetscFunctionReturn(0);
1410dc354874SHong Zhang }
1411