xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision ce94432eddcd14845bc7e8083b7f8ea723b9bf7d)
149b5e25fSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
3c6db04a5SJed Brown #include <../src/mat/blockinvert.h>
4c6db04a5SJed Brown #include <petscbt.h>
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
6c6db04a5SJed Brown #include <petscblaslapack.h>
749b5e25fSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1149b5e25fSSatish Balay {
125eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
136849ba73SBarry Smith   PetscErrorCode ierr;
145d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
155d0c19d7SBarry Smith   const PetscInt *idx;
16db41cccfSHong Zhang   PetscBT        table_out,table_in;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19e32f2f54SBarry Smith   if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs  = a->mbs;
21d94109b8SHong Zhang   ai   = a->i;
22d94109b8SHong Zhang   aj   = a->j;
23d0f46423SBarry Smith   bs   = A->rmap->bs;
2453b8de81SBarry Smith   ierr = PetscBTCreate(mbs,&table_out);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
2753b8de81SBarry Smith   ierr = PetscBTCreate(mbs,&table_in);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31db41cccfSHong Zhang     ierr = PetscBTMemzero(mbs,table_out);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37db41cccfSHong Zhang     /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n; ++j) {
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42db41cccfSHong Zhang       if (!PetscBTLookupSet(table_out,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
486bf464f9SBarry Smith     ierr = ISDestroy(&is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++) { /* for each overlap */
52db41cccfSHong Zhang       /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53db41cccfSHong Zhang       ierr = PetscBTMemzero(mbs,table_in);CHKERRQ(ierr);
54db41cccfSHong Zhang       for (l=k; l<isz; l++) { ierr = PetscBTSet(table_in,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++) {
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59db41cccfSHong Zhang         if (PetscBTLookup(table_in,brow)) { /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end; l++) {
61d94109b8SHong Zhang             bcol = aj[l];
62d7b97159SHong Zhang             if (!PetscBTLookupSet(table_out,bcol)) {
63d7b97159SHong Zhang               nidx[isz++] = bcol;
64d7b97159SHong Zhang               if (bcol_max < bcol) bcol_max = bcol;
65d7b97159SHong Zhang             }
66d94109b8SHong Zhang           }
67d94109b8SHong Zhang           k++;
68d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
69d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
70d94109b8SHong Zhang           for (l = start; l<end; l++) {
71d94109b8SHong Zhang             bcol = aj[l];
72dbe03f88SHong Zhang             if (bcol > bcol_max) break;
73db41cccfSHong Zhang             if (PetscBTLookup(table_in,bcol)) {
7426fbe8dcSKarl Rupp               if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow;
75d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
76d94109b8SHong Zhang             }
77d94109b8SHong Zhang           }
78d94109b8SHong Zhang         }
79d94109b8SHong Zhang       }
80d94109b8SHong Zhang     } /* for each overlap */
81d94109b8SHong Zhang 
82d94109b8SHong Zhang     /* expand the Index Set */
83d94109b8SHong Zhang     for (j=0; j<isz; j++) {
8426fbe8dcSKarl Rupp       for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k;
85d94109b8SHong Zhang     }
8670b3c8c7SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
87d94109b8SHong Zhang   }
8894bacf5dSBarry Smith   ierr = PetscBTDestroy(&table_out);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
9194bacf5dSBarry Smith   ierr = PetscBTDestroy(&table_in);CHKERRQ(ierr);
925eee224dSHong Zhang   PetscFunctionReturn(0);
9349b5e25fSSatish Balay }
9449b5e25fSSatish Balay 
954a2ae208SSatish Balay #undef __FUNCT__
964a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
974aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9849b5e25fSSatish Balay {
9949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*c;
1006849ba73SBarry Smith   PetscErrorCode ierr;
10113f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10213f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1035d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1045d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10549b5e25fSSatish Balay   MatScalar      *mat_a;
10649b5e25fSSatish Balay   Mat            C;
107ace3abfcSBarry Smith   PetscBool      flag,sorted;
10849b5e25fSSatish Balay 
10949b5e25fSSatish Balay   PetscFunctionBegin;
110e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
11114ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
112e32f2f54SBarry Smith   if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11349b5e25fSSatish Balay 
11449b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11549b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11649b5e25fSSatish Balay 
11774ed9c26SBarry Smith   ierr  = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11874ed9c26SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11949b5e25fSSatish Balay   ssmap = smap;
12013f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12249b5e25fSSatish Balay   /* determine lens of each row */
12349b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12449b5e25fSSatish Balay     kstart  = ai[irow[i]];
12549b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12649b5e25fSSatish Balay     lens[i] = 0;
12749b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
12826fbe8dcSKarl Rupp       if (ssmap[aj[k]]) lens[i]++;
12949b5e25fSSatish Balay     }
13049b5e25fSSatish Balay   }
13149b5e25fSSatish Balay   /* Create and fill new matrix */
13249b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13349b5e25fSSatish Balay     c = (Mat_SeqSBAIJ*)((*B)->data);
13449b5e25fSSatish Balay 
135e32f2f54SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13613f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
137e7e72b3dSBarry Smith     if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13813f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
13949b5e25fSSatish Balay     C    = *B;
14049b5e25fSSatish Balay   } else {
141*ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
142f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1437adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
144ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14549b5e25fSSatish Balay   }
14649b5e25fSSatish Balay   c = (Mat_SeqSBAIJ*)(C->data);
14749b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
14849b5e25fSSatish Balay     row      = irow[i];
14949b5e25fSSatish Balay     kstart   = ai[row];
15049b5e25fSSatish Balay     kend     = kstart + a->ilen[row];
15149b5e25fSSatish Balay     mat_i    = c->i[i];
15249b5e25fSSatish Balay     mat_j    = c->j + mat_i;
15349b5e25fSSatish Balay     mat_a    = c->a + mat_i*bs2;
15449b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15549b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15649b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15749b5e25fSSatish Balay         *mat_j++ = tcol - 1;
15849b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
15949b5e25fSSatish Balay         mat_a   += bs2;
16049b5e25fSSatish Balay         (*mat_ilen)++;
16149b5e25fSSatish Balay       }
16249b5e25fSSatish Balay     }
16349b5e25fSSatish Balay   }
16449b5e25fSSatish Balay 
16549b5e25fSSatish Balay   /* Free work space */
16649b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16749b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
16849b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17049b5e25fSSatish Balay 
17149b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17249b5e25fSSatish Balay   *B   = C;
17349b5e25fSSatish Balay   PetscFunctionReturn(0);
17449b5e25fSSatish Balay }
17549b5e25fSSatish Balay 
1764a2ae208SSatish Balay #undef __FUNCT__
1774a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1784aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
17949b5e25fSSatish Balay {
18049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18149b5e25fSSatish Balay   IS             is1;
1826849ba73SBarry Smith   PetscErrorCode ierr;
1835d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1845d0c19d7SBarry Smith   const PetscInt *irow;
18549b5e25fSSatish Balay 
18649b5e25fSSatish Balay   PetscFunctionBegin;
187e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
18849b5e25fSSatish Balay 
18949b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19049b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19149b5e25fSSatish Balay 
19249b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19349b5e25fSSatish Balay    and form the IS with compressed IS */
19474ed9c26SBarry Smith   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
19574ed9c26SBarry Smith   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
19649b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19749b5e25fSSatish Balay 
19849b5e25fSSatish Balay   count = 0;
19949b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
200e32f2f54SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20149b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20249b5e25fSSatish Balay   }
20349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20470b3c8c7SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
20574ed9c26SBarry Smith   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
20649b5e25fSSatish Balay 
2074aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
2086bf464f9SBarry Smith   ierr = ISDestroy(&is1);CHKERRQ(ierr);
20949b5e25fSSatish Balay   PetscFunctionReturn(0);
21049b5e25fSSatish Balay }
21149b5e25fSSatish Balay 
2124a2ae208SSatish Balay #undef __FUNCT__
2134a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21413f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21549b5e25fSSatish Balay {
2166849ba73SBarry Smith   PetscErrorCode ierr;
21713f74950SBarry Smith   PetscInt       i;
21849b5e25fSSatish Balay 
21949b5e25fSSatish Balay   PetscFunctionBegin;
22049b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22182502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22249b5e25fSSatish Balay   }
22349b5e25fSSatish Balay 
22449b5e25fSSatish Balay   for (i=0; i<n; i++) {
2254aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
22649b5e25fSSatish Balay   }
22749b5e25fSSatish Balay   PetscFunctionReturn(0);
22849b5e25fSSatish Balay }
22949b5e25fSSatish Balay 
23049b5e25fSSatish Balay /* -------------------------------------------------------*/
23149b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23249b5e25fSSatish Balay /* -------------------------------------------------------*/
23349b5e25fSSatish Balay 
2344a2ae208SSatish Balay #undef __FUNCT__
2354a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
236dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
23749b5e25fSSatish Balay {
23849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
23987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
24049b5e25fSSatish Balay   MatScalar      *v;
2416849ba73SBarry Smith   PetscErrorCode ierr;
24213f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24449b5e25fSSatish Balay 
24549b5e25fSSatish Balay   PetscFunctionBegin;
2462dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2471ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
24949b5e25fSSatish Balay 
25049b5e25fSSatish Balay   v  = a->a;
25149b5e25fSSatish Balay   xb = x;
25249b5e25fSSatish Balay 
25349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25449b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
25549b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1];
25649b5e25fSSatish Balay     ib          = aj + *ai;
257831a3094SHong Zhang     jmin        = 0;
25898c9bda7SSatish Balay     nonzerorow += (n>0);
2597fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
26049b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
26149b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
262831a3094SHong Zhang       v        += 4; jmin++;
2637fbae186SHong Zhang     }
264444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
265444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
266831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
26849b5e25fSSatish Balay       cval       = ib[j]*2;
26949b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2;
27049b5e25fSSatish Balay       z[cval+1] += v[2]*x1 + v[3]*x2;
27149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
27249b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
27349b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
27449b5e25fSSatish Balay       v        += 4;
27549b5e25fSSatish Balay     }
27649b5e25fSSatish Balay     xb +=2; ai++;
27749b5e25fSSatish Balay   }
27849b5e25fSSatish Balay 
2791ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2801ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
281dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
28249b5e25fSSatish Balay   PetscFunctionReturn(0);
28349b5e25fSSatish Balay }
28449b5e25fSSatish Balay 
2854a2ae208SSatish Balay #undef __FUNCT__
2864a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
287dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
28849b5e25fSSatish Balay {
28949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
29087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
29149b5e25fSSatish Balay   MatScalar      *v;
2926849ba73SBarry Smith   PetscErrorCode ierr;
29313f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29549b5e25fSSatish Balay 
29649b5e25fSSatish Balay   PetscFunctionBegin;
2972dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
30049b5e25fSSatish Balay 
30149b5e25fSSatish Balay   v  = a->a;
30249b5e25fSSatish Balay   xb = x;
30349b5e25fSSatish Balay 
30449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30549b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
30649b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
30749b5e25fSSatish Balay     ib          = aj + *ai;
308831a3094SHong Zhang     jmin        = 0;
30998c9bda7SSatish Balay     nonzerorow += (n>0);
3107fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
31149b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
31249b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
31349b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
314831a3094SHong Zhang       v        += 9; jmin++;
3157fbae186SHong Zhang     }
316444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
317444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
318831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
32049b5e25fSSatish Balay       cval       = ib[j]*3;
32149b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
32249b5e25fSSatish Balay       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
32349b5e25fSSatish Balay       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
32449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
32649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
32749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
32849b5e25fSSatish Balay       v        += 9;
32949b5e25fSSatish Balay     }
33049b5e25fSSatish Balay     xb +=3; ai++;
33149b5e25fSSatish Balay   }
33249b5e25fSSatish Balay 
3331ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3341ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
335dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33649b5e25fSSatish Balay   PetscFunctionReturn(0);
33749b5e25fSSatish Balay }
33849b5e25fSSatish Balay 
3394a2ae208SSatish Balay #undef __FUNCT__
3404a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
341dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
34249b5e25fSSatish Balay {
34349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
34487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
34549b5e25fSSatish Balay   MatScalar      *v;
3466849ba73SBarry Smith   PetscErrorCode ierr;
34713f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
34949b5e25fSSatish Balay 
35049b5e25fSSatish Balay   PetscFunctionBegin;
3512dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3521ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3531ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35449b5e25fSSatish Balay 
35549b5e25fSSatish Balay   v  = a->a;
35649b5e25fSSatish Balay   xb = x;
35749b5e25fSSatish Balay 
35849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
35949b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
36049b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
36149b5e25fSSatish Balay     ib          = aj + *ai;
362831a3094SHong Zhang     jmin        = 0;
36398c9bda7SSatish Balay     nonzerorow += (n>0);
3647fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
36549b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36649b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
36749b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
36849b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
369831a3094SHong Zhang       v        += 16; jmin++;
3707fbae186SHong Zhang     }
371444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
372444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
373831a3094SHong Zhang     for (j=jmin; j<n; j++) {
37449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37549b5e25fSSatish Balay       cval       = ib[j]*4;
37649b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
37749b5e25fSSatish Balay       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
37849b5e25fSSatish Balay       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
37949b5e25fSSatish Balay       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
38049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
38149b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
38249b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
38349b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
38449b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38549b5e25fSSatish Balay       v        += 16;
38649b5e25fSSatish Balay     }
38749b5e25fSSatish Balay     xb +=4; ai++;
38849b5e25fSSatish Balay   }
38949b5e25fSSatish Balay 
3901ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3911ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
392dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
39349b5e25fSSatish Balay   PetscFunctionReturn(0);
39449b5e25fSSatish Balay }
39549b5e25fSSatish Balay 
3964a2ae208SSatish Balay #undef __FUNCT__
3974a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
398dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
39949b5e25fSSatish Balay {
40049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
40187828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
40249b5e25fSSatish Balay   MatScalar      *v;
4036849ba73SBarry Smith   PetscErrorCode ierr;
40413f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
40598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
40649b5e25fSSatish Balay 
40749b5e25fSSatish Balay   PetscFunctionBegin;
4082dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4091ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4101ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
41149b5e25fSSatish Balay 
41249b5e25fSSatish Balay   v  = a->a;
41349b5e25fSSatish Balay   xb = x;
41449b5e25fSSatish Balay 
41549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
41749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
41849b5e25fSSatish Balay     ib          = aj + *ai;
419831a3094SHong Zhang     jmin        = 0;
42098c9bda7SSatish Balay     nonzerorow += (n>0);
4217fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
42249b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
42349b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
42449b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
42549b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42649b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
427831a3094SHong Zhang       v        += 25; jmin++;
4287fbae186SHong Zhang     }
429444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
430444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
431831a3094SHong Zhang     for (j=jmin; j<n; j++) {
43249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
43349b5e25fSSatish Balay       cval       = ib[j]*5;
43449b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
43549b5e25fSSatish Balay       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
43649b5e25fSSatish Balay       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
43749b5e25fSSatish Balay       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
43849b5e25fSSatish Balay       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
43949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
44049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
44149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
44249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
44349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
44449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
44549b5e25fSSatish Balay       v        += 25;
44649b5e25fSSatish Balay     }
44749b5e25fSSatish Balay     xb +=5; ai++;
44849b5e25fSSatish Balay   }
44949b5e25fSSatish Balay 
4501ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4511ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
452dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
45349b5e25fSSatish Balay   PetscFunctionReturn(0);
45449b5e25fSSatish Balay }
45549b5e25fSSatish Balay 
45649b5e25fSSatish Balay 
4574a2ae208SSatish Balay #undef __FUNCT__
4584a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
459dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
46049b5e25fSSatish Balay {
46149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
46287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
46349b5e25fSSatish Balay   MatScalar      *v;
4646849ba73SBarry Smith   PetscErrorCode ierr;
46513f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
46698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
46749b5e25fSSatish Balay 
46849b5e25fSSatish Balay   PetscFunctionBegin;
4692dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4701ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
47249b5e25fSSatish Balay 
47349b5e25fSSatish Balay   v  = a->a;
47449b5e25fSSatish Balay   xb = x;
47549b5e25fSSatish Balay 
47649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47749b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
47849b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
47949b5e25fSSatish Balay     ib          = aj + *ai;
480831a3094SHong Zhang     jmin        = 0;
48198c9bda7SSatish Balay     nonzerorow += (n>0);
4827fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
48349b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
48449b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
48549b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
48649b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
48749b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
48849b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
489831a3094SHong Zhang       v        += 36; jmin++;
4907fbae186SHong Zhang     }
491444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
492444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
493831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49549b5e25fSSatish Balay       cval       = ib[j]*6;
49649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
49749b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
49849b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
49949b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
50049b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
50149b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
50249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50349b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
50449b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
50549b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
50649b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
50749b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
50849b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
50949b5e25fSSatish Balay       v        += 36;
51049b5e25fSSatish Balay     }
51149b5e25fSSatish Balay     xb +=6; ai++;
51249b5e25fSSatish Balay   }
51349b5e25fSSatish Balay 
5141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5151ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
516dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
51749b5e25fSSatish Balay   PetscFunctionReturn(0);
51849b5e25fSSatish Balay }
5194a2ae208SSatish Balay #undef __FUNCT__
5204a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
521dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
52249b5e25fSSatish Balay {
52349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
52487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
52549b5e25fSSatish Balay   MatScalar      *v;
5266849ba73SBarry Smith   PetscErrorCode ierr;
52713f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
52898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
52949b5e25fSSatish Balay 
53049b5e25fSSatish Balay   PetscFunctionBegin;
5312dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5321ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
53449b5e25fSSatish Balay 
53549b5e25fSSatish Balay   v  = a->a;
53649b5e25fSSatish Balay   xb = x;
53749b5e25fSSatish Balay 
53849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
53949b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
54049b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
54149b5e25fSSatish Balay     ib          = aj + *ai;
542831a3094SHong Zhang     jmin        = 0;
54398c9bda7SSatish Balay     nonzerorow += (n>0);
5447fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
54549b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
54649b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
54749b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
54849b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
54949b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
55049b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
55149b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
552831a3094SHong Zhang       v        += 49; jmin++;
5537fbae186SHong Zhang     }
554444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
555444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
556831a3094SHong Zhang     for (j=jmin; j<n; j++) {
55749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
55849b5e25fSSatish Balay       cval       = ib[j]*7;
55949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
56049b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
56149b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
56249b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
56349b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
56449b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
56549b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
56649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
56749b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
56849b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
56949b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
57049b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
57149b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
57249b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
57349b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
57449b5e25fSSatish Balay       v       += 49;
57549b5e25fSSatish Balay     }
57649b5e25fSSatish Balay     xb +=7; ai++;
57749b5e25fSSatish Balay   }
5781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
580dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
58149b5e25fSSatish Balay   PetscFunctionReturn(0);
58249b5e25fSSatish Balay }
58349b5e25fSSatish Balay 
58449b5e25fSSatish Balay /*
58549b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
58649b5e25fSSatish Balay */
5874a2ae208SSatish Balay #undef __FUNCT__
5884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
589dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
59049b5e25fSSatish Balay {
59149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
59287828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5930b60a74dSHong Zhang   MatScalar      *v;
594dfbe8321SBarry Smith   PetscErrorCode ierr;
595d0f46423SBarry Smith   PetscInt       mbs       =a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
59698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
59749b5e25fSSatish Balay 
59849b5e25fSSatish Balay   PetscFunctionBegin;
5992dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6001ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6011ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
60249b5e25fSSatish Balay 
60349b5e25fSSatish Balay   aj = a->j;
60449b5e25fSSatish Balay   v  = a->a;
60549b5e25fSSatish Balay   ii = a->i;
60649b5e25fSSatish Balay 
60749b5e25fSSatish Balay   if (!a->mult_work) {
608d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
60949b5e25fSSatish Balay   }
61049b5e25fSSatish Balay   work = a->mult_work;
61149b5e25fSSatish Balay 
61249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61349b5e25fSSatish Balay     n           = ii[1] - ii[0]; ncols = n*bs;
61449b5e25fSSatish Balay     workt       = work; idx=aj+ii[0];
61598c9bda7SSatish Balay     nonzerorow += (n>0);
61649b5e25fSSatish Balay 
61749b5e25fSSatish Balay     /* upper triangular part */
61849b5e25fSSatish Balay     for (j=0; j<n; j++) {
61949b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
62049b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
62149b5e25fSSatish Balay       workt += bs;
62249b5e25fSSatish Balay     }
62349b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
62496b95a6bSBarry Smith     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
62549b5e25fSSatish Balay 
62649b5e25fSSatish Balay     /* strict lower triangular part */
627831a3094SHong Zhang     idx = aj+ii[0];
628831a3094SHong Zhang     if (*idx == i) {
62996b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
630831a3094SHong Zhang     }
63196b9376eSHong Zhang 
63249b5e25fSSatish Balay     if (ncols > 0) {
63349b5e25fSSatish Balay       workt = work;
63487828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
63596b95a6bSBarry Smith       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
636831a3094SHong Zhang       for (j=0; j<n; j++) {
637831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
63849b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k];
63949b5e25fSSatish Balay         workt += bs;
64049b5e25fSSatish Balay       }
64149b5e25fSSatish Balay     }
64249b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
64349b5e25fSSatish Balay   }
64449b5e25fSSatish Balay 
6451ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6461ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
647dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
64849b5e25fSSatish Balay   PetscFunctionReturn(0);
64949b5e25fSSatish Balay }
65049b5e25fSSatish Balay 
6514a2ae208SSatish Balay #undef __FUNCT__
6524a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
653dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
65449b5e25fSSatish Balay {
65549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
656bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
65749b5e25fSSatish Balay   MatScalar      *v;
6586849ba73SBarry Smith   PetscErrorCode ierr;
65913f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
66098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
66149b5e25fSSatish Balay 
66249b5e25fSSatish Balay   PetscFunctionBegin;
663343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
6641ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6651ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
66649b5e25fSSatish Balay   v    = a->a;
66749b5e25fSSatish Balay   xb   = x;
66849b5e25fSSatish Balay 
66949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
67049b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th row of A */
67149b5e25fSSatish Balay     x1          = xb[0];
67249b5e25fSSatish Balay     ib          = aj + *ai;
673831a3094SHong Zhang     jmin        = 0;
67498c9bda7SSatish Balay     nonzerorow += (n>0);
675831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
676831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
677831a3094SHong Zhang     }
678831a3094SHong Zhang     for (j=jmin; j<n; j++) {
67949b5e25fSSatish Balay       cval    = *ib;
68049b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
68149b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
68249b5e25fSSatish Balay     }
68349b5e25fSSatish Balay     xb++; ai++;
68449b5e25fSSatish Balay   }
68549b5e25fSSatish Balay 
6861ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68849b5e25fSSatish Balay 
689dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
69049b5e25fSSatish Balay   PetscFunctionReturn(0);
69149b5e25fSSatish Balay }
69249b5e25fSSatish Balay 
6934a2ae208SSatish Balay #undef __FUNCT__
6944a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
695dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
69649b5e25fSSatish Balay {
69749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
698bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
69949b5e25fSSatish Balay   MatScalar      *v;
7006849ba73SBarry Smith   PetscErrorCode ierr;
70113f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
70298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
70349b5e25fSSatish Balay 
70449b5e25fSSatish Balay   PetscFunctionBegin;
705343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7061ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7071ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70849b5e25fSSatish Balay 
70949b5e25fSSatish Balay   v  = a->a;
71049b5e25fSSatish Balay   xb = x;
71149b5e25fSSatish Balay 
71249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
71349b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
71449b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1];
71549b5e25fSSatish Balay     ib          = aj + *ai;
716831a3094SHong Zhang     jmin        = 0;
71798c9bda7SSatish Balay     nonzerorow += (n>0);
7187fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
71949b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
72049b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
721831a3094SHong Zhang       v        += 4; jmin++;
7227fbae186SHong Zhang     }
723444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
724444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
725831a3094SHong Zhang     for (j=jmin; j<n; j++) {
72649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
72749b5e25fSSatish Balay       cval       = ib[j]*2;
72849b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2;
72949b5e25fSSatish Balay       z[cval+1] += v[2]*x1 + v[3]*x2;
73049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
73149b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
73249b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
73349b5e25fSSatish Balay       v        += 4;
73449b5e25fSSatish Balay     }
73549b5e25fSSatish Balay     xb +=2; ai++;
73649b5e25fSSatish Balay   }
7371ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
738bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
73949b5e25fSSatish Balay 
740dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
74149b5e25fSSatish Balay   PetscFunctionReturn(0);
74249b5e25fSSatish Balay }
74349b5e25fSSatish Balay 
7444a2ae208SSatish Balay #undef __FUNCT__
7454a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
746dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
74749b5e25fSSatish Balay {
74849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
749bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
75049b5e25fSSatish Balay   MatScalar      *v;
7516849ba73SBarry Smith   PetscErrorCode ierr;
75213f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
75398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
75449b5e25fSSatish Balay 
75549b5e25fSSatish Balay   PetscFunctionBegin;
756343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
75949b5e25fSSatish Balay 
76049b5e25fSSatish Balay   v  = a->a;
76149b5e25fSSatish Balay   xb = x;
76249b5e25fSSatish Balay 
76349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
76449b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
76549b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
76649b5e25fSSatish Balay     ib          = aj + *ai;
767831a3094SHong Zhang     jmin        = 0;
76898c9bda7SSatish Balay     nonzerorow += (n>0);
7697fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
77049b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
77149b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
77249b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
773831a3094SHong Zhang       v        += 9; jmin++;
7747fbae186SHong Zhang     }
775444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
776444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
777831a3094SHong Zhang     for (j=jmin; j<n; j++) {
77849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
77949b5e25fSSatish Balay       cval       = ib[j]*3;
78049b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
78149b5e25fSSatish Balay       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
78249b5e25fSSatish Balay       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
78349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
78449b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
78549b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
78649b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
78749b5e25fSSatish Balay       v        += 9;
78849b5e25fSSatish Balay     }
78949b5e25fSSatish Balay     xb +=3; ai++;
79049b5e25fSSatish Balay   }
79149b5e25fSSatish Balay 
7921ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
793bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
79449b5e25fSSatish Balay 
795dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
79649b5e25fSSatish Balay   PetscFunctionReturn(0);
79749b5e25fSSatish Balay }
79849b5e25fSSatish Balay 
7994a2ae208SSatish Balay #undef __FUNCT__
8004a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
801dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
80249b5e25fSSatish Balay {
80349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
804bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
80549b5e25fSSatish Balay   MatScalar      *v;
8066849ba73SBarry Smith   PetscErrorCode ierr;
80713f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
80898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
80949b5e25fSSatish Balay 
81049b5e25fSSatish Balay   PetscFunctionBegin;
811343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8121ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8131ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
81449b5e25fSSatish Balay 
81549b5e25fSSatish Balay   v  = a->a;
81649b5e25fSSatish Balay   xb = x;
81749b5e25fSSatish Balay 
81849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
81949b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
82049b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
82149b5e25fSSatish Balay     ib          = aj + *ai;
822831a3094SHong Zhang     jmin        = 0;
82398c9bda7SSatish Balay     nonzerorow += (n>0);
8247fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
82549b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
82649b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
82749b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
82849b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
829831a3094SHong Zhang       v        += 16; jmin++;
8307fbae186SHong Zhang     }
831444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
832444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
833831a3094SHong Zhang     for (j=jmin; j<n; j++) {
83449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
83549b5e25fSSatish Balay       cval       = ib[j]*4;
83649b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
83749b5e25fSSatish Balay       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
83849b5e25fSSatish Balay       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
83949b5e25fSSatish Balay       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
84049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
84149b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
84249b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
84349b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
84449b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
84549b5e25fSSatish Balay       v        += 16;
84649b5e25fSSatish Balay     }
84749b5e25fSSatish Balay     xb +=4; ai++;
84849b5e25fSSatish Balay   }
84949b5e25fSSatish Balay 
8501ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
851bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
85249b5e25fSSatish Balay 
853dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
85449b5e25fSSatish Balay   PetscFunctionReturn(0);
85549b5e25fSSatish Balay }
85649b5e25fSSatish Balay 
8574a2ae208SSatish Balay #undef __FUNCT__
8584a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
859dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
86049b5e25fSSatish Balay {
86149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
862bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
86349b5e25fSSatish Balay   MatScalar      *v;
8646849ba73SBarry Smith   PetscErrorCode ierr;
86513f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
86698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
86749b5e25fSSatish Balay 
86849b5e25fSSatish Balay   PetscFunctionBegin;
869343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8701ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
87249b5e25fSSatish Balay 
87349b5e25fSSatish Balay   v  = a->a;
87449b5e25fSSatish Balay   xb = x;
87549b5e25fSSatish Balay 
87649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
87749b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
87849b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
87949b5e25fSSatish Balay     ib          = aj + *ai;
880831a3094SHong Zhang     jmin        = 0;
88198c9bda7SSatish Balay     nonzerorow += (n>0);
8827fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
88349b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
88449b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
88549b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
88649b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
88749b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
888831a3094SHong Zhang       v        += 25; jmin++;
8897fbae186SHong Zhang     }
890444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
891444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
892831a3094SHong Zhang     for (j=jmin; j<n; j++) {
89349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
89449b5e25fSSatish Balay       cval       = ib[j]*5;
89549b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
89649b5e25fSSatish Balay       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
89749b5e25fSSatish Balay       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
89849b5e25fSSatish Balay       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
89949b5e25fSSatish Balay       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
90049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
90149b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
90249b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
90349b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
90449b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
90549b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
90649b5e25fSSatish Balay       v        += 25;
90749b5e25fSSatish Balay     }
90849b5e25fSSatish Balay     xb +=5; ai++;
90949b5e25fSSatish Balay   }
91049b5e25fSSatish Balay 
9111ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
912bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
91349b5e25fSSatish Balay 
914dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
91549b5e25fSSatish Balay   PetscFunctionReturn(0);
91649b5e25fSSatish Balay }
9174a2ae208SSatish Balay #undef __FUNCT__
9184a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
919dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
92049b5e25fSSatish Balay {
92149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
922bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
92349b5e25fSSatish Balay   MatScalar      *v;
9246849ba73SBarry Smith   PetscErrorCode ierr;
92513f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
92698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
92749b5e25fSSatish Balay 
92849b5e25fSSatish Balay   PetscFunctionBegin;
929343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9301ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9311ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
93249b5e25fSSatish Balay 
93349b5e25fSSatish Balay   v  = a->a;
93449b5e25fSSatish Balay   xb = x;
93549b5e25fSSatish Balay 
93649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
93749b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
93849b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
93949b5e25fSSatish Balay     ib          = aj + *ai;
940831a3094SHong Zhang     jmin        = 0;
94198c9bda7SSatish Balay     nonzerorow += (n>0);
9427fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
94349b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
94449b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
94549b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
94649b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
94749b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
94849b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
949831a3094SHong Zhang       v        += 36; jmin++;
9507fbae186SHong Zhang     }
951444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
952444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
953831a3094SHong Zhang     for (j=jmin; j<n; j++) {
95449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
95549b5e25fSSatish Balay       cval       = ib[j]*6;
95649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
95749b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
95849b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
95949b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
96049b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
96149b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
96249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96349b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
96449b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
96549b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
96649b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
96749b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
96849b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
96949b5e25fSSatish Balay       v        += 36;
97049b5e25fSSatish Balay     }
97149b5e25fSSatish Balay     xb +=6; ai++;
97249b5e25fSSatish Balay   }
97349b5e25fSSatish Balay 
9741ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
975bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
97649b5e25fSSatish Balay 
977dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
97849b5e25fSSatish Balay   PetscFunctionReturn(0);
97949b5e25fSSatish Balay }
98049b5e25fSSatish Balay 
9814a2ae208SSatish Balay #undef __FUNCT__
9824a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
983dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
98449b5e25fSSatish Balay {
98549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
986bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
98749b5e25fSSatish Balay   MatScalar      *v;
9886849ba73SBarry Smith   PetscErrorCode ierr;
98913f74950SBarry Smith   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
99098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
99149b5e25fSSatish Balay 
99249b5e25fSSatish Balay   PetscFunctionBegin;
993343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9941ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9951ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
99649b5e25fSSatish Balay 
99749b5e25fSSatish Balay   v  = a->a;
99849b5e25fSSatish Balay   xb = x;
99949b5e25fSSatish Balay 
100049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
100149b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
100249b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
100349b5e25fSSatish Balay     ib          = aj + *ai;
1004831a3094SHong Zhang     jmin        = 0;
100598c9bda7SSatish Balay     nonzerorow += (n>0);
10067fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
100749b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
100849b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
100949b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
101049b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
101149b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
101249b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
101349b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1014831a3094SHong Zhang       v        += 49; jmin++;
10157fbae186SHong Zhang     }
1016444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1017444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1018831a3094SHong Zhang     for (j=jmin; j<n; j++) {
101949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
102049b5e25fSSatish Balay       cval       = ib[j]*7;
102149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
102249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
102349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
102449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
102549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
102649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
102749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
102849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
102949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
103049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
103149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
103249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
103349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
103449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
103549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
103649b5e25fSSatish Balay       v       += 49;
103749b5e25fSSatish Balay     }
103849b5e25fSSatish Balay     xb +=7; ai++;
103949b5e25fSSatish Balay   }
104049b5e25fSSatish Balay 
10411ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1042bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
104349b5e25fSSatish Balay 
1044dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
104549b5e25fSSatish Balay   PetscFunctionReturn(0);
104649b5e25fSSatish Balay }
104749b5e25fSSatish Balay 
10484a2ae208SSatish Balay #undef __FUNCT__
10494a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1050dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
105149b5e25fSSatish Balay {
105249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1053bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1054066653e3SSatish Balay   MatScalar      *v;
1055dfbe8321SBarry Smith   PetscErrorCode ierr;
1056d0f46423SBarry Smith   PetscInt       mbs       =a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
105798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
105849b5e25fSSatish Balay 
105949b5e25fSSatish Balay   PetscFunctionBegin;
1060343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
10611ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10621ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
106349b5e25fSSatish Balay 
106449b5e25fSSatish Balay   aj = a->j;
106549b5e25fSSatish Balay   v  = a->a;
106649b5e25fSSatish Balay   ii = a->i;
106749b5e25fSSatish Balay 
106849b5e25fSSatish Balay   if (!a->mult_work) {
1069d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
107049b5e25fSSatish Balay   }
107149b5e25fSSatish Balay   work = a->mult_work;
107249b5e25fSSatish Balay 
107349b5e25fSSatish Balay 
107449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
107549b5e25fSSatish Balay     n           = ii[1] - ii[0]; ncols = n*bs;
107649b5e25fSSatish Balay     workt       = work; idx=aj+ii[0];
107798c9bda7SSatish Balay     nonzerorow += (n>0);
107849b5e25fSSatish Balay 
107949b5e25fSSatish Balay     /* upper triangular part */
108049b5e25fSSatish Balay     for (j=0; j<n; j++) {
108149b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
108249b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
108349b5e25fSSatish Balay       workt += bs;
108449b5e25fSSatish Balay     }
108549b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
108696b95a6bSBarry Smith     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
108749b5e25fSSatish Balay 
108849b5e25fSSatish Balay     /* strict lower triangular part */
1089831a3094SHong Zhang     idx = aj+ii[0];
1090831a3094SHong Zhang     if (*idx == i) {
109196b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1092831a3094SHong Zhang     }
109349b5e25fSSatish Balay     if (ncols > 0) {
109449b5e25fSSatish Balay       workt = work;
109587828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
109696b95a6bSBarry Smith       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1097831a3094SHong Zhang       for (j=0; j<n; j++) {
1098831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
109949b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k];
110049b5e25fSSatish Balay         workt += bs;
110149b5e25fSSatish Balay       }
110249b5e25fSSatish Balay     }
110349b5e25fSSatish Balay 
110449b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
110549b5e25fSSatish Balay   }
110649b5e25fSSatish Balay 
11071ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1108bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
110949b5e25fSSatish Balay 
1110dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
111149b5e25fSSatish Balay   PetscFunctionReturn(0);
111249b5e25fSSatish Balay }
111349b5e25fSSatish Balay 
11144a2ae208SSatish Balay #undef __FUNCT__
11154a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1116f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
111749b5e25fSSatish Balay {
111849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a     = (Mat_SeqSBAIJ*)inA->data;
1119f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1120efee365bSSatish Balay   PetscErrorCode ierr;
1121c5df96a5SBarry Smith   PetscBLASInt   one = 1,totalnz;
112249b5e25fSSatish Balay 
112349b5e25fSSatish Balay   PetscFunctionBegin;
1124c5df96a5SBarry Smith   ierr = PetscBLASIntCast(a->bs2*a->nz,&totalnz);CHKERRQ(ierr);
1125a83cb05cSBarry Smith   PetscStackCall("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one));
1126efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
112749b5e25fSSatish Balay   PetscFunctionReturn(0);
112849b5e25fSSatish Balay }
112949b5e25fSSatish Balay 
11304a2ae208SSatish Balay #undef __FUNCT__
11314a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1132dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
113349b5e25fSSatish Balay {
113449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a       = (Mat_SeqSBAIJ*)A->data;
113549b5e25fSSatish Balay   MatScalar      *v       = a->a;
113649b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1137d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11386849ba73SBarry Smith   PetscErrorCode ierr;
113913f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
114049b5e25fSSatish Balay 
114149b5e25fSSatish Balay   PetscFunctionBegin;
114249b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
114349b5e25fSSatish Balay     for (k=0; k<mbs; k++) {
114449b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1145831a3094SHong Zhang       col  = aj + jmin;
1146831a3094SHong Zhang       if (*col == k) {         /* diagonal block */
114749b5e25fSSatish Balay         for (i=0; i<bs2; i++) {
114849b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
114949b5e25fSSatish Balay         }
1150831a3094SHong Zhang         jmin++;
1151831a3094SHong Zhang       }
1152831a3094SHong Zhang       for (j=jmin; j<jmax; j++) {  /* off-diagonal blocks */
115349b5e25fSSatish Balay         for (i=0; i<bs2; i++) {
115449b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
115549b5e25fSSatish Balay         }
115649b5e25fSSatish Balay       }
115749b5e25fSSatish Balay     }
11588f1a2a5eSBarry Smith     *norm = PetscSqrtReal(sum_diag + 2*sum_off);
11590b8dc8d2SHong Zhang   } else if (type == NORM_INFINITY || type == NORM_1) {  /* maximum row/column sum */
116074ed9c26SBarry Smith     ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
11610b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11620b8dc8d2SHong Zhang     il[0] = 0;
116349b5e25fSSatish Balay 
116449b5e25fSSatish Balay     *norm = 0.0;
116549b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
116649b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
116749b5e25fSSatish Balay       /*-- col sum --*/
116849b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1169831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1170831a3094SHong Zhang                   at step k */
117149b5e25fSSatish Balay       while (i<mbs) {
117249b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
117349b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
117449b5e25fSSatish Balay         for (j=0; j<bs; j++) {
117549b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
117649b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
117749b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
117849b5e25fSSatish Balay           }
117949b5e25fSSatish Balay         }
118049b5e25fSSatish Balay         /* update il, jl */
1181831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1182831a3094SHong Zhang         jmax = a->i[i+1];
118349b5e25fSSatish Balay         if (jmin < jmax) {
118449b5e25fSSatish Balay           il[i] = jmin;
118549b5e25fSSatish Balay           j     = a->j[jmin];
118649b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
118749b5e25fSSatish Balay         }
118849b5e25fSSatish Balay         i = nexti;
118949b5e25fSSatish Balay       }
119049b5e25fSSatish Balay       /*-- row sum --*/
119149b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
119249b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
119349b5e25fSSatish Balay         for (j=0; j<bs; j++) {
119449b5e25fSSatish Balay           v = a->a + i*bs2 + j;
119549b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
11960b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
119749b5e25fSSatish Balay           }
119849b5e25fSSatish Balay         }
119949b5e25fSSatish Balay       }
120049b5e25fSSatish Balay       /* add k_th block row to il, jl */
1201831a3094SHong Zhang       col = aj+jmin;
1202831a3094SHong Zhang       if (*col == k) jmin++;
120349b5e25fSSatish Balay       if (jmin < jmax) {
120449b5e25fSSatish Balay         il[k] = jmin;
12050b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
120649b5e25fSSatish Balay       }
120749b5e25fSSatish Balay       for (j=0; j<bs; j++) {
120849b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
120949b5e25fSSatish Balay       }
121049b5e25fSSatish Balay     }
121174ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
1212f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
121349b5e25fSSatish Balay   PetscFunctionReturn(0);
121449b5e25fSSatish Balay }
121549b5e25fSSatish Balay 
12164a2ae208SSatish Balay #undef __FUNCT__
12174a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1218ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
121949b5e25fSSatish Balay {
122049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data;
1221dfbe8321SBarry Smith   PetscErrorCode ierr;
122249b5e25fSSatish Balay 
122349b5e25fSSatish Balay   PetscFunctionBegin;
122449b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1225d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1226ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1227ef511fbeSHong Zhang     PetscFunctionReturn(0);
122849b5e25fSSatish Balay   }
122949b5e25fSSatish Balay 
123049b5e25fSSatish Balay   /* if the a->i are the same */
123113f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
123226fbe8dcSKarl Rupp   if (!*flg) PetscFunctionReturn(0);
123349b5e25fSSatish Balay 
123449b5e25fSSatish Balay   /* if a->j are the same */
123513f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
123626fbe8dcSKarl Rupp   if (!*flg) PetscFunctionReturn(0);
123726fbe8dcSKarl Rupp 
123849b5e25fSSatish Balay   /* if a->a are the same */
1239d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1240935af2e7SHong Zhang   PetscFunctionReturn(0);
124149b5e25fSSatish Balay }
124249b5e25fSSatish Balay 
12434a2ae208SSatish Balay #undef __FUNCT__
12444a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1245dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
124649b5e25fSSatish Balay {
124749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1248dfbe8321SBarry Smith   PetscErrorCode ierr;
12498a0c6efdSHong Zhang   PetscInt       i,j,k,row,bs,*ai,*aj,ambs,bs2;
125087828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
125149b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
125249b5e25fSSatish Balay 
125349b5e25fSSatish Balay   PetscFunctionBegin;
1254d0f46423SBarry Smith   bs = A->rmap->bs;
1255e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
125682799104SHong Zhang 
125749b5e25fSSatish Balay   aa   = a->a;
12588a0c6efdSHong Zhang   ambs = a->mbs;
12598a0c6efdSHong Zhang 
12608a0c6efdSHong Zhang   if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) {
12618a0c6efdSHong Zhang     PetscInt *diag=a->diag;
12628a0c6efdSHong Zhang     aa   = a->a;
12638a0c6efdSHong Zhang     ambs = a->mbs;
12648a0c6efdSHong Zhang     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
12658a0c6efdSHong Zhang     for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
12668a0c6efdSHong Zhang     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
12678a0c6efdSHong Zhang     PetscFunctionReturn(0);
12688a0c6efdSHong Zhang   }
12698a0c6efdSHong Zhang 
127049b5e25fSSatish Balay   ai   = a->i;
127149b5e25fSSatish Balay   aj   = a->j;
127249b5e25fSSatish Balay   bs2  = a->bs2;
12732dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12741ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
127549b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
127649b5e25fSSatish Balay     j=ai[i];
127749b5e25fSSatish Balay     if (aj[j] == i) {    /* if this is a diagonal element */
127849b5e25fSSatish Balay       row  = i*bs;
127949b5e25fSSatish Balay       aa_j = aa + j*bs2;
128049b5e25fSSatish Balay       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
128149b5e25fSSatish Balay     }
128249b5e25fSSatish Balay   }
12831ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
128449b5e25fSSatish Balay   PetscFunctionReturn(0);
128549b5e25fSSatish Balay }
128649b5e25fSSatish Balay 
12874a2ae208SSatish Balay #undef __FUNCT__
12884a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1289dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
129049b5e25fSSatish Balay {
129149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
12925e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
129349b5e25fSSatish Balay   MatScalar      *aa,*v;
1294dfbe8321SBarry Smith   PetscErrorCode ierr;
12955e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1296ace3abfcSBarry Smith   PetscBool      flg;
129749b5e25fSSatish Balay 
129849b5e25fSSatish Balay   PetscFunctionBegin;
1299b3bf805bSHong Zhang   if (ll != rr) {
1300b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1301e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1302b3bf805bSHong Zhang   }
1303b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
130449b5e25fSSatish Balay   ai  = a->i;
130549b5e25fSSatish Balay   aj  = a->j;
130649b5e25fSSatish Balay   aa  = a->a;
1307d0f46423SBarry Smith   m   = A->rmap->N;
1308d0f46423SBarry Smith   bs  = A->rmap->bs;
130949b5e25fSSatish Balay   mbs = a->mbs;
131049b5e25fSSatish Balay   bs2 = a->bs2;
131149b5e25fSSatish Balay 
13121ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
131349b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1314e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
131549b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
131649b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
131749b5e25fSSatish Balay     li = l + i*bs;
131849b5e25fSSatish Balay     v  = aa + bs2*ai[i];
131949b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
132049b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13215e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
132249b5e25fSSatish Balay         x = ri[k];
132349b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
132449b5e25fSSatish Balay       }
132549b5e25fSSatish Balay     }
132649b5e25fSSatish Balay   }
13271ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1328dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
132949b5e25fSSatish Balay   PetscFunctionReturn(0);
133049b5e25fSSatish Balay }
133149b5e25fSSatish Balay 
13324a2ae208SSatish Balay #undef __FUNCT__
13334a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1334dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
133549b5e25fSSatish Balay {
133649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
133749b5e25fSSatish Balay 
133849b5e25fSSatish Balay   PetscFunctionBegin;
133949b5e25fSSatish Balay   info->block_size   = a->bs2;
1340ceed8ce5SJed Brown   info->nz_allocated = a->bs2*a->maxnz;   /*num. of nonzeros in upper triangular part */
13416c6c5352SBarry Smith   info->nz_used      = a->bs2*a->nz;   /*num. of nonzeros in upper triangular part */
134249b5e25fSSatish Balay   info->nz_unneeded  = (double)(info->nz_allocated - info->nz_used);
134349b5e25fSSatish Balay   info->assemblies   = A->num_ass;
13448e58a170SBarry Smith   info->mallocs      = A->info.mallocs;
13457adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1346d5f3da31SBarry Smith   if (A->factortype) {
134749b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
134849b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
134949b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
135049b5e25fSSatish Balay   } else {
135149b5e25fSSatish Balay     info->fill_ratio_given  = 0;
135249b5e25fSSatish Balay     info->fill_ratio_needed = 0;
135349b5e25fSSatish Balay     info->factor_mallocs    = 0;
135449b5e25fSSatish Balay   }
135549b5e25fSSatish Balay   PetscFunctionReturn(0);
135649b5e25fSSatish Balay }
135749b5e25fSSatish Balay 
135849b5e25fSSatish Balay 
13594a2ae208SSatish Balay #undef __FUNCT__
13604a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1361dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
136249b5e25fSSatish Balay {
136349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1364dfbe8321SBarry Smith   PetscErrorCode ierr;
136549b5e25fSSatish Balay 
136649b5e25fSSatish Balay   PetscFunctionBegin;
136749b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
136849b5e25fSSatish Balay   PetscFunctionReturn(0);
136949b5e25fSSatish Balay }
1370dc354874SHong Zhang 
13714a2ae208SSatish Balay #undef __FUNCT__
1372985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1373985db425SBarry Smith /*
1374985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1375985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1376985db425SBarry Smith */
1377985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1378dc354874SHong Zhang {
1379dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1380dfbe8321SBarry Smith   PetscErrorCode ierr;
138113f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1382c3fca9a7SHong Zhang   PetscReal      atmp;
1383273d9f13SBarry Smith   MatScalar      *aa;
1384985db425SBarry Smith   PetscScalar    *x;
138513f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1386dc354874SHong Zhang 
1387dc354874SHong Zhang   PetscFunctionBegin;
1388e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1389e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1390d0f46423SBarry Smith   bs  = A->rmap->bs;
1391dc354874SHong Zhang   aa  = a->a;
1392dc354874SHong Zhang   ai  = a->i;
1393dc354874SHong Zhang   aj  = a->j;
139444117c81SHong Zhang   mbs = a->mbs;
1395dc354874SHong Zhang 
1396985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
13971ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1398dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1399e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
140044117c81SHong Zhang   for (i=0; i<mbs; i++) {
1401d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1402d0f6400bSHong Zhang     brow  = bs*i;
140344117c81SHong Zhang     for (j=0; j<ncols; j++) {
1404d0f6400bSHong Zhang       bcol = bs*(*aj);
140544117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++) {
1406d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
140744117c81SHong Zhang         for (krow=0; krow<bs; krow++) {
1408d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1409d0f6400bSHong Zhang           row  = brow + krow;   /* row index */
1410c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1411c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
141244117c81SHong Zhang         }
141344117c81SHong Zhang       }
1414d0f6400bSHong Zhang       aj++;
1415dc354874SHong Zhang     }
1416dc354874SHong Zhang   }
14171ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1418dc354874SHong Zhang   PetscFunctionReturn(0);
1419dc354874SHong Zhang }
1420