xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision d94109b8b75ae69c612e05c6b163c5dfa64c1889)
173f4d377SMatthew Knepley /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
449b5e25fSSatish Balay #include "src/inline/spops.h"
549b5e25fSSatish Balay #include "src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
11268466fbSBarry Smith int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
145333f59eSHong Zhang   int          brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol,
15*d94109b8SHong Zhang                start,end,*ai,*aj,bs,*nidx2;
16*d94109b8SHong Zhang   PetscBT      table;
17*d94109b8SHong Zhang   PetscBT      table0;
18*d94109b8SHong Zhang 
19*d94109b8SHong Zhang   PetscFunctionBegin;
20*d94109b8SHong Zhang   mbs = a->mbs;
21*d94109b8SHong Zhang   ai  = a->i;
22*d94109b8SHong Zhang   aj  = a->j;
23*d94109b8SHong Zhang   bs  = a->bs;
24*d94109b8SHong Zhang 
25*d94109b8SHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
26*d94109b8SHong Zhang 
27*d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
28*d94109b8SHong Zhang   ierr = PetscMalloc((mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
29*d94109b8SHong Zhang   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
30*d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
31*d94109b8SHong Zhang 
32*d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
33*d94109b8SHong Zhang     isz  = 0;
34*d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
35*d94109b8SHong Zhang 
36*d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
37*d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
38*d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
39*d94109b8SHong Zhang 
40*d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
41*d94109b8SHong Zhang     for (j=0; j<n ; ++j){
42*d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
43*d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
44*d94109b8SHong Zhang       if(!PetscBTLookupSet(table,brow)) { nidx[isz++] = brow;}
45*d94109b8SHong Zhang     }
46*d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
47*d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
48*d94109b8SHong Zhang 
49*d94109b8SHong Zhang     k = 0;
50*d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
51*d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
52*d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
53*d94109b8SHong Zhang       for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]);
54*d94109b8SHong Zhang 
55*d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
56*d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
57*d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
58*d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
59*d94109b8SHong Zhang           for (l = start; l<end ; l++){
60*d94109b8SHong Zhang             bcol = aj[l];
61*d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
62*d94109b8SHong Zhang           }
63*d94109b8SHong Zhang           k++;
64*d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
65*d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
66*d94109b8SHong Zhang           for (l = start; l<end ; l++){
67*d94109b8SHong Zhang             bcol = aj[l];
68*d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
69*d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
70*d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
71*d94109b8SHong Zhang             }
72*d94109b8SHong Zhang           }
73*d94109b8SHong Zhang         }
74*d94109b8SHong Zhang       }
75*d94109b8SHong Zhang     } /* for each overlap */
76*d94109b8SHong Zhang 
77*d94109b8SHong Zhang     /* expand the Index Set */
78*d94109b8SHong Zhang     for (j=0; j<isz; j++) {
79*d94109b8SHong Zhang       for (k=0; k<bs; k++)
80*d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
81*d94109b8SHong Zhang     }
82*d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
83*d94109b8SHong Zhang   }
84*d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
85*d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
86*d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
87*d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
88*d94109b8SHong Zhang #ifdef VERSION_1
89*d94109b8SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
90*d94109b8SHong Zhang   int          brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol,
915333f59eSHong Zhang                start,end,*ai,*aj,bs,*nidx2,*colsearch;
925eee224dSHong Zhang   PetscBT      table;
935eee224dSHong Zhang 
9449b5e25fSSatish Balay   PetscFunctionBegin;
955eee224dSHong Zhang   mbs = a->mbs;
965eee224dSHong Zhang   ai  = a->i;
975eee224dSHong Zhang   aj  = a->j;
985eee224dSHong Zhang   bs  = a->bs;
995eee224dSHong Zhang 
1005eee224dSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
1015eee224dSHong Zhang 
1025eee224dSHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
1035333f59eSHong Zhang   ierr = PetscMalloc((2*mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
1045333f59eSHong Zhang   colsearch = nidx + mbs + 1;
1055eee224dSHong Zhang   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
1065eee224dSHong Zhang 
1075eee224dSHong Zhang   for (i=0; i<is_max; i++) {
1085333f59eSHong Zhang 
1095eee224dSHong Zhang     /* Initialise the two local arrays */
1105eee224dSHong Zhang     isz  = 0;
1115eee224dSHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
1125eee224dSHong Zhang 
1135eee224dSHong Zhang     /* Extract the indices, assume there can be duplicate entries */
1145eee224dSHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
1155eee224dSHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
1165eee224dSHong Zhang 
1175eee224dSHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
1185eee224dSHong Zhang     for (j=0; j<n ; ++j){
1195eee224dSHong Zhang       bcol = idx[j]/bs; /* convert the indices into block indices */
1205eee224dSHong Zhang       if (bcol >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
1215eee224dSHong Zhang       if(!PetscBTLookupSet(table,bcol)) { nidx[isz++] = bcol;}
1225eee224dSHong Zhang     }
1235eee224dSHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
1245eee224dSHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
1255eee224dSHong Zhang 
1265eee224dSHong Zhang     k = 0;
1275eee224dSHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
1285333f59eSHong Zhang       /* initialize colsearch -
1295333f59eSHong Zhang          colsearch[i] points to the possible non-zero (i,active_col)-th entry in the array aj */
1305333f59eSHong Zhang       for (brow=0; brow<mbs; brow++) colsearch[brow] = ai[brow];
1315333f59eSHong Zhang 
1325333f59eSHong Zhang       /* sort nidx[k:isz-1] - needed by col search */
1335333f59eSHong Zhang       ierr = PetscSortInt(isz-k,nidx+k);
1345333f59eSHong Zhang 
1355eee224dSHong Zhang       n = isz;
1365eee224dSHong Zhang       for (; k<n ; k++){ /* do only those brows in nidx[k], which are not done yet */
1375eee224dSHong Zhang         brow   = nidx[k];
1385333f59eSHong Zhang         /* search block column */
1395333f59eSHong Zhang         for (l=0; l<brow; l++){
1405333f59eSHong Zhang           if (colsearch[l]<ai[l+1]){
1415333f59eSHong Zhang             bcol = aj[colsearch[l]];
1425333f59eSHong Zhang             while (bcol < brow ) {
1435333f59eSHong Zhang               colsearch[l]++;
1445333f59eSHong Zhang               if (colsearch[l]<ai[l+1]){
1455333f59eSHong Zhang                 bcol = aj[colsearch[l]];
1465333f59eSHong Zhang               } else {
1475333f59eSHong Zhang                 bcol = mbs; break;
1485333f59eSHong Zhang               }
1495333f59eSHong Zhang             }
1505333f59eSHong Zhang             if (bcol==brow){
1515333f59eSHong Zhang               if (!PetscBTLookupSet(table,l)) {nidx[isz++] = l;}
1525333f59eSHong Zhang               colsearch[l]++;
1535333f59eSHong Zhang             }
1545333f59eSHong Zhang           }
1555333f59eSHong Zhang         }
1565333f59eSHong Zhang         /* search block row */
1575eee224dSHong Zhang         start = ai[brow];
1585eee224dSHong Zhang         end   = ai[brow+1];
1595eee224dSHong Zhang         for (l = start; l<end ; l++){
1605eee224dSHong Zhang           bcol = aj[l];
1615eee224dSHong Zhang           if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
1625eee224dSHong Zhang         }
1635eee224dSHong Zhang       }
1645333f59eSHong Zhang     } /* for each overlap */
1655333f59eSHong Zhang 
1665eee224dSHong Zhang     /* expand the Index Set */
1675eee224dSHong Zhang     for (j=0; j<isz; j++) {
1685eee224dSHong Zhang       for (k=0; k<bs; k++)
1695eee224dSHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
1705eee224dSHong Zhang     }
1715eee224dSHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
1725eee224dSHong Zhang   }
1735eee224dSHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
1745eee224dSHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
1755eee224dSHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
176*d94109b8SHong Zhang #endif /* VERSION_1 */
1775eee224dSHong Zhang   PetscFunctionReturn(0);
17849b5e25fSSatish Balay }
17949b5e25fSSatish Balay 
1804a2ae208SSatish Balay #undef __FUNCT__
1814a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
18249b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
18349b5e25fSSatish Balay {
18449b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
18549b5e25fSSatish Balay   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
18649b5e25fSSatish Balay   int          row,mat_i,*mat_j,tcol,*mat_ilen;
18749b5e25fSSatish Balay   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
18849b5e25fSSatish Balay   int          *aj = a->j,*ai = a->i;
18949b5e25fSSatish Balay   MatScalar    *mat_a;
19049b5e25fSSatish Balay   Mat          C;
19149b5e25fSSatish Balay   PetscTruth   flag;
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay   PetscFunctionBegin;
19449b5e25fSSatish Balay 
195ac355199SBarry Smith   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
19649b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
197347d480fSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
19849b5e25fSSatish Balay 
19949b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
20049b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
20149b5e25fSSatish Balay 
202b0a32e0cSBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
20349b5e25fSSatish Balay   ssmap = smap;
204b0a32e0cSBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
20549b5e25fSSatish Balay   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
20649b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
20749b5e25fSSatish Balay   /* determine lens of each row */
20849b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
20949b5e25fSSatish Balay     kstart  = ai[irow[i]];
21049b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
21149b5e25fSSatish Balay     lens[i] = 0;
21249b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
21349b5e25fSSatish Balay         if (ssmap[aj[k]]) {
21449b5e25fSSatish Balay           lens[i]++;
21549b5e25fSSatish Balay         }
21649b5e25fSSatish Balay       }
21749b5e25fSSatish Balay     }
21849b5e25fSSatish Balay   /* Create and fill new matrix */
21949b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
22049b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
22149b5e25fSSatish Balay 
222347d480fSBarry Smith     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
22349b5e25fSSatish Balay     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
22449b5e25fSSatish Balay     if (flag == PETSC_FALSE) {
225347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
22649b5e25fSSatish Balay     }
22749b5e25fSSatish Balay     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
22849b5e25fSSatish Balay     C = *B;
22949b5e25fSSatish Balay   } else {
230e2d9671bSKris Buschelman     ierr = MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr);
231e2d9671bSKris Buschelman     ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
232e2d9671bSKris Buschelman     ierr = MatSeqSBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr);
23349b5e25fSSatish Balay   }
23449b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
23549b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
23649b5e25fSSatish Balay     row    = irow[i];
23749b5e25fSSatish Balay     kstart = ai[row];
23849b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
23949b5e25fSSatish Balay     mat_i  = c->i[i];
24049b5e25fSSatish Balay     mat_j  = c->j + mat_i;
24149b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
24249b5e25fSSatish Balay     mat_ilen = c->ilen + i;
24349b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
24449b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
24549b5e25fSSatish Balay         *mat_j++ = tcol - 1;
24649b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
24749b5e25fSSatish Balay         mat_a   += bs2;
24849b5e25fSSatish Balay         (*mat_ilen)++;
24949b5e25fSSatish Balay       }
25049b5e25fSSatish Balay     }
25149b5e25fSSatish Balay   }
25249b5e25fSSatish Balay 
25349b5e25fSSatish Balay   /* Free work space */
25449b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
25549b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
25649b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25749b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25849b5e25fSSatish Balay 
25949b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
26049b5e25fSSatish Balay   *B = C;
26149b5e25fSSatish Balay   PetscFunctionReturn(0);
26249b5e25fSSatish Balay }
26349b5e25fSSatish Balay 
2644a2ae208SSatish Balay #undef __FUNCT__
2654a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
26649b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
26749b5e25fSSatish Balay {
26849b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
26949b5e25fSSatish Balay   IS          is1;
27049b5e25fSSatish Balay   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;
27149b5e25fSSatish Balay 
27249b5e25fSSatish Balay   PetscFunctionBegin;
273ac355199SBarry Smith   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
27449b5e25fSSatish Balay 
27549b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
27649b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
27749b5e25fSSatish Balay 
27849b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
27949b5e25fSSatish Balay    and form the IS with compressed IS */
28082502324SSatish Balay   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
28149b5e25fSSatish Balay   iary = vary + a->mbs;
28249b5e25fSSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
28349b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
28449b5e25fSSatish Balay 
28549b5e25fSSatish Balay   count = 0;
28649b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
287ac355199SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
28849b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
28949b5e25fSSatish Balay   }
29049b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
29149b5e25fSSatish Balay 
29249b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
29349b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
29449b5e25fSSatish Balay 
29549b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
29649b5e25fSSatish Balay   ISDestroy(is1);
29749b5e25fSSatish Balay   PetscFunctionReturn(0);
29849b5e25fSSatish Balay }
29949b5e25fSSatish Balay 
3004a2ae208SSatish Balay #undef __FUNCT__
3014a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
302268466fbSBarry Smith int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
30349b5e25fSSatish Balay {
30449b5e25fSSatish Balay   int ierr,i;
30549b5e25fSSatish Balay 
30649b5e25fSSatish Balay   PetscFunctionBegin;
30749b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
30882502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
30949b5e25fSSatish Balay   }
31049b5e25fSSatish Balay 
31149b5e25fSSatish Balay   for (i=0; i<n; i++) {
31249b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
31349b5e25fSSatish Balay   }
31449b5e25fSSatish Balay   PetscFunctionReturn(0);
31549b5e25fSSatish Balay }
31649b5e25fSSatish Balay 
31749b5e25fSSatish Balay /* -------------------------------------------------------*/
31849b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
31949b5e25fSSatish Balay /* -------------------------------------------------------*/
320d9eff348SSatish Balay #include "petscblaslapack.h"
32149b5e25fSSatish Balay 
3224a2ae208SSatish Balay #undef __FUNCT__
3234a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
32449b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
32549b5e25fSSatish Balay {
32649b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
32787828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,zero=0.0;
32849b5e25fSSatish Balay   MatScalar       *v;
329831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
33049b5e25fSSatish Balay 
33149b5e25fSSatish Balay   PetscFunctionBegin;
33249b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
333b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
334b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
33549b5e25fSSatish Balay 
33649b5e25fSSatish Balay   v  = a->a;
33749b5e25fSSatish Balay   xb = x;
33849b5e25fSSatish Balay 
33949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
34049b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
34149b5e25fSSatish Balay     x1 = xb[0];
34249b5e25fSSatish Balay     ib = aj + *ai;
343831a3094SHong Zhang     jmin = 0;
344831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
345831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
346831a3094SHong Zhang       jmin++;
347831a3094SHong Zhang     }
348831a3094SHong Zhang     for (j=jmin; j<n; j++) {
34949b5e25fSSatish Balay       cval    = *ib;
35049b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
35149b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
35249b5e25fSSatish Balay     }
35349b5e25fSSatish Balay     xb++; ai++;
35449b5e25fSSatish Balay   }
35549b5e25fSSatish Balay 
356b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
357b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
3586c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m) - A->m);  /* nz = (nz+m)/2 */
35949b5e25fSSatish Balay   PetscFunctionReturn(0);
36049b5e25fSSatish Balay }
36149b5e25fSSatish Balay 
3624a2ae208SSatish Balay #undef __FUNCT__
3634a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
36449b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
36549b5e25fSSatish Balay {
36649b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
36787828ca2SBarry Smith   PetscScalar      *x,*z,*xb,x1,x2,zero=0.0;
36849b5e25fSSatish Balay   MatScalar        *v;
369831a3094SHong Zhang   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
37049b5e25fSSatish Balay 
37149b5e25fSSatish Balay 
37249b5e25fSSatish Balay   PetscFunctionBegin;
37349b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
374b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
375b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
37649b5e25fSSatish Balay 
37749b5e25fSSatish Balay   v     = a->a;
37849b5e25fSSatish Balay   xb = x;
37949b5e25fSSatish Balay 
38049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
38149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
38249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
38349b5e25fSSatish Balay     ib = aj + *ai;
384831a3094SHong Zhang     jmin = 0;
3857fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
38649b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
38749b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
388831a3094SHong Zhang       v += 4; jmin++;
3897fbae186SHong Zhang     }
390831a3094SHong Zhang     for (j=jmin; j<n; j++) {
39149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
39249b5e25fSSatish Balay       cval       = ib[j]*2;
39349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
39449b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
39549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
39649b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
39749b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
39849b5e25fSSatish Balay       v  += 4;
39949b5e25fSSatish Balay     }
40049b5e25fSSatish Balay     xb +=2; ai++;
40149b5e25fSSatish Balay   }
40249b5e25fSSatish Balay 
403b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
404b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
4056c6c5352SBarry Smith   PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
40649b5e25fSSatish Balay   PetscFunctionReturn(0);
40749b5e25fSSatish Balay }
40849b5e25fSSatish Balay 
4094a2ae208SSatish Balay #undef __FUNCT__
4104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
41149b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
41249b5e25fSSatish Balay {
41349b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
41487828ca2SBarry Smith   PetscScalar   *x,*z,*xb,x1,x2,x3,zero=0.0;
41549b5e25fSSatish Balay   MatScalar     *v;
416831a3094SHong Zhang   int           mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
41749b5e25fSSatish Balay 
41849b5e25fSSatish Balay 
41949b5e25fSSatish Balay   PetscFunctionBegin;
42049b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
421b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
422b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
42349b5e25fSSatish Balay 
42449b5e25fSSatish Balay   v     = a->a;
42549b5e25fSSatish Balay   xb = x;
42649b5e25fSSatish Balay 
42749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
42849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
42949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
43049b5e25fSSatish Balay     ib = aj + *ai;
431831a3094SHong Zhang     jmin = 0;
4327fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
43349b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
43449b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
43549b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
436831a3094SHong Zhang       v += 9; jmin++;
4377fbae186SHong Zhang     }
438831a3094SHong Zhang     for (j=jmin; j<n; j++) {
43949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
44049b5e25fSSatish Balay       cval       = ib[j]*3;
44149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
44249b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
44349b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
44449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
44549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
44649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
44749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
44849b5e25fSSatish Balay       v  += 9;
44949b5e25fSSatish Balay     }
45049b5e25fSSatish Balay     xb +=3; ai++;
45149b5e25fSSatish Balay   }
45249b5e25fSSatish Balay 
453b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
454b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
4556c6c5352SBarry Smith   PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
45649b5e25fSSatish Balay   PetscFunctionReturn(0);
45749b5e25fSSatish Balay }
45849b5e25fSSatish Balay 
4594a2ae208SSatish Balay #undef __FUNCT__
4604a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
46149b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
46249b5e25fSSatish Balay {
46349b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
46487828ca2SBarry Smith   PetscScalar      *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
46549b5e25fSSatish Balay   MatScalar        *v;
466831a3094SHong Zhang   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
46749b5e25fSSatish Balay 
46849b5e25fSSatish Balay   PetscFunctionBegin;
46949b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
470b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
471b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
47249b5e25fSSatish Balay 
47349b5e25fSSatish Balay   v     = a->a;
47449b5e25fSSatish Balay   xb = x;
47549b5e25fSSatish Balay 
47649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
47849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
47949b5e25fSSatish Balay     ib = aj + *ai;
480831a3094SHong Zhang     jmin = 0;
4817fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
48249b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
48349b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
48449b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
48549b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
486831a3094SHong Zhang       v += 16; jmin++;
4877fbae186SHong Zhang     }
488831a3094SHong Zhang     for (j=jmin; j<n; j++) {
48949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49049b5e25fSSatish Balay       cval       = ib[j]*4;
49149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
49249b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
49349b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
49449b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
49549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
49649b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
49749b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
49849b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
49949b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
50049b5e25fSSatish Balay       v  += 16;
50149b5e25fSSatish Balay     }
50249b5e25fSSatish Balay     xb +=4; ai++;
50349b5e25fSSatish Balay   }
50449b5e25fSSatish Balay 
505b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
506b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
5076c6c5352SBarry Smith   PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
50849b5e25fSSatish Balay   PetscFunctionReturn(0);
50949b5e25fSSatish Balay }
51049b5e25fSSatish Balay 
5114a2ae208SSatish Balay #undef __FUNCT__
5124a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
51349b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
51449b5e25fSSatish Balay {
51549b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
51687828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
51749b5e25fSSatish Balay   MatScalar       *v;
518831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
51949b5e25fSSatish Balay 
52049b5e25fSSatish Balay   PetscFunctionBegin;
52149b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
522b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
523b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
52449b5e25fSSatish Balay 
52549b5e25fSSatish Balay   v     = a->a;
52649b5e25fSSatish Balay   xb = x;
52749b5e25fSSatish Balay 
52849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
52949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
53049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
53149b5e25fSSatish Balay     ib = aj + *ai;
532831a3094SHong Zhang     jmin = 0;
5337fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
53449b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
53549b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
53649b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
53749b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
53849b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
539831a3094SHong Zhang       v += 25; jmin++;
5407fbae186SHong Zhang     }
541831a3094SHong Zhang     for (j=jmin; j<n; j++) {
54249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
54349b5e25fSSatish Balay       cval       = ib[j]*5;
54449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
54549b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
54649b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
54749b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
54849b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
54949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
55049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
55149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
55249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
55349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
55449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
55549b5e25fSSatish Balay       v  += 25;
55649b5e25fSSatish Balay     }
55749b5e25fSSatish Balay     xb +=5; ai++;
55849b5e25fSSatish Balay   }
55949b5e25fSSatish Balay 
560b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
561b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
5626c6c5352SBarry Smith   PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
56349b5e25fSSatish Balay   PetscFunctionReturn(0);
56449b5e25fSSatish Balay }
56549b5e25fSSatish Balay 
56649b5e25fSSatish Balay 
5674a2ae208SSatish Balay #undef __FUNCT__
5684a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
56949b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
57049b5e25fSSatish Balay {
57149b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
57287828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
57349b5e25fSSatish Balay   MatScalar       *v;
574831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
57549b5e25fSSatish Balay 
57649b5e25fSSatish Balay   PetscFunctionBegin;
57749b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
578b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
579b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
58049b5e25fSSatish Balay 
58149b5e25fSSatish Balay   v     = a->a;
58249b5e25fSSatish Balay   xb = x;
58349b5e25fSSatish Balay 
58449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
58549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
58649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
58749b5e25fSSatish Balay     ib = aj + *ai;
588831a3094SHong Zhang     jmin = 0;
5897fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
59049b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
59149b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
59249b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
59349b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
59449b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
59549b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
596831a3094SHong Zhang       v += 36; jmin++;
5977fbae186SHong Zhang     }
598831a3094SHong Zhang     for (j=jmin; j<n; j++) {
59949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
60049b5e25fSSatish Balay       cval       = ib[j]*6;
60149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
60249b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
60349b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
60449b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
60549b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
60649b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
60749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
60849b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
60949b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
61049b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
61149b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
61249b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
61349b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
61449b5e25fSSatish Balay       v  += 36;
61549b5e25fSSatish Balay     }
61649b5e25fSSatish Balay     xb +=6; ai++;
61749b5e25fSSatish Balay   }
61849b5e25fSSatish Balay 
619b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
620b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
6216c6c5352SBarry Smith   PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
62249b5e25fSSatish Balay   PetscFunctionReturn(0);
62349b5e25fSSatish Balay }
6244a2ae208SSatish Balay #undef __FUNCT__
6254a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
62649b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
62749b5e25fSSatish Balay {
62849b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
62987828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
63049b5e25fSSatish Balay   MatScalar       *v;
631831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
63249b5e25fSSatish Balay 
63349b5e25fSSatish Balay   PetscFunctionBegin;
63449b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
635b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
636b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
63749b5e25fSSatish Balay 
63849b5e25fSSatish Balay   v     = a->a;
63949b5e25fSSatish Balay   xb = x;
64049b5e25fSSatish Balay 
64149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
64249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
64349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
64449b5e25fSSatish Balay     ib = aj + *ai;
645831a3094SHong Zhang     jmin = 0;
6467fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
64749b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
64849b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
64949b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
65049b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
65149b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
65249b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
65349b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
654831a3094SHong Zhang       v += 49; jmin++;
6557fbae186SHong Zhang     }
656831a3094SHong Zhang     for (j=jmin; j<n; j++) {
65749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
65849b5e25fSSatish Balay       cval       = ib[j]*7;
65949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
66049b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
66149b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
66249b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
66349b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
66449b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
66549b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
66649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
66749b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
66849b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
66949b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
67049b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
67149b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
67249b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
67349b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
67449b5e25fSSatish Balay       v  += 49;
67549b5e25fSSatish Balay     }
67649b5e25fSSatish Balay     xb +=7; ai++;
67749b5e25fSSatish Balay   }
678b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
679b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
6806c6c5352SBarry Smith   PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
68149b5e25fSSatish Balay   PetscFunctionReturn(0);
68249b5e25fSSatish Balay }
68349b5e25fSSatish Balay 
68449b5e25fSSatish Balay /*
68549b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
68649b5e25fSSatish Balay */
6874a2ae208SSatish Balay #undef __FUNCT__
6884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
68949b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
69049b5e25fSSatish Balay {
69149b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
69287828ca2SBarry Smith   PetscScalar     *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6930b60a74dSHong Zhang   MatScalar       *v;
694066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
6950b60a74dSHong Zhang   int             ncols,k;
69649b5e25fSSatish Balay 
69749b5e25fSSatish Balay   PetscFunctionBegin;
69849b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
699b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x;
700b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z;
70149b5e25fSSatish Balay 
70249b5e25fSSatish Balay   aj   = a->j;
70349b5e25fSSatish Balay   v    = a->a;
70449b5e25fSSatish Balay   ii   = a->i;
70549b5e25fSSatish Balay 
70649b5e25fSSatish Balay   if (!a->mult_work) {
70787828ca2SBarry Smith     ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
70849b5e25fSSatish Balay   }
70949b5e25fSSatish Balay   work = a->mult_work;
71049b5e25fSSatish Balay 
71149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
71249b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
71349b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
71449b5e25fSSatish Balay 
71549b5e25fSSatish Balay     /* upper triangular part */
71649b5e25fSSatish Balay     for (j=0; j<n; j++) {
71749b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
71849b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
71949b5e25fSSatish Balay       workt += bs;
72049b5e25fSSatish Balay     }
72149b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
72249b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
72349b5e25fSSatish Balay 
72449b5e25fSSatish Balay     /* strict lower triangular part */
725831a3094SHong Zhang     idx = aj+ii[0];
726831a3094SHong Zhang     if (*idx == i){
72796b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
728831a3094SHong Zhang     }
72996b9376eSHong Zhang 
73049b5e25fSSatish Balay     if (ncols > 0){
73149b5e25fSSatish Balay       workt = work;
73287828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
733831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
734831a3094SHong Zhang       for (j=0; j<n; j++) {
735831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
73649b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
73749b5e25fSSatish Balay         workt += bs;
73849b5e25fSSatish Balay       }
73949b5e25fSSatish Balay     }
74049b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
74149b5e25fSSatish Balay   }
74249b5e25fSSatish Balay 
743b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
744b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
7456c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
74649b5e25fSSatish Balay   PetscFunctionReturn(0);
74749b5e25fSSatish Balay }
74849b5e25fSSatish Balay 
7494a2ae208SSatish Balay #undef __FUNCT__
7504a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
75149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
75249b5e25fSSatish Balay {
75349b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
75487828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1;
75549b5e25fSSatish Balay   MatScalar       *v;
756831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
75749b5e25fSSatish Balay 
75849b5e25fSSatish Balay   PetscFunctionBegin;
759b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
76049b5e25fSSatish Balay   if (yy != xx) {
761b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
76249b5e25fSSatish Balay   } else {
76349b5e25fSSatish Balay     y = x;
76449b5e25fSSatish Balay   }
76549b5e25fSSatish Balay   if (zz != yy) {
766a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
767b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
768a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
76949b5e25fSSatish Balay   } else {
77049b5e25fSSatish Balay     z = y;
77149b5e25fSSatish Balay   }
77249b5e25fSSatish Balay 
77349b5e25fSSatish Balay   v  = a->a;
77449b5e25fSSatish Balay   xb = x;
77549b5e25fSSatish Balay 
77649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
77749b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
77849b5e25fSSatish Balay     x1 = xb[0];
77949b5e25fSSatish Balay     ib = aj + *ai;
780831a3094SHong Zhang     jmin = 0;
781831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
782831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
783831a3094SHong Zhang     }
784831a3094SHong Zhang     for (j=jmin; j<n; j++) {
78549b5e25fSSatish Balay       cval    = *ib;
78649b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
78749b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
78849b5e25fSSatish Balay     }
78949b5e25fSSatish Balay     xb++; ai++;
79049b5e25fSSatish Balay   }
79149b5e25fSSatish Balay 
792b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
793b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
794b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
79549b5e25fSSatish Balay 
7966c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m));
79749b5e25fSSatish Balay   PetscFunctionReturn(0);
79849b5e25fSSatish Balay }
79949b5e25fSSatish Balay 
8004a2ae208SSatish Balay #undef __FUNCT__
8014a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
80249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
80349b5e25fSSatish Balay {
80449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
80587828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2;
80649b5e25fSSatish Balay   MatScalar       *v;
807831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
80849b5e25fSSatish Balay 
80949b5e25fSSatish Balay   PetscFunctionBegin;
810b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
81149b5e25fSSatish Balay   if (yy != xx) {
812b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
81349b5e25fSSatish Balay   } else {
81449b5e25fSSatish Balay     y = x;
81549b5e25fSSatish Balay   }
81649b5e25fSSatish Balay   if (zz != yy) {
817a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
818b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
819a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
82049b5e25fSSatish Balay   } else {
82149b5e25fSSatish Balay     z = y;
82249b5e25fSSatish Balay   }
82349b5e25fSSatish Balay 
82449b5e25fSSatish Balay   v     = a->a;
82549b5e25fSSatish Balay   xb = x;
82649b5e25fSSatish Balay 
82749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
82849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
82949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
83049b5e25fSSatish Balay     ib = aj + *ai;
831831a3094SHong Zhang     jmin = 0;
8327fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
83349b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
83449b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
835831a3094SHong Zhang       v += 4; jmin++;
8367fbae186SHong Zhang     }
837831a3094SHong Zhang     for (j=jmin; j<n; j++) {
83849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
83949b5e25fSSatish Balay       cval       = ib[j]*2;
84049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
84149b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
84249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
84349b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
84449b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
84549b5e25fSSatish Balay       v  += 4;
84649b5e25fSSatish Balay     }
84749b5e25fSSatish Balay     xb +=2; ai++;
84849b5e25fSSatish Balay   }
84949b5e25fSSatish Balay 
850b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
851b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
852b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
85349b5e25fSSatish Balay 
8546c6c5352SBarry Smith   PetscLogFlops(4*(a->nz*2 - A->m));
85549b5e25fSSatish Balay   PetscFunctionReturn(0);
85649b5e25fSSatish Balay }
85749b5e25fSSatish Balay 
8584a2ae208SSatish Balay #undef __FUNCT__
8594a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
86049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
86149b5e25fSSatish Balay {
86249b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
86387828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3;
86449b5e25fSSatish Balay   MatScalar       *v;
865831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
86649b5e25fSSatish Balay 
86749b5e25fSSatish Balay   PetscFunctionBegin;
868b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
86949b5e25fSSatish Balay   if (yy != xx) {
870b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
87149b5e25fSSatish Balay   } else {
87249b5e25fSSatish Balay     y = x;
87349b5e25fSSatish Balay   }
87449b5e25fSSatish Balay   if (zz != yy) {
875a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
876b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
877a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
87849b5e25fSSatish Balay   } else {
87949b5e25fSSatish Balay     z = y;
88049b5e25fSSatish Balay   }
88149b5e25fSSatish Balay 
88249b5e25fSSatish Balay   v     = a->a;
88349b5e25fSSatish Balay   xb = x;
88449b5e25fSSatish Balay 
88549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
88649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
88749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
88849b5e25fSSatish Balay     ib = aj + *ai;
889831a3094SHong Zhang     jmin = 0;
8907fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
89149b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
89249b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
89349b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
894831a3094SHong Zhang      v += 9; jmin++;
8957fbae186SHong Zhang     }
896831a3094SHong Zhang     for (j=jmin; j<n; j++) {
89749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
89849b5e25fSSatish Balay       cval       = ib[j]*3;
89949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
90049b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
90149b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
90249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
90349b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
90449b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
90549b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
90649b5e25fSSatish Balay       v  += 9;
90749b5e25fSSatish Balay     }
90849b5e25fSSatish Balay     xb +=3; ai++;
90949b5e25fSSatish Balay   }
91049b5e25fSSatish Balay 
911b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
912b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
913b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
91449b5e25fSSatish Balay 
9156c6c5352SBarry Smith   PetscLogFlops(18*(a->nz*2 - A->m));
91649b5e25fSSatish Balay   PetscFunctionReturn(0);
91749b5e25fSSatish Balay }
91849b5e25fSSatish Balay 
9194a2ae208SSatish Balay #undef __FUNCT__
9204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
92149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
92249b5e25fSSatish Balay {
92349b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
92487828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4;
92549b5e25fSSatish Balay   MatScalar       *v;
926831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
92749b5e25fSSatish Balay 
92849b5e25fSSatish Balay   PetscFunctionBegin;
929b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
93049b5e25fSSatish Balay   if (yy != xx) {
931b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
93249b5e25fSSatish Balay   } else {
93349b5e25fSSatish Balay     y = x;
93449b5e25fSSatish Balay   }
93549b5e25fSSatish Balay   if (zz != yy) {
936a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
937b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
938a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
93949b5e25fSSatish Balay   } else {
94049b5e25fSSatish Balay     z = y;
94149b5e25fSSatish Balay   }
94249b5e25fSSatish Balay 
94349b5e25fSSatish Balay   v     = a->a;
94449b5e25fSSatish Balay   xb = x;
94549b5e25fSSatish Balay 
94649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
94749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
94849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
94949b5e25fSSatish Balay     ib = aj + *ai;
950831a3094SHong Zhang     jmin = 0;
9517fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
95249b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
95349b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
95449b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
95549b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
956831a3094SHong Zhang       v += 16; jmin++;
9577fbae186SHong Zhang     }
958831a3094SHong Zhang     for (j=jmin; j<n; j++) {
95949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
96049b5e25fSSatish Balay       cval       = ib[j]*4;
96149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
96249b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
96349b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
96449b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
96549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96649b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
96749b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
96849b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
96949b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
97049b5e25fSSatish Balay       v  += 16;
97149b5e25fSSatish Balay     }
97249b5e25fSSatish Balay     xb +=4; ai++;
97349b5e25fSSatish Balay   }
97449b5e25fSSatish Balay 
975b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
976b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
977b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
97849b5e25fSSatish Balay 
9796c6c5352SBarry Smith   PetscLogFlops(32*(a->nz*2 - A->m));
98049b5e25fSSatish Balay   PetscFunctionReturn(0);
98149b5e25fSSatish Balay }
98249b5e25fSSatish Balay 
9834a2ae208SSatish Balay #undef __FUNCT__
9844a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
98549b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
98649b5e25fSSatish Balay {
98749b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
98887828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5;
98949b5e25fSSatish Balay   MatScalar       *v;
990831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
99149b5e25fSSatish Balay 
99249b5e25fSSatish Balay   PetscFunctionBegin;
993b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
99449b5e25fSSatish Balay   if (yy != xx) {
995b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
99649b5e25fSSatish Balay   } else {
99749b5e25fSSatish Balay     y = x;
99849b5e25fSSatish Balay   }
99949b5e25fSSatish Balay   if (zz != yy) {
1000a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1001b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
1002a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
100349b5e25fSSatish Balay   } else {
100449b5e25fSSatish Balay     z = y;
100549b5e25fSSatish Balay   }
100649b5e25fSSatish Balay 
100749b5e25fSSatish Balay   v     = a->a;
100849b5e25fSSatish Balay   xb = x;
100949b5e25fSSatish Balay 
101049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
101149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
101249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
101349b5e25fSSatish Balay     ib = aj + *ai;
1014831a3094SHong Zhang     jmin = 0;
10157fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
101649b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
101749b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
101849b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
101949b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
102049b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
1021831a3094SHong Zhang       v += 25; jmin++;
10227fbae186SHong Zhang     }
1023831a3094SHong Zhang     for (j=jmin; j<n; j++) {
102449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
102549b5e25fSSatish Balay       cval       = ib[j]*5;
102649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
102749b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
102849b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
102949b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
103049b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
103149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
103249b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
103349b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
103449b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
103549b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
103649b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
103749b5e25fSSatish Balay       v  += 25;
103849b5e25fSSatish Balay     }
103949b5e25fSSatish Balay     xb +=5; ai++;
104049b5e25fSSatish Balay   }
104149b5e25fSSatish Balay 
1042b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1043b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1044b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
104549b5e25fSSatish Balay 
10466c6c5352SBarry Smith   PetscLogFlops(50*(a->nz*2 - A->m));
104749b5e25fSSatish Balay   PetscFunctionReturn(0);
104849b5e25fSSatish Balay }
10494a2ae208SSatish Balay #undef __FUNCT__
10504a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
105149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
105249b5e25fSSatish Balay {
105349b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
105487828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
105549b5e25fSSatish Balay   MatScalar       *v;
1056831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
105749b5e25fSSatish Balay 
105849b5e25fSSatish Balay   PetscFunctionBegin;
1059b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
106049b5e25fSSatish Balay   if (yy != xx) {
1061b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
106249b5e25fSSatish Balay   } else {
106349b5e25fSSatish Balay     y = x;
106449b5e25fSSatish Balay   }
106549b5e25fSSatish Balay   if (zz != yy) {
1066a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1067b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
1068a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
106949b5e25fSSatish Balay   } else {
107049b5e25fSSatish Balay     z = y;
107149b5e25fSSatish Balay   }
107249b5e25fSSatish Balay 
107349b5e25fSSatish Balay   v     = a->a;
107449b5e25fSSatish Balay   xb = x;
107549b5e25fSSatish Balay 
107649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
107749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
107849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
107949b5e25fSSatish Balay     ib = aj + *ai;
1080831a3094SHong Zhang     jmin = 0;
10817fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
108249b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
108349b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
108449b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
108549b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
108649b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
108749b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1088831a3094SHong Zhang       v += 36; jmin++;
10897fbae186SHong Zhang     }
1090831a3094SHong Zhang     for (j=jmin; j<n; j++) {
109149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
109249b5e25fSSatish Balay       cval       = ib[j]*6;
109349b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
109449b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
109549b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
109649b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
109749b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
109849b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
109949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
110049b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
110149b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
110249b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
110349b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
110449b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
110549b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
110649b5e25fSSatish Balay       v  += 36;
110749b5e25fSSatish Balay     }
110849b5e25fSSatish Balay     xb +=6; ai++;
110949b5e25fSSatish Balay   }
111049b5e25fSSatish Balay 
1111b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1112b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1113b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
111449b5e25fSSatish Balay 
11156c6c5352SBarry Smith   PetscLogFlops(72*(a->nz*2 - A->m));
111649b5e25fSSatish Balay   PetscFunctionReturn(0);
111749b5e25fSSatish Balay }
111849b5e25fSSatish Balay 
11194a2ae208SSatish Balay #undef __FUNCT__
11204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
112149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
112249b5e25fSSatish Balay {
112349b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
112487828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
112549b5e25fSSatish Balay   MatScalar       *v;
1126831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
112749b5e25fSSatish Balay 
112849b5e25fSSatish Balay   PetscFunctionBegin;
1129b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
113049b5e25fSSatish Balay   if (yy != xx) {
1131b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
113249b5e25fSSatish Balay   } else {
113349b5e25fSSatish Balay     y = x;
113449b5e25fSSatish Balay   }
113549b5e25fSSatish Balay   if (zz != yy) {
1136a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1137b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
1138a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
113949b5e25fSSatish Balay   } else {
114049b5e25fSSatish Balay     z = y;
114149b5e25fSSatish Balay   }
114249b5e25fSSatish Balay 
114349b5e25fSSatish Balay   v     = a->a;
114449b5e25fSSatish Balay   xb = x;
114549b5e25fSSatish Balay 
114649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
114749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
114849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
114949b5e25fSSatish Balay     ib = aj + *ai;
1150831a3094SHong Zhang     jmin = 0;
11517fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
115249b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
115349b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
115449b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
115549b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
115649b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
115749b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
115849b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1159831a3094SHong Zhang       v += 49; jmin++;
11607fbae186SHong Zhang     }
1161831a3094SHong Zhang     for (j=jmin; j<n; j++) {
116249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
116349b5e25fSSatish Balay       cval       = ib[j]*7;
116449b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
116549b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
116649b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
116749b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
116849b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
116949b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
117049b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
117149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
117249b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
117349b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
117449b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
117549b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
117649b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
117749b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
117849b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
117949b5e25fSSatish Balay       v  += 49;
118049b5e25fSSatish Balay     }
118149b5e25fSSatish Balay     xb +=7; ai++;
118249b5e25fSSatish Balay   }
118349b5e25fSSatish Balay 
1184b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1185b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1186b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
118749b5e25fSSatish Balay 
11886c6c5352SBarry Smith   PetscLogFlops(98*(a->nz*2 - A->m));
118949b5e25fSSatish Balay   PetscFunctionReturn(0);
119049b5e25fSSatish Balay }
119149b5e25fSSatish Balay 
11924a2ae208SSatish Balay #undef __FUNCT__
11934a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
119449b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
119549b5e25fSSatish Balay {
119649b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
119787828ca2SBarry Smith   PetscScalar     *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1198066653e3SSatish Balay   MatScalar       *v;
1199066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1200066653e3SSatish Balay   int             ncols,k;
120149b5e25fSSatish Balay 
120249b5e25fSSatish Balay   PetscFunctionBegin;
1203b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x;
120449b5e25fSSatish Balay   if (yy != xx) {
1205b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
120649b5e25fSSatish Balay   } else {
120749b5e25fSSatish Balay     y = x;
120849b5e25fSSatish Balay   }
120949b5e25fSSatish Balay   if (zz != yy) {
1210a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1211b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z;
1212a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
121349b5e25fSSatish Balay   } else {
121449b5e25fSSatish Balay     z = y;
121549b5e25fSSatish Balay   }
121649b5e25fSSatish Balay 
121749b5e25fSSatish Balay   aj   = a->j;
121849b5e25fSSatish Balay   v    = a->a;
121949b5e25fSSatish Balay   ii   = a->i;
122049b5e25fSSatish Balay 
122149b5e25fSSatish Balay   if (!a->mult_work) {
122287828ca2SBarry Smith     ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
122349b5e25fSSatish Balay   }
122449b5e25fSSatish Balay   work = a->mult_work;
122549b5e25fSSatish Balay 
122649b5e25fSSatish Balay 
122749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
122849b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
122949b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
123049b5e25fSSatish Balay 
123149b5e25fSSatish Balay     /* upper triangular part */
123249b5e25fSSatish Balay     for (j=0; j<n; j++) {
123349b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
123449b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
123549b5e25fSSatish Balay       workt += bs;
123649b5e25fSSatish Balay     }
123749b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
123849b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
123949b5e25fSSatish Balay 
124049b5e25fSSatish Balay     /* strict lower triangular part */
1241831a3094SHong Zhang     idx = aj+ii[0];
1242831a3094SHong Zhang     if (*idx == i){
124396b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1244831a3094SHong Zhang     }
124549b5e25fSSatish Balay     if (ncols > 0){
124649b5e25fSSatish Balay       workt = work;
124787828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1248831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1249831a3094SHong Zhang       for (j=0; j<n; j++) {
1250831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
1251831a3094SHong Zhang         /* idx++; */
125249b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
125349b5e25fSSatish Balay         workt += bs;
125449b5e25fSSatish Balay       }
125549b5e25fSSatish Balay     }
125649b5e25fSSatish Balay 
125749b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
125849b5e25fSSatish Balay   }
125949b5e25fSSatish Balay 
1260b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1261b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1262b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
126349b5e25fSSatish Balay 
12646c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m));
126549b5e25fSSatish Balay   PetscFunctionReturn(0);
126649b5e25fSSatish Balay }
126749b5e25fSSatish Balay 
12684a2ae208SSatish Balay #undef __FUNCT__
12694a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqSBAIJ"
127049b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
127149b5e25fSSatish Balay {
1272eeeff2ecSHong Zhang   int ierr;
1273eeeff2ecSHong Zhang 
127449b5e25fSSatish Balay   PetscFunctionBegin;
1275eeeff2ecSHong Zhang   ierr = MatMult(A,xx,zz);CHKERRQ(ierr);
1276eeeff2ecSHong Zhang   PetscFunctionReturn(0);
127749b5e25fSSatish Balay }
127849b5e25fSSatish Balay 
12794a2ae208SSatish Balay #undef __FUNCT__
12804a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ"
128149b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
128249b5e25fSSatish Balay 
128349b5e25fSSatish Balay {
1284eeeff2ecSHong Zhang   int ierr;
1285eeeff2ecSHong Zhang 
128649b5e25fSSatish Balay   PetscFunctionBegin;
1287eeeff2ecSHong Zhang   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1288eeeff2ecSHong Zhang   PetscFunctionReturn(0);
128949b5e25fSSatish Balay }
129049b5e25fSSatish Balay 
12914a2ae208SSatish Balay #undef __FUNCT__
12924a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1293268466fbSBarry Smith int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA)
129449b5e25fSSatish Balay {
129549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
12966c6c5352SBarry Smith   int         one = 1,totalnz = a->bs2*a->nz;
129749b5e25fSSatish Balay 
129849b5e25fSSatish Balay   PetscFunctionBegin;
1299268466fbSBarry Smith   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1300b0a32e0cSBarry Smith   PetscLogFlops(totalnz);
130149b5e25fSSatish Balay   PetscFunctionReturn(0);
130249b5e25fSSatish Balay }
130349b5e25fSSatish Balay 
13044a2ae208SSatish Balay #undef __FUNCT__
13054a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
130649b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
130749b5e25fSSatish Balay {
130849b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
130949b5e25fSSatish Balay   MatScalar   *v = a->a;
131049b5e25fSSatish Balay   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1311831a3094SHong Zhang   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1312831a3094SHong Zhang   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
131349b5e25fSSatish Balay 
131449b5e25fSSatish Balay   PetscFunctionBegin;
131549b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
131649b5e25fSSatish Balay     for (k=0; k<mbs; k++){
131749b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1318831a3094SHong Zhang       col  = aj + jmin;
1319831a3094SHong Zhang       if (*col == k){         /* diagonal block */
132049b5e25fSSatish Balay         for (i=0; i<bs2; i++){
132149b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
132249b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
132349b5e25fSSatish Balay #else
132449b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
132549b5e25fSSatish Balay #endif
132649b5e25fSSatish Balay         }
1327831a3094SHong Zhang         jmin++;
1328831a3094SHong Zhang       }
1329831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
133049b5e25fSSatish Balay         for (i=0; i<bs2; i++){
133149b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
133249b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
133349b5e25fSSatish Balay #else
133449b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
133549b5e25fSSatish Balay #endif
133649b5e25fSSatish Balay         }
133749b5e25fSSatish Balay       }
133849b5e25fSSatish Balay     }
133949b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
134049b5e25fSSatish Balay 
134149b5e25fSSatish Balay   }  else if (type == NORM_INFINITY) { /* maximum row sum */
134282502324SSatish Balay     ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr);
134382502324SSatish Balay     ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr);
134482502324SSatish Balay     ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr);
134549b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
134649b5e25fSSatish Balay       jl[i] = mbs; il[0] = 0;
134749b5e25fSSatish Balay     }
134849b5e25fSSatish Balay 
134949b5e25fSSatish Balay     *norm = 0.0;
135049b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
135149b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
135249b5e25fSSatish Balay 
135349b5e25fSSatish Balay       /*-- col sum --*/
135449b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1355831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1356831a3094SHong Zhang                   at step k */
135749b5e25fSSatish Balay       while (i<mbs){
135849b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
135949b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
136049b5e25fSSatish Balay         for (j=0; j<bs; j++){
136149b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
136249b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
136349b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
136449b5e25fSSatish Balay           }
136549b5e25fSSatish Balay         }
136649b5e25fSSatish Balay         /* update il, jl */
1367831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1368831a3094SHong Zhang         jmax = a->i[i+1];
136949b5e25fSSatish Balay         if (jmin < jmax){
137049b5e25fSSatish Balay           il[i] = jmin;
137149b5e25fSSatish Balay           j   = a->j[jmin];
137249b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
137349b5e25fSSatish Balay         }
137449b5e25fSSatish Balay         i = nexti;
137549b5e25fSSatish Balay       }
137649b5e25fSSatish Balay 
137749b5e25fSSatish Balay       /*-- row sum --*/
137849b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
137949b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
138049b5e25fSSatish Balay         for (j=0; j<bs; j++){
138149b5e25fSSatish Balay           v = a->a + i*bs2 + j;
138249b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
138349b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v);
138449b5e25fSSatish Balay             v   += bs;
138549b5e25fSSatish Balay           }
138649b5e25fSSatish Balay         }
138749b5e25fSSatish Balay       }
138849b5e25fSSatish Balay       /* add k_th block row to il, jl */
1389831a3094SHong Zhang       col = aj+jmin;
1390831a3094SHong Zhang       if (*col == k) jmin++;
139149b5e25fSSatish Balay       if (jmin < jmax){
139249b5e25fSSatish Balay         il[k] = jmin;
139349b5e25fSSatish Balay         j   = a->j[jmin];
139449b5e25fSSatish Balay         jl[k] = jl[j]; jl[j] = k;
139549b5e25fSSatish Balay       }
139649b5e25fSSatish Balay       for (j=0; j<bs; j++){
139749b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
139849b5e25fSSatish Balay       }
139949b5e25fSSatish Balay     }
140049b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
140149b5e25fSSatish Balay     ierr = PetscFree(jl);CHKERRQ(ierr);
140249b5e25fSSatish Balay     ierr = PetscFree(sum);CHKERRQ(ierr);
140349b5e25fSSatish Balay   } else {
1404347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
140549b5e25fSSatish Balay   }
140649b5e25fSSatish Balay   PetscFunctionReturn(0);
140749b5e25fSSatish Balay }
140849b5e25fSSatish Balay 
14094a2ae208SSatish Balay #undef __FUNCT__
14104a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
141149b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
141249b5e25fSSatish Balay {
141349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
141449b5e25fSSatish Balay   int          ierr;
141549b5e25fSSatish Balay 
141649b5e25fSSatish Balay   PetscFunctionBegin;
141749b5e25fSSatish Balay 
141849b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
14196c6c5352SBarry Smith   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1420ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1421ef511fbeSHong Zhang     PetscFunctionReturn(0);
142249b5e25fSSatish Balay   }
142349b5e25fSSatish Balay 
142449b5e25fSSatish Balay   /* if the a->i are the same */
142549b5e25fSSatish Balay   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
142649b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
142749b5e25fSSatish Balay     PetscFunctionReturn(0);
142849b5e25fSSatish Balay   }
142949b5e25fSSatish Balay 
143049b5e25fSSatish Balay   /* if a->j are the same */
14316c6c5352SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
143249b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
143349b5e25fSSatish Balay     PetscFunctionReturn(0);
143449b5e25fSSatish Balay   }
143549b5e25fSSatish Balay   /* if a->a are the same */
14366c6c5352SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
143749b5e25fSSatish Balay 
1438935af2e7SHong Zhang   PetscFunctionReturn(0);
143949b5e25fSSatish Balay }
144049b5e25fSSatish Balay 
14414a2ae208SSatish Balay #undef __FUNCT__
14424a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
144349b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
144449b5e25fSSatish Balay {
144549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
144649b5e25fSSatish Balay   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
144787828ca2SBarry Smith   PetscScalar  *x,zero = 0.0;
144849b5e25fSSatish Balay   MatScalar    *aa,*aa_j;
144949b5e25fSSatish Balay 
145049b5e25fSSatish Balay   PetscFunctionBegin;
145149b5e25fSSatish Balay   bs   = a->bs;
145282799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
145382799104SHong Zhang 
145449b5e25fSSatish Balay   aa   = a->a;
145549b5e25fSSatish Balay   ai   = a->i;
145649b5e25fSSatish Balay   aj   = a->j;
145749b5e25fSSatish Balay   ambs = a->mbs;
145849b5e25fSSatish Balay   bs2  = a->bs2;
145949b5e25fSSatish Balay 
146049b5e25fSSatish Balay   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1461b1d4fb26SBarry Smith   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
146249b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1463ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
146449b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
146549b5e25fSSatish Balay     j=ai[i];
146649b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
146749b5e25fSSatish Balay       row  = i*bs;
146849b5e25fSSatish Balay       aa_j = aa + j*bs2;
146982799104SHong Zhang       if (A->factor && bs==1){
147082799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
147182799104SHong Zhang       } else {
147249b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
147349b5e25fSSatish Balay       }
147449b5e25fSSatish Balay     }
147582799104SHong Zhang   }
147682799104SHong Zhang 
1477b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
147849b5e25fSSatish Balay   PetscFunctionReturn(0);
147949b5e25fSSatish Balay }
148049b5e25fSSatish Balay 
14814a2ae208SSatish Balay #undef __FUNCT__
14824a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
148349b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
148449b5e25fSSatish Balay {
148549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
148687828ca2SBarry Smith   PetscScalar  *l,*r,x,*li,*ri;
148749b5e25fSSatish Balay   MatScalar    *aa,*v;
1488066653e3SSatish Balay   int          ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;
148949b5e25fSSatish Balay 
149049b5e25fSSatish Balay   PetscFunctionBegin;
149149b5e25fSSatish Balay   ai  = a->i;
149249b5e25fSSatish Balay   aj  = a->j;
149349b5e25fSSatish Balay   aa  = a->a;
1494ef511fbeSHong Zhang   m   = A->m;
149549b5e25fSSatish Balay   bs  = a->bs;
149649b5e25fSSatish Balay   mbs = a->mbs;
149749b5e25fSSatish Balay   bs2 = a->bs2;
149849b5e25fSSatish Balay 
149949b5e25fSSatish Balay   if (ll != rr) {
1500347d480fSBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
150149b5e25fSSatish Balay   }
150249b5e25fSSatish Balay   if (ll) {
1503b1d4fb26SBarry Smith     ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr);
150449b5e25fSSatish Balay     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1505347d480fSBarry Smith     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
150649b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
150749b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
150849b5e25fSSatish Balay       li = l + i*bs;
150949b5e25fSSatish Balay       v  = aa + bs2*ai[i];
151049b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
151149b5e25fSSatish Balay         for (k=0; k<bs2; k++) {
151249b5e25fSSatish Balay           (*v++) *= li[k%bs];
151349b5e25fSSatish Balay         }
151449b5e25fSSatish Balay #ifdef CONT
151549b5e25fSSatish Balay         /* will be used to replace the above loop */
151649b5e25fSSatish Balay         ri = l + bs*aj[ai[i]+j];
151749b5e25fSSatish Balay         for (k=0; k<bs; k++) { /* column value */
151849b5e25fSSatish Balay           x = ri[k];
151949b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
152049b5e25fSSatish Balay         }
152149b5e25fSSatish Balay #endif
152249b5e25fSSatish Balay 
152349b5e25fSSatish Balay       }
152449b5e25fSSatish Balay     }
1525b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr);
15266c6c5352SBarry Smith     PetscLogFlops(2*a->nz);
152749b5e25fSSatish Balay   }
152849b5e25fSSatish Balay   /* will be deleted */
152949b5e25fSSatish Balay   if (rr) {
1530b1d4fb26SBarry Smith     ierr = VecGetArrayFast(rr,&r);CHKERRQ(ierr);
153149b5e25fSSatish Balay     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1532347d480fSBarry Smith     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
153349b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
153449b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
153549b5e25fSSatish Balay       v  = aa + bs2*ai[i];
153649b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
153749b5e25fSSatish Balay         ri = r + bs*aj[ai[i]+j];
153849b5e25fSSatish Balay         for (k=0; k<bs; k++) {
153949b5e25fSSatish Balay           x = ri[k];
154049b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
154149b5e25fSSatish Balay         }
154249b5e25fSSatish Balay       }
154349b5e25fSSatish Balay     }
1544b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(rr,&r);CHKERRQ(ierr);
15456c6c5352SBarry Smith     PetscLogFlops(a->nz);
154649b5e25fSSatish Balay   }
154749b5e25fSSatish Balay   PetscFunctionReturn(0);
154849b5e25fSSatish Balay }
154949b5e25fSSatish Balay 
15504a2ae208SSatish Balay #undef __FUNCT__
15514a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
155249b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
155349b5e25fSSatish Balay {
155449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
155549b5e25fSSatish Balay 
155649b5e25fSSatish Balay   PetscFunctionBegin;
1557ef511fbeSHong Zhang   info->rows_global    = (double)A->m;
1558ef511fbeSHong Zhang   info->columns_global = (double)A->m;
1559ef511fbeSHong Zhang   info->rows_local     = (double)A->m;
1560ef511fbeSHong Zhang   info->columns_local  = (double)A->m;
156149b5e25fSSatish Balay   info->block_size     = a->bs2;
15626c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
15636c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
156449b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
156549b5e25fSSatish Balay   info->assemblies   = A->num_ass;
156649b5e25fSSatish Balay   info->mallocs      = a->reallocs;
156749b5e25fSSatish Balay   info->memory       = A->mem;
156849b5e25fSSatish Balay   if (A->factor) {
156949b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
157049b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
157149b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
157249b5e25fSSatish Balay   } else {
157349b5e25fSSatish Balay     info->fill_ratio_given  = 0;
157449b5e25fSSatish Balay     info->fill_ratio_needed = 0;
157549b5e25fSSatish Balay     info->factor_mallocs    = 0;
157649b5e25fSSatish Balay   }
157749b5e25fSSatish Balay   PetscFunctionReturn(0);
157849b5e25fSSatish Balay }
157949b5e25fSSatish Balay 
158049b5e25fSSatish Balay 
15814a2ae208SSatish Balay #undef __FUNCT__
15824a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
158349b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A)
158449b5e25fSSatish Balay {
158549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
158649b5e25fSSatish Balay   int         ierr;
158749b5e25fSSatish Balay 
158849b5e25fSSatish Balay   PetscFunctionBegin;
158949b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
159049b5e25fSSatish Balay   PetscFunctionReturn(0);
159149b5e25fSSatish Balay }
1592dc354874SHong Zhang 
15934a2ae208SSatish Balay #undef __FUNCT__
15944a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ"
1595dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1596dc354874SHong Zhang {
1597dc354874SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1598d0f6400bSHong Zhang   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1599c3fca9a7SHong Zhang   PetscReal    atmp;
1600273d9f13SBarry Smith   MatScalar    *aa;
160187828ca2SBarry Smith   PetscScalar  zero = 0.0,*x;
1602d0f6400bSHong Zhang   int          ncols,brow,bcol,krow,kcol;
1603dc354874SHong Zhang 
1604dc354874SHong Zhang   PetscFunctionBegin;
1605dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1606dc354874SHong Zhang   bs   = a->bs;
1607dc354874SHong Zhang   aa   = a->a;
1608dc354874SHong Zhang   ai   = a->i;
1609dc354874SHong Zhang   aj   = a->j;
161044117c81SHong Zhang   mbs = a->mbs;
1611dc354874SHong Zhang 
1612dc354874SHong Zhang   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1613b1d4fb26SBarry Smith   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
1614dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1615ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
161644117c81SHong Zhang   for (i=0; i<mbs; i++) {
1617d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1618d0f6400bSHong Zhang     brow  = bs*i;
161944117c81SHong Zhang     for (j=0; j<ncols; j++){
1620d0f6400bSHong Zhang       bcol = bs*(*aj);
162144117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1622d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
162344117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1624d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1625d0f6400bSHong Zhang           row = brow + krow;    /* row index */
162644117c81SHong Zhang           /* printf("val[%d,%d]: %g\n",row,col,atmp); */
1627c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1628c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
162944117c81SHong Zhang         }
163044117c81SHong Zhang       }
1631d0f6400bSHong Zhang       aj++;
1632dc354874SHong Zhang     }
1633dc354874SHong Zhang   }
1634b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
1635dc354874SHong Zhang   PetscFunctionReturn(0);
1636dc354874SHong Zhang }
1637