173f4d377SMatthew Knepley /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/ 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 449b5e25fSSatish Balay #include "src/inline/spops.h" 549b5e25fSSatish Balay #include "src/inline/ilu.h" 649b5e25fSSatish Balay #include "petscbt.h" 73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 849b5e25fSSatish Balay 9*5eee224dSHong Zhang /* MatIncreaseOverlap_SeqSBAIJ() is vertually same as MatIncreaseOverlap_SeqBAIJ() 10*5eee224dSHong Zhang except in the line 1: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 11*5eee224dSHong Zhang and is slightly polished. 12*5eee224dSHong Zhang Should the two function be combined into a single piece of code? */ 134a2ae208SSatish Balay #undef __FUNCT__ 144a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 15268466fbSBarry Smith int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov) 1649b5e25fSSatish Balay { 17*5eee224dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 18*5eee224dSHong Zhang int brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol; 19*5eee224dSHong Zhang int start,end,*ai,*aj,bs,*nidx2; 20*5eee224dSHong Zhang PetscBT table; 21*5eee224dSHong Zhang 2249b5e25fSSatish Balay PetscFunctionBegin; 23*5eee224dSHong Zhang mbs = a->mbs; 24*5eee224dSHong Zhang ai = a->i; 25*5eee224dSHong Zhang aj = a->j; 26*5eee224dSHong Zhang bs = a->bs; 27*5eee224dSHong Zhang 28*5eee224dSHong Zhang if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 29*5eee224dSHong Zhang 30*5eee224dSHong Zhang ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr); 31*5eee224dSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 32*5eee224dSHong Zhang ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr); 33*5eee224dSHong Zhang 34*5eee224dSHong Zhang for (i=0; i<is_max; i++) { 35*5eee224dSHong Zhang /* Initialise the two local arrays */ 36*5eee224dSHong Zhang isz = 0; 37*5eee224dSHong Zhang ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr); 38*5eee224dSHong Zhang 39*5eee224dSHong Zhang /* Extract the indices, assume there can be duplicate entries */ 40*5eee224dSHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 41*5eee224dSHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 42*5eee224dSHong Zhang 43*5eee224dSHong Zhang /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */ 44*5eee224dSHong Zhang for (j=0; j<n ; ++j){ 45*5eee224dSHong Zhang bcol = idx[j]/bs; /* convert the indices into block indices */ 46*5eee224dSHong Zhang if (bcol >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 47*5eee224dSHong Zhang if(!PetscBTLookupSet(table,bcol)) { nidx[isz++] = bcol;} 48*5eee224dSHong Zhang } 49*5eee224dSHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 50*5eee224dSHong Zhang ierr = ISDestroy(is[i]);CHKERRQ(ierr); 51*5eee224dSHong Zhang 52*5eee224dSHong Zhang k = 0; 53*5eee224dSHong Zhang for (j=0; j<ov; j++){ /* for each overlap*/ 54*5eee224dSHong Zhang n = isz; 55*5eee224dSHong Zhang for (; k<n ; k++){ /* do only those brows in nidx[k], which are not done yet */ 56*5eee224dSHong Zhang brow = nidx[k]; 57*5eee224dSHong Zhang start = ai[brow]; 58*5eee224dSHong Zhang end = ai[brow+1]; 59*5eee224dSHong Zhang for (l = start; l<end ; l++){ 60*5eee224dSHong Zhang bcol = aj[l]; 61*5eee224dSHong Zhang if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;} 62*5eee224dSHong Zhang } 63*5eee224dSHong Zhang } 64*5eee224dSHong Zhang } 65*5eee224dSHong Zhang /* expand the Index Set */ 66*5eee224dSHong Zhang for (j=0; j<isz; j++) { 67*5eee224dSHong Zhang for (k=0; k<bs; k++) 68*5eee224dSHong Zhang nidx2[j*bs+k] = nidx[j]*bs+k; 69*5eee224dSHong Zhang } 70*5eee224dSHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 71*5eee224dSHong Zhang } 72*5eee224dSHong Zhang ierr = PetscBTDestroy(table);CHKERRQ(ierr); 73*5eee224dSHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 74*5eee224dSHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 75*5eee224dSHong Zhang PetscFunctionReturn(0); 7649b5e25fSSatish Balay } 7749b5e25fSSatish Balay 784a2ae208SSatish Balay #undef __FUNCT__ 794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 8049b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 8149b5e25fSSatish Balay { 8249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 8349b5e25fSSatish Balay int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 8449b5e25fSSatish Balay int row,mat_i,*mat_j,tcol,*mat_ilen; 8549b5e25fSSatish Balay int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 8649b5e25fSSatish Balay int *aj = a->j,*ai = a->i; 8749b5e25fSSatish Balay MatScalar *mat_a; 8849b5e25fSSatish Balay Mat C; 8949b5e25fSSatish Balay PetscTruth flag; 9049b5e25fSSatish Balay 9149b5e25fSSatish Balay PetscFunctionBegin; 9249b5e25fSSatish Balay 93ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 9449b5e25fSSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 95347d480fSBarry Smith if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 9649b5e25fSSatish Balay 9749b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 9849b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 9949b5e25fSSatish Balay 100b0a32e0cSBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr); 10149b5e25fSSatish Balay ssmap = smap; 102b0a32e0cSBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr); 10349b5e25fSSatish Balay ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 10449b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 10549b5e25fSSatish Balay /* determine lens of each row */ 10649b5e25fSSatish Balay for (i=0; i<nrows; i++) { 10749b5e25fSSatish Balay kstart = ai[irow[i]]; 10849b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 10949b5e25fSSatish Balay lens[i] = 0; 11049b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 11149b5e25fSSatish Balay if (ssmap[aj[k]]) { 11249b5e25fSSatish Balay lens[i]++; 11349b5e25fSSatish Balay } 11449b5e25fSSatish Balay } 11549b5e25fSSatish Balay } 11649b5e25fSSatish Balay /* Create and fill new matrix */ 11749b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 11849b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 11949b5e25fSSatish Balay 120347d480fSBarry Smith if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 12149b5e25fSSatish Balay ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 12249b5e25fSSatish Balay if (flag == PETSC_FALSE) { 123347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 12449b5e25fSSatish Balay } 12549b5e25fSSatish Balay ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 12649b5e25fSSatish Balay C = *B; 12749b5e25fSSatish Balay } else { 128e2d9671bSKris Buschelman ierr = MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr); 129e2d9671bSKris Buschelman ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 130e2d9671bSKris Buschelman ierr = MatSeqSBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr); 13149b5e25fSSatish Balay } 13249b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 13349b5e25fSSatish Balay for (i=0; i<nrows; i++) { 13449b5e25fSSatish Balay row = irow[i]; 13549b5e25fSSatish Balay kstart = ai[row]; 13649b5e25fSSatish Balay kend = kstart + a->ilen[row]; 13749b5e25fSSatish Balay mat_i = c->i[i]; 13849b5e25fSSatish Balay mat_j = c->j + mat_i; 13949b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 14049b5e25fSSatish Balay mat_ilen = c->ilen + i; 14149b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 14249b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 14349b5e25fSSatish Balay *mat_j++ = tcol - 1; 14449b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 14549b5e25fSSatish Balay mat_a += bs2; 14649b5e25fSSatish Balay (*mat_ilen)++; 14749b5e25fSSatish Balay } 14849b5e25fSSatish Balay } 14949b5e25fSSatish Balay } 15049b5e25fSSatish Balay 15149b5e25fSSatish Balay /* Free work space */ 15249b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 15349b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 15449b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15549b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15649b5e25fSSatish Balay 15749b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 15849b5e25fSSatish Balay *B = C; 15949b5e25fSSatish Balay PetscFunctionReturn(0); 16049b5e25fSSatish Balay } 16149b5e25fSSatish Balay 1624a2ae208SSatish Balay #undef __FUNCT__ 1634a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 16449b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 16549b5e25fSSatish Balay { 16649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 16749b5e25fSSatish Balay IS is1; 16849b5e25fSSatish Balay int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 16949b5e25fSSatish Balay 17049b5e25fSSatish Balay PetscFunctionBegin; 171ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 17249b5e25fSSatish Balay 17349b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 17449b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 17549b5e25fSSatish Balay 17649b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 17749b5e25fSSatish Balay and form the IS with compressed IS */ 17882502324SSatish Balay ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr); 17949b5e25fSSatish Balay iary = vary + a->mbs; 18049b5e25fSSatish Balay ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 18149b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 18249b5e25fSSatish Balay 18349b5e25fSSatish Balay count = 0; 18449b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 185ac355199SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks"); 18649b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 18749b5e25fSSatish Balay } 18849b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 18949b5e25fSSatish Balay 19049b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 19149b5e25fSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 19249b5e25fSSatish Balay 19349b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 19449b5e25fSSatish Balay ISDestroy(is1); 19549b5e25fSSatish Balay PetscFunctionReturn(0); 19649b5e25fSSatish Balay } 19749b5e25fSSatish Balay 1984a2ae208SSatish Balay #undef __FUNCT__ 1994a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 200268466fbSBarry Smith int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 20149b5e25fSSatish Balay { 20249b5e25fSSatish Balay int ierr,i; 20349b5e25fSSatish Balay 20449b5e25fSSatish Balay PetscFunctionBegin; 20549b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 20682502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 20749b5e25fSSatish Balay } 20849b5e25fSSatish Balay 20949b5e25fSSatish Balay for (i=0; i<n; i++) { 21049b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 21149b5e25fSSatish Balay } 21249b5e25fSSatish Balay PetscFunctionReturn(0); 21349b5e25fSSatish Balay } 21449b5e25fSSatish Balay 21549b5e25fSSatish Balay /* -------------------------------------------------------*/ 21649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 21749b5e25fSSatish Balay /* -------------------------------------------------------*/ 218d9eff348SSatish Balay #include "petscblaslapack.h" 21949b5e25fSSatish Balay 2204a2ae208SSatish Balay #undef __FUNCT__ 2214a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1" 22249b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 22349b5e25fSSatish Balay { 22449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,zero=0.0; 22649b5e25fSSatish Balay MatScalar *v; 227831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 22849b5e25fSSatish Balay 22949b5e25fSSatish Balay PetscFunctionBegin; 23049b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 231b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 232b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 23349b5e25fSSatish Balay 23449b5e25fSSatish Balay v = a->a; 23549b5e25fSSatish Balay xb = x; 23649b5e25fSSatish Balay 23749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 23849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 23949b5e25fSSatish Balay x1 = xb[0]; 24049b5e25fSSatish Balay ib = aj + *ai; 241831a3094SHong Zhang jmin = 0; 242831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 243831a3094SHong Zhang z[i] += *v++ * x[*ib++]; 244831a3094SHong Zhang jmin++; 245831a3094SHong Zhang } 246831a3094SHong Zhang for (j=jmin; j<n; j++) { 24749b5e25fSSatish Balay cval = *ib; 24849b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 24949b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 25049b5e25fSSatish Balay } 25149b5e25fSSatish Balay xb++; ai++; 25249b5e25fSSatish Balay } 25349b5e25fSSatish Balay 254b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 255b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 2566c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m) - A->m); /* nz = (nz+m)/2 */ 25749b5e25fSSatish Balay PetscFunctionReturn(0); 25849b5e25fSSatish Balay } 25949b5e25fSSatish Balay 2604a2ae208SSatish Balay #undef __FUNCT__ 2614a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 26249b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 26349b5e25fSSatish Balay { 26449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 26587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 26649b5e25fSSatish Balay MatScalar *v; 267831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 26849b5e25fSSatish Balay 26949b5e25fSSatish Balay 27049b5e25fSSatish Balay PetscFunctionBegin; 27149b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 272b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 273b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 27449b5e25fSSatish Balay 27549b5e25fSSatish Balay v = a->a; 27649b5e25fSSatish Balay xb = x; 27749b5e25fSSatish Balay 27849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 27949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 28049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 28149b5e25fSSatish Balay ib = aj + *ai; 282831a3094SHong Zhang jmin = 0; 2837fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 28449b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 28549b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 286831a3094SHong Zhang v += 4; jmin++; 2877fbae186SHong Zhang } 288831a3094SHong Zhang for (j=jmin; j<n; j++) { 28949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 29049b5e25fSSatish Balay cval = ib[j]*2; 29149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 29249b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 29349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 29449b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 29549b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 29649b5e25fSSatish Balay v += 4; 29749b5e25fSSatish Balay } 29849b5e25fSSatish Balay xb +=2; ai++; 29949b5e25fSSatish Balay } 30049b5e25fSSatish Balay 301b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 302b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 3036c6c5352SBarry Smith PetscLogFlops(8*(a->nz*2 - A->m) - A->m); 30449b5e25fSSatish Balay PetscFunctionReturn(0); 30549b5e25fSSatish Balay } 30649b5e25fSSatish Balay 3074a2ae208SSatish Balay #undef __FUNCT__ 3084a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 30949b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 31049b5e25fSSatish Balay { 31149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 31287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 31349b5e25fSSatish Balay MatScalar *v; 314831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 31549b5e25fSSatish Balay 31649b5e25fSSatish Balay 31749b5e25fSSatish Balay PetscFunctionBegin; 31849b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 319b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 320b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 32149b5e25fSSatish Balay 32249b5e25fSSatish Balay v = a->a; 32349b5e25fSSatish Balay xb = x; 32449b5e25fSSatish Balay 32549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 32649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 32749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 32849b5e25fSSatish Balay ib = aj + *ai; 329831a3094SHong Zhang jmin = 0; 3307fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 33149b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 33249b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 33349b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 334831a3094SHong Zhang v += 9; jmin++; 3357fbae186SHong Zhang } 336831a3094SHong Zhang for (j=jmin; j<n; j++) { 33749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 33849b5e25fSSatish Balay cval = ib[j]*3; 33949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 34049b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 34149b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 34249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 34349b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 34449b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 34549b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 34649b5e25fSSatish Balay v += 9; 34749b5e25fSSatish Balay } 34849b5e25fSSatish Balay xb +=3; ai++; 34949b5e25fSSatish Balay } 35049b5e25fSSatish Balay 351b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 352b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 3536c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m) - A->m); 35449b5e25fSSatish Balay PetscFunctionReturn(0); 35549b5e25fSSatish Balay } 35649b5e25fSSatish Balay 3574a2ae208SSatish Balay #undef __FUNCT__ 3584a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 35949b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 36049b5e25fSSatish Balay { 36149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 36287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 36349b5e25fSSatish Balay MatScalar *v; 364831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 36549b5e25fSSatish Balay 36649b5e25fSSatish Balay PetscFunctionBegin; 36749b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 368b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 369b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 37049b5e25fSSatish Balay 37149b5e25fSSatish Balay v = a->a; 37249b5e25fSSatish Balay xb = x; 37349b5e25fSSatish Balay 37449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 37549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 37649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 37749b5e25fSSatish Balay ib = aj + *ai; 378831a3094SHong Zhang jmin = 0; 3797fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 38049b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 38149b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 38249b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 38349b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 384831a3094SHong Zhang v += 16; jmin++; 3857fbae186SHong Zhang } 386831a3094SHong Zhang for (j=jmin; j<n; j++) { 38749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 38849b5e25fSSatish Balay cval = ib[j]*4; 38949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 39049b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 39149b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 39249b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 39349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 39449b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 39549b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 39649b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 39749b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 39849b5e25fSSatish Balay v += 16; 39949b5e25fSSatish Balay } 40049b5e25fSSatish Balay xb +=4; ai++; 40149b5e25fSSatish Balay } 40249b5e25fSSatish Balay 403b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 404b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 4056c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m) - A->m); 40649b5e25fSSatish Balay PetscFunctionReturn(0); 40749b5e25fSSatish Balay } 40849b5e25fSSatish Balay 4094a2ae208SSatish Balay #undef __FUNCT__ 4104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 41149b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 41249b5e25fSSatish Balay { 41349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41487828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 41549b5e25fSSatish Balay MatScalar *v; 416831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 41749b5e25fSSatish Balay 41849b5e25fSSatish Balay PetscFunctionBegin; 41949b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 420b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 421b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 42249b5e25fSSatish Balay 42349b5e25fSSatish Balay v = a->a; 42449b5e25fSSatish Balay xb = x; 42549b5e25fSSatish Balay 42649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 42749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 42849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 42949b5e25fSSatish Balay ib = aj + *ai; 430831a3094SHong Zhang jmin = 0; 4317fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 43249b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 43349b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 43449b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 43549b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 43649b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 437831a3094SHong Zhang v += 25; jmin++; 4387fbae186SHong Zhang } 439831a3094SHong Zhang for (j=jmin; j<n; j++) { 44049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 44149b5e25fSSatish Balay cval = ib[j]*5; 44249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 44349b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 44449b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 44549b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 44649b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 44749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 44849b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 44949b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 45049b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 45149b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 45249b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 45349b5e25fSSatish Balay v += 25; 45449b5e25fSSatish Balay } 45549b5e25fSSatish Balay xb +=5; ai++; 45649b5e25fSSatish Balay } 45749b5e25fSSatish Balay 458b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 459b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 4606c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m) - A->m); 46149b5e25fSSatish Balay PetscFunctionReturn(0); 46249b5e25fSSatish Balay } 46349b5e25fSSatish Balay 46449b5e25fSSatish Balay 4654a2ae208SSatish Balay #undef __FUNCT__ 4664a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 46749b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 46849b5e25fSSatish Balay { 46949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 47087828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 47149b5e25fSSatish Balay MatScalar *v; 472831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 47349b5e25fSSatish Balay 47449b5e25fSSatish Balay PetscFunctionBegin; 47549b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 476b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 477b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 47849b5e25fSSatish Balay 47949b5e25fSSatish Balay v = a->a; 48049b5e25fSSatish Balay xb = x; 48149b5e25fSSatish Balay 48249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 48349b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 48449b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 48549b5e25fSSatish Balay ib = aj + *ai; 486831a3094SHong Zhang jmin = 0; 4877fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 48849b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 48949b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 49049b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 49149b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 49249b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 49349b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 494831a3094SHong Zhang v += 36; jmin++; 4957fbae186SHong Zhang } 496831a3094SHong Zhang for (j=jmin; j<n; j++) { 49749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 49849b5e25fSSatish Balay cval = ib[j]*6; 49949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 50049b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 50149b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 50249b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 50349b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 50449b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 50549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 50649b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 50749b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 50849b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 50949b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 51049b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 51149b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 51249b5e25fSSatish Balay v += 36; 51349b5e25fSSatish Balay } 51449b5e25fSSatish Balay xb +=6; ai++; 51549b5e25fSSatish Balay } 51649b5e25fSSatish Balay 517b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 518b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 5196c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m) - A->m); 52049b5e25fSSatish Balay PetscFunctionReturn(0); 52149b5e25fSSatish Balay } 5224a2ae208SSatish Balay #undef __FUNCT__ 5234a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 52449b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 52549b5e25fSSatish Balay { 52649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 52787828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 52849b5e25fSSatish Balay MatScalar *v; 529831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 53049b5e25fSSatish Balay 53149b5e25fSSatish Balay PetscFunctionBegin; 53249b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 533b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 534b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 53549b5e25fSSatish Balay 53649b5e25fSSatish Balay v = a->a; 53749b5e25fSSatish Balay xb = x; 53849b5e25fSSatish Balay 53949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 54049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 54149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 54249b5e25fSSatish Balay ib = aj + *ai; 543831a3094SHong Zhang jmin = 0; 5447fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 54549b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 54649b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 54749b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 54849b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 54949b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 55049b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 55149b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 552831a3094SHong Zhang v += 49; jmin++; 5537fbae186SHong Zhang } 554831a3094SHong Zhang for (j=jmin; j<n; j++) { 55549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 55649b5e25fSSatish Balay cval = ib[j]*7; 55749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 55849b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 55949b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 56049b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 56149b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 56249b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 56349b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 56449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 56549b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 56649b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 56749b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 56849b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 56949b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 57049b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 57149b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 57249b5e25fSSatish Balay v += 49; 57349b5e25fSSatish Balay } 57449b5e25fSSatish Balay xb +=7; ai++; 57549b5e25fSSatish Balay } 576b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 577b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 5786c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m) - A->m); 57949b5e25fSSatish Balay PetscFunctionReturn(0); 58049b5e25fSSatish Balay } 58149b5e25fSSatish Balay 58249b5e25fSSatish Balay /* 58349b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 58449b5e25fSSatish Balay */ 5854a2ae208SSatish Balay #undef __FUNCT__ 5864a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 58749b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 58849b5e25fSSatish Balay { 58949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59087828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 5910b60a74dSHong Zhang MatScalar *v; 592066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 5930b60a74dSHong Zhang int ncols,k; 59449b5e25fSSatish Balay 59549b5e25fSSatish Balay PetscFunctionBegin; 59649b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 597b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x; 598b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z; 59949b5e25fSSatish Balay 60049b5e25fSSatish Balay aj = a->j; 60149b5e25fSSatish Balay v = a->a; 60249b5e25fSSatish Balay ii = a->i; 60349b5e25fSSatish Balay 60449b5e25fSSatish Balay if (!a->mult_work) { 60587828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 60649b5e25fSSatish Balay } 60749b5e25fSSatish Balay work = a->mult_work; 60849b5e25fSSatish Balay 60949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 61049b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 61149b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 61249b5e25fSSatish Balay 61349b5e25fSSatish Balay /* upper triangular part */ 61449b5e25fSSatish Balay for (j=0; j<n; j++) { 61549b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 61649b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 61749b5e25fSSatish Balay workt += bs; 61849b5e25fSSatish Balay } 61949b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 62049b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 62149b5e25fSSatish Balay 62249b5e25fSSatish Balay /* strict lower triangular part */ 623831a3094SHong Zhang idx = aj+ii[0]; 624831a3094SHong Zhang if (*idx == i){ 62596b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 626831a3094SHong Zhang } 62796b9376eSHong Zhang 62849b5e25fSSatish Balay if (ncols > 0){ 62949b5e25fSSatish Balay workt = work; 63087828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 631831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 632831a3094SHong Zhang for (j=0; j<n; j++) { 633831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 63449b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 63549b5e25fSSatish Balay workt += bs; 63649b5e25fSSatish Balay } 63749b5e25fSSatish Balay } 63849b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 63949b5e25fSSatish Balay } 64049b5e25fSSatish Balay 641b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 642b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 6436c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m); 64449b5e25fSSatish Balay PetscFunctionReturn(0); 64549b5e25fSSatish Balay } 64649b5e25fSSatish Balay 6474a2ae208SSatish Balay #undef __FUNCT__ 6484a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 64949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 65049b5e25fSSatish Balay { 65149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 65287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1; 65349b5e25fSSatish Balay MatScalar *v; 654831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 65549b5e25fSSatish Balay 65649b5e25fSSatish Balay PetscFunctionBegin; 657b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 65849b5e25fSSatish Balay if (yy != xx) { 659b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 66049b5e25fSSatish Balay } else { 66149b5e25fSSatish Balay y = x; 66249b5e25fSSatish Balay } 66349b5e25fSSatish Balay if (zz != yy) { 664a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 665b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 666a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 66749b5e25fSSatish Balay } else { 66849b5e25fSSatish Balay z = y; 66949b5e25fSSatish Balay } 67049b5e25fSSatish Balay 67149b5e25fSSatish Balay v = a->a; 67249b5e25fSSatish Balay xb = x; 67349b5e25fSSatish Balay 67449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 67549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 67649b5e25fSSatish Balay x1 = xb[0]; 67749b5e25fSSatish Balay ib = aj + *ai; 678831a3094SHong Zhang jmin = 0; 679831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 680831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 681831a3094SHong Zhang } 682831a3094SHong Zhang for (j=jmin; j<n; j++) { 68349b5e25fSSatish Balay cval = *ib; 68449b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 68549b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 68649b5e25fSSatish Balay } 68749b5e25fSSatish Balay xb++; ai++; 68849b5e25fSSatish Balay } 68949b5e25fSSatish Balay 690b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 691b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 692b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 69349b5e25fSSatish Balay 6946c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 69549b5e25fSSatish Balay PetscFunctionReturn(0); 69649b5e25fSSatish Balay } 69749b5e25fSSatish Balay 6984a2ae208SSatish Balay #undef __FUNCT__ 6994a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 70049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 70149b5e25fSSatish Balay { 70249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 70387828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2; 70449b5e25fSSatish Balay MatScalar *v; 705831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 70649b5e25fSSatish Balay 70749b5e25fSSatish Balay PetscFunctionBegin; 708b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 70949b5e25fSSatish Balay if (yy != xx) { 710b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 71149b5e25fSSatish Balay } else { 71249b5e25fSSatish Balay y = x; 71349b5e25fSSatish Balay } 71449b5e25fSSatish Balay if (zz != yy) { 715a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 716b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 717a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 71849b5e25fSSatish Balay } else { 71949b5e25fSSatish Balay z = y; 72049b5e25fSSatish Balay } 72149b5e25fSSatish Balay 72249b5e25fSSatish Balay v = a->a; 72349b5e25fSSatish Balay xb = x; 72449b5e25fSSatish Balay 72549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 72649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 72749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 72849b5e25fSSatish Balay ib = aj + *ai; 729831a3094SHong Zhang jmin = 0; 7307fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 73149b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 73249b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 733831a3094SHong Zhang v += 4; jmin++; 7347fbae186SHong Zhang } 735831a3094SHong Zhang for (j=jmin; j<n; j++) { 73649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 73749b5e25fSSatish Balay cval = ib[j]*2; 73849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 73949b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 74049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 74149b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 74249b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 74349b5e25fSSatish Balay v += 4; 74449b5e25fSSatish Balay } 74549b5e25fSSatish Balay xb +=2; ai++; 74649b5e25fSSatish Balay } 74749b5e25fSSatish Balay 748b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 749b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 750b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 75149b5e25fSSatish Balay 7526c6c5352SBarry Smith PetscLogFlops(4*(a->nz*2 - A->m)); 75349b5e25fSSatish Balay PetscFunctionReturn(0); 75449b5e25fSSatish Balay } 75549b5e25fSSatish Balay 7564a2ae208SSatish Balay #undef __FUNCT__ 7574a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 75849b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 75949b5e25fSSatish Balay { 76049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 76187828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3; 76249b5e25fSSatish Balay MatScalar *v; 763831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 76449b5e25fSSatish Balay 76549b5e25fSSatish Balay PetscFunctionBegin; 766b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 76749b5e25fSSatish Balay if (yy != xx) { 768b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 76949b5e25fSSatish Balay } else { 77049b5e25fSSatish Balay y = x; 77149b5e25fSSatish Balay } 77249b5e25fSSatish Balay if (zz != yy) { 773a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 774b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 775a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 77649b5e25fSSatish Balay } else { 77749b5e25fSSatish Balay z = y; 77849b5e25fSSatish Balay } 77949b5e25fSSatish Balay 78049b5e25fSSatish Balay v = a->a; 78149b5e25fSSatish Balay xb = x; 78249b5e25fSSatish Balay 78349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 78449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 78549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 78649b5e25fSSatish Balay ib = aj + *ai; 787831a3094SHong Zhang jmin = 0; 7887fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 78949b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 79049b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 79149b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 792831a3094SHong Zhang v += 9; jmin++; 7937fbae186SHong Zhang } 794831a3094SHong Zhang for (j=jmin; j<n; j++) { 79549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 79649b5e25fSSatish Balay cval = ib[j]*3; 79749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 79849b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 79949b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 80049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 80149b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 80249b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 80349b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 80449b5e25fSSatish Balay v += 9; 80549b5e25fSSatish Balay } 80649b5e25fSSatish Balay xb +=3; ai++; 80749b5e25fSSatish Balay } 80849b5e25fSSatish Balay 809b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 810b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 811b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 81249b5e25fSSatish Balay 8136c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m)); 81449b5e25fSSatish Balay PetscFunctionReturn(0); 81549b5e25fSSatish Balay } 81649b5e25fSSatish Balay 8174a2ae208SSatish Balay #undef __FUNCT__ 8184a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 81949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 82049b5e25fSSatish Balay { 82149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 82287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4; 82349b5e25fSSatish Balay MatScalar *v; 824831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 82549b5e25fSSatish Balay 82649b5e25fSSatish Balay PetscFunctionBegin; 827b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 82849b5e25fSSatish Balay if (yy != xx) { 829b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 83049b5e25fSSatish Balay } else { 83149b5e25fSSatish Balay y = x; 83249b5e25fSSatish Balay } 83349b5e25fSSatish Balay if (zz != yy) { 834a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 835b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 836a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 83749b5e25fSSatish Balay } else { 83849b5e25fSSatish Balay z = y; 83949b5e25fSSatish Balay } 84049b5e25fSSatish Balay 84149b5e25fSSatish Balay v = a->a; 84249b5e25fSSatish Balay xb = x; 84349b5e25fSSatish Balay 84449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 84549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 84649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 84749b5e25fSSatish Balay ib = aj + *ai; 848831a3094SHong Zhang jmin = 0; 8497fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 85049b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 85149b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 85249b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 85349b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 854831a3094SHong Zhang v += 16; jmin++; 8557fbae186SHong Zhang } 856831a3094SHong Zhang for (j=jmin; j<n; j++) { 85749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 85849b5e25fSSatish Balay cval = ib[j]*4; 85949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 86049b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 86149b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 86249b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 86349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 86449b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 86549b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 86649b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 86749b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 86849b5e25fSSatish Balay v += 16; 86949b5e25fSSatish Balay } 87049b5e25fSSatish Balay xb +=4; ai++; 87149b5e25fSSatish Balay } 87249b5e25fSSatish Balay 873b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 874b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 875b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 87649b5e25fSSatish Balay 8776c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m)); 87849b5e25fSSatish Balay PetscFunctionReturn(0); 87949b5e25fSSatish Balay } 88049b5e25fSSatish Balay 8814a2ae208SSatish Balay #undef __FUNCT__ 8824a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 88349b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 88449b5e25fSSatish Balay { 88549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 88687828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 88749b5e25fSSatish Balay MatScalar *v; 888831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 88949b5e25fSSatish Balay 89049b5e25fSSatish Balay PetscFunctionBegin; 891b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 89249b5e25fSSatish Balay if (yy != xx) { 893b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 89449b5e25fSSatish Balay } else { 89549b5e25fSSatish Balay y = x; 89649b5e25fSSatish Balay } 89749b5e25fSSatish Balay if (zz != yy) { 898a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 899b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 900a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 90149b5e25fSSatish Balay } else { 90249b5e25fSSatish Balay z = y; 90349b5e25fSSatish Balay } 90449b5e25fSSatish Balay 90549b5e25fSSatish Balay v = a->a; 90649b5e25fSSatish Balay xb = x; 90749b5e25fSSatish Balay 90849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 90949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 91049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 91149b5e25fSSatish Balay ib = aj + *ai; 912831a3094SHong Zhang jmin = 0; 9137fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 91449b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 91549b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 91649b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 91749b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 91849b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 919831a3094SHong Zhang v += 25; jmin++; 9207fbae186SHong Zhang } 921831a3094SHong Zhang for (j=jmin; j<n; j++) { 92249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 92349b5e25fSSatish Balay cval = ib[j]*5; 92449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 92549b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 92649b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 92749b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 92849b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 92949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 93049b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 93149b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 93249b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 93349b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 93449b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 93549b5e25fSSatish Balay v += 25; 93649b5e25fSSatish Balay } 93749b5e25fSSatish Balay xb +=5; ai++; 93849b5e25fSSatish Balay } 93949b5e25fSSatish Balay 940b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 941b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 942b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 94349b5e25fSSatish Balay 9446c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m)); 94549b5e25fSSatish Balay PetscFunctionReturn(0); 94649b5e25fSSatish Balay } 9474a2ae208SSatish Balay #undef __FUNCT__ 9484a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 94949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 95049b5e25fSSatish Balay { 95149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 95349b5e25fSSatish Balay MatScalar *v; 954831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 95549b5e25fSSatish Balay 95649b5e25fSSatish Balay PetscFunctionBegin; 957b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 95849b5e25fSSatish Balay if (yy != xx) { 959b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 96049b5e25fSSatish Balay } else { 96149b5e25fSSatish Balay y = x; 96249b5e25fSSatish Balay } 96349b5e25fSSatish Balay if (zz != yy) { 964a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 965b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 966a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 96749b5e25fSSatish Balay } else { 96849b5e25fSSatish Balay z = y; 96949b5e25fSSatish Balay } 97049b5e25fSSatish Balay 97149b5e25fSSatish Balay v = a->a; 97249b5e25fSSatish Balay xb = x; 97349b5e25fSSatish Balay 97449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 97549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 97649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 97749b5e25fSSatish Balay ib = aj + *ai; 978831a3094SHong Zhang jmin = 0; 9797fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 98049b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 98149b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 98249b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 98349b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 98449b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 98549b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 986831a3094SHong Zhang v += 36; jmin++; 9877fbae186SHong Zhang } 988831a3094SHong Zhang for (j=jmin; j<n; j++) { 98949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 99049b5e25fSSatish Balay cval = ib[j]*6; 99149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 99249b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 99349b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 99449b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 99549b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 99649b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 99749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 99849b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 99949b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 100049b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 100149b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 100249b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 100349b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 100449b5e25fSSatish Balay v += 36; 100549b5e25fSSatish Balay } 100649b5e25fSSatish Balay xb +=6; ai++; 100749b5e25fSSatish Balay } 100849b5e25fSSatish Balay 1009b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1010b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1011b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 101249b5e25fSSatish Balay 10136c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m)); 101449b5e25fSSatish Balay PetscFunctionReturn(0); 101549b5e25fSSatish Balay } 101649b5e25fSSatish Balay 10174a2ae208SSatish Balay #undef __FUNCT__ 10184a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 101949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 102049b5e25fSSatish Balay { 102149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 102287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 102349b5e25fSSatish Balay MatScalar *v; 1024831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 102549b5e25fSSatish Balay 102649b5e25fSSatish Balay PetscFunctionBegin; 1027b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 102849b5e25fSSatish Balay if (yy != xx) { 1029b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 103049b5e25fSSatish Balay } else { 103149b5e25fSSatish Balay y = x; 103249b5e25fSSatish Balay } 103349b5e25fSSatish Balay if (zz != yy) { 1034a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1035b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 1036a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 103749b5e25fSSatish Balay } else { 103849b5e25fSSatish Balay z = y; 103949b5e25fSSatish Balay } 104049b5e25fSSatish Balay 104149b5e25fSSatish Balay v = a->a; 104249b5e25fSSatish Balay xb = x; 104349b5e25fSSatish Balay 104449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 104549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 104649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 104749b5e25fSSatish Balay ib = aj + *ai; 1048831a3094SHong Zhang jmin = 0; 10497fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 105049b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 105149b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 105249b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 105349b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 105449b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 105549b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 105649b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1057831a3094SHong Zhang v += 49; jmin++; 10587fbae186SHong Zhang } 1059831a3094SHong Zhang for (j=jmin; j<n; j++) { 106049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 106149b5e25fSSatish Balay cval = ib[j]*7; 106249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 106349b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 106449b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 106549b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 106649b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 106749b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 106849b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 106949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 107049b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 107149b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 107249b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 107349b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 107449b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 107549b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 107649b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 107749b5e25fSSatish Balay v += 49; 107849b5e25fSSatish Balay } 107949b5e25fSSatish Balay xb +=7; ai++; 108049b5e25fSSatish Balay } 108149b5e25fSSatish Balay 1082b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1083b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1084b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 108549b5e25fSSatish Balay 10866c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m)); 108749b5e25fSSatish Balay PetscFunctionReturn(0); 108849b5e25fSSatish Balay } 108949b5e25fSSatish Balay 10904a2ae208SSatish Balay #undef __FUNCT__ 10914a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 109249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 109349b5e25fSSatish Balay { 109449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 109587828ca2SBarry Smith PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1096066653e3SSatish Balay MatScalar *v; 1097066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1098066653e3SSatish Balay int ncols,k; 109949b5e25fSSatish Balay 110049b5e25fSSatish Balay PetscFunctionBegin; 1101b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x; 110249b5e25fSSatish Balay if (yy != xx) { 1103b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 110449b5e25fSSatish Balay } else { 110549b5e25fSSatish Balay y = x; 110649b5e25fSSatish Balay } 110749b5e25fSSatish Balay if (zz != yy) { 1108a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1109b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z; 1110a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 111149b5e25fSSatish Balay } else { 111249b5e25fSSatish Balay z = y; 111349b5e25fSSatish Balay } 111449b5e25fSSatish Balay 111549b5e25fSSatish Balay aj = a->j; 111649b5e25fSSatish Balay v = a->a; 111749b5e25fSSatish Balay ii = a->i; 111849b5e25fSSatish Balay 111949b5e25fSSatish Balay if (!a->mult_work) { 112087828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 112149b5e25fSSatish Balay } 112249b5e25fSSatish Balay work = a->mult_work; 112349b5e25fSSatish Balay 112449b5e25fSSatish Balay 112549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 112649b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 112749b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 112849b5e25fSSatish Balay 112949b5e25fSSatish Balay /* upper triangular part */ 113049b5e25fSSatish Balay for (j=0; j<n; j++) { 113149b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 113249b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 113349b5e25fSSatish Balay workt += bs; 113449b5e25fSSatish Balay } 113549b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 113649b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 113749b5e25fSSatish Balay 113849b5e25fSSatish Balay /* strict lower triangular part */ 1139831a3094SHong Zhang idx = aj+ii[0]; 1140831a3094SHong Zhang if (*idx == i){ 114196b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1142831a3094SHong Zhang } 114349b5e25fSSatish Balay if (ncols > 0){ 114449b5e25fSSatish Balay workt = work; 114587828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1146831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1147831a3094SHong Zhang for (j=0; j<n; j++) { 1148831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 1149831a3094SHong Zhang /* idx++; */ 115049b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 115149b5e25fSSatish Balay workt += bs; 115249b5e25fSSatish Balay } 115349b5e25fSSatish Balay } 115449b5e25fSSatish Balay 115549b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 115649b5e25fSSatish Balay } 115749b5e25fSSatish Balay 1158b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1159b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1160b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 116149b5e25fSSatish Balay 11626c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 116349b5e25fSSatish Balay PetscFunctionReturn(0); 116449b5e25fSSatish Balay } 116549b5e25fSSatish Balay 11664a2ae208SSatish Balay #undef __FUNCT__ 11674a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqSBAIJ" 116849b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 116949b5e25fSSatish Balay { 1170eeeff2ecSHong Zhang int ierr; 1171eeeff2ecSHong Zhang 117249b5e25fSSatish Balay PetscFunctionBegin; 1173eeeff2ecSHong Zhang ierr = MatMult(A,xx,zz);CHKERRQ(ierr); 1174eeeff2ecSHong Zhang PetscFunctionReturn(0); 117549b5e25fSSatish Balay } 117649b5e25fSSatish Balay 11774a2ae208SSatish Balay #undef __FUNCT__ 11784a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ" 117949b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 118049b5e25fSSatish Balay 118149b5e25fSSatish Balay { 1182eeeff2ecSHong Zhang int ierr; 1183eeeff2ecSHong Zhang 118449b5e25fSSatish Balay PetscFunctionBegin; 1185eeeff2ecSHong Zhang ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1186eeeff2ecSHong Zhang PetscFunctionReturn(0); 118749b5e25fSSatish Balay } 118849b5e25fSSatish Balay 11894a2ae208SSatish Balay #undef __FUNCT__ 11904a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1191268466fbSBarry Smith int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA) 119249b5e25fSSatish Balay { 119349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 11946c6c5352SBarry Smith int one = 1,totalnz = a->bs2*a->nz; 119549b5e25fSSatish Balay 119649b5e25fSSatish Balay PetscFunctionBegin; 1197268466fbSBarry Smith BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one); 1198b0a32e0cSBarry Smith PetscLogFlops(totalnz); 119949b5e25fSSatish Balay PetscFunctionReturn(0); 120049b5e25fSSatish Balay } 120149b5e25fSSatish Balay 12024a2ae208SSatish Balay #undef __FUNCT__ 12034a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 120449b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 120549b5e25fSSatish Balay { 120649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 120749b5e25fSSatish Balay MatScalar *v = a->a; 120849b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1209831a3094SHong Zhang int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 1210831a3094SHong Zhang int *jl,*il,jmin,jmax,ierr,nexti,ik,*col; 121149b5e25fSSatish Balay 121249b5e25fSSatish Balay PetscFunctionBegin; 121349b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 121449b5e25fSSatish Balay for (k=0; k<mbs; k++){ 121549b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1216831a3094SHong Zhang col = aj + jmin; 1217831a3094SHong Zhang if (*col == k){ /* diagonal block */ 121849b5e25fSSatish Balay for (i=0; i<bs2; i++){ 121949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 122049b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 122149b5e25fSSatish Balay #else 122249b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 122349b5e25fSSatish Balay #endif 122449b5e25fSSatish Balay } 1225831a3094SHong Zhang jmin++; 1226831a3094SHong Zhang } 1227831a3094SHong Zhang for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 122849b5e25fSSatish Balay for (i=0; i<bs2; i++){ 122949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 123049b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 123149b5e25fSSatish Balay #else 123249b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 123349b5e25fSSatish Balay #endif 123449b5e25fSSatish Balay } 123549b5e25fSSatish Balay } 123649b5e25fSSatish Balay } 123749b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 123849b5e25fSSatish Balay 123949b5e25fSSatish Balay } else if (type == NORM_INFINITY) { /* maximum row sum */ 124082502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr); 124182502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 124282502324SSatish Balay ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr); 124349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 124449b5e25fSSatish Balay jl[i] = mbs; il[0] = 0; 124549b5e25fSSatish Balay } 124649b5e25fSSatish Balay 124749b5e25fSSatish Balay *norm = 0.0; 124849b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 124949b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 125049b5e25fSSatish Balay 125149b5e25fSSatish Balay /*-- col sum --*/ 125249b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1253831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1254831a3094SHong Zhang at step k */ 125549b5e25fSSatish Balay while (i<mbs){ 125649b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 125749b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 125849b5e25fSSatish Balay for (j=0; j<bs; j++){ 125949b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 126049b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 126149b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 126249b5e25fSSatish Balay } 126349b5e25fSSatish Balay } 126449b5e25fSSatish Balay /* update il, jl */ 1265831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1266831a3094SHong Zhang jmax = a->i[i+1]; 126749b5e25fSSatish Balay if (jmin < jmax){ 126849b5e25fSSatish Balay il[i] = jmin; 126949b5e25fSSatish Balay j = a->j[jmin]; 127049b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 127149b5e25fSSatish Balay } 127249b5e25fSSatish Balay i = nexti; 127349b5e25fSSatish Balay } 127449b5e25fSSatish Balay 127549b5e25fSSatish Balay /*-- row sum --*/ 127649b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 127749b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 127849b5e25fSSatish Balay for (j=0; j<bs; j++){ 127949b5e25fSSatish Balay v = a->a + i*bs2 + j; 128049b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 128149b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); 128249b5e25fSSatish Balay v += bs; 128349b5e25fSSatish Balay } 128449b5e25fSSatish Balay } 128549b5e25fSSatish Balay } 128649b5e25fSSatish Balay /* add k_th block row to il, jl */ 1287831a3094SHong Zhang col = aj+jmin; 1288831a3094SHong Zhang if (*col == k) jmin++; 128949b5e25fSSatish Balay if (jmin < jmax){ 129049b5e25fSSatish Balay il[k] = jmin; 129149b5e25fSSatish Balay j = a->j[jmin]; 129249b5e25fSSatish Balay jl[k] = jl[j]; jl[j] = k; 129349b5e25fSSatish Balay } 129449b5e25fSSatish Balay for (j=0; j<bs; j++){ 129549b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 129649b5e25fSSatish Balay } 129749b5e25fSSatish Balay } 129849b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 129949b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 130049b5e25fSSatish Balay ierr = PetscFree(sum);CHKERRQ(ierr); 130149b5e25fSSatish Balay } else { 1302347d480fSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 130349b5e25fSSatish Balay } 130449b5e25fSSatish Balay PetscFunctionReturn(0); 130549b5e25fSSatish Balay } 130649b5e25fSSatish Balay 13074a2ae208SSatish Balay #undef __FUNCT__ 13084a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 130949b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 131049b5e25fSSatish Balay { 131149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 131249b5e25fSSatish Balay int ierr; 131349b5e25fSSatish Balay 131449b5e25fSSatish Balay PetscFunctionBegin; 131549b5e25fSSatish Balay 131649b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 13176c6c5352SBarry Smith if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) { 1318ef511fbeSHong Zhang *flg = PETSC_FALSE; 1319ef511fbeSHong Zhang PetscFunctionReturn(0); 132049b5e25fSSatish Balay } 132149b5e25fSSatish Balay 132249b5e25fSSatish Balay /* if the a->i are the same */ 132349b5e25fSSatish Balay ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 132449b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 132549b5e25fSSatish Balay PetscFunctionReturn(0); 132649b5e25fSSatish Balay } 132749b5e25fSSatish Balay 132849b5e25fSSatish Balay /* if a->j are the same */ 13296c6c5352SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr); 133049b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 133149b5e25fSSatish Balay PetscFunctionReturn(0); 133249b5e25fSSatish Balay } 133349b5e25fSSatish Balay /* if a->a are the same */ 13346c6c5352SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 133549b5e25fSSatish Balay 1336935af2e7SHong Zhang PetscFunctionReturn(0); 133749b5e25fSSatish Balay } 133849b5e25fSSatish Balay 13394a2ae208SSatish Balay #undef __FUNCT__ 13404a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 134149b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 134249b5e25fSSatish Balay { 134349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 134449b5e25fSSatish Balay int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 134587828ca2SBarry Smith PetscScalar *x,zero = 0.0; 134649b5e25fSSatish Balay MatScalar *aa,*aa_j; 134749b5e25fSSatish Balay 134849b5e25fSSatish Balay PetscFunctionBegin; 134949b5e25fSSatish Balay bs = a->bs; 135082799104SHong Zhang if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 135182799104SHong Zhang 135249b5e25fSSatish Balay aa = a->a; 135349b5e25fSSatish Balay ai = a->i; 135449b5e25fSSatish Balay aj = a->j; 135549b5e25fSSatish Balay ambs = a->mbs; 135649b5e25fSSatish Balay bs2 = a->bs2; 135749b5e25fSSatish Balay 135849b5e25fSSatish Balay ierr = VecSet(&zero,v);CHKERRQ(ierr); 1359b1d4fb26SBarry Smith ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 136049b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1361ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 136249b5e25fSSatish Balay for (i=0; i<ambs; i++) { 136349b5e25fSSatish Balay j=ai[i]; 136449b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 136549b5e25fSSatish Balay row = i*bs; 136649b5e25fSSatish Balay aa_j = aa + j*bs2; 136782799104SHong Zhang if (A->factor && bs==1){ 136882799104SHong Zhang for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k]; 136982799104SHong Zhang } else { 137049b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 137149b5e25fSSatish Balay } 137249b5e25fSSatish Balay } 137382799104SHong Zhang } 137482799104SHong Zhang 1375b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 137649b5e25fSSatish Balay PetscFunctionReturn(0); 137749b5e25fSSatish Balay } 137849b5e25fSSatish Balay 13794a2ae208SSatish Balay #undef __FUNCT__ 13804a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 138149b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 138249b5e25fSSatish Balay { 138349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 138487828ca2SBarry Smith PetscScalar *l,*r,x,*li,*ri; 138549b5e25fSSatish Balay MatScalar *aa,*v; 1386066653e3SSatish Balay int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 138749b5e25fSSatish Balay 138849b5e25fSSatish Balay PetscFunctionBegin; 138949b5e25fSSatish Balay ai = a->i; 139049b5e25fSSatish Balay aj = a->j; 139149b5e25fSSatish Balay aa = a->a; 1392ef511fbeSHong Zhang m = A->m; 139349b5e25fSSatish Balay bs = a->bs; 139449b5e25fSSatish Balay mbs = a->mbs; 139549b5e25fSSatish Balay bs2 = a->bs2; 139649b5e25fSSatish Balay 139749b5e25fSSatish Balay if (ll != rr) { 1398347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 139949b5e25fSSatish Balay } 140049b5e25fSSatish Balay if (ll) { 1401b1d4fb26SBarry Smith ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr); 140249b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1403347d480fSBarry Smith if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 140449b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 140549b5e25fSSatish Balay M = ai[i+1] - ai[i]; 140649b5e25fSSatish Balay li = l + i*bs; 140749b5e25fSSatish Balay v = aa + bs2*ai[i]; 140849b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 140949b5e25fSSatish Balay for (k=0; k<bs2; k++) { 141049b5e25fSSatish Balay (*v++) *= li[k%bs]; 141149b5e25fSSatish Balay } 141249b5e25fSSatish Balay #ifdef CONT 141349b5e25fSSatish Balay /* will be used to replace the above loop */ 141449b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 141549b5e25fSSatish Balay for (k=0; k<bs; k++) { /* column value */ 141649b5e25fSSatish Balay x = ri[k]; 141749b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 141849b5e25fSSatish Balay } 141949b5e25fSSatish Balay #endif 142049b5e25fSSatish Balay 142149b5e25fSSatish Balay } 142249b5e25fSSatish Balay } 1423b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr); 14246c6c5352SBarry Smith PetscLogFlops(2*a->nz); 142549b5e25fSSatish Balay } 142649b5e25fSSatish Balay /* will be deleted */ 142749b5e25fSSatish Balay if (rr) { 1428b1d4fb26SBarry Smith ierr = VecGetArrayFast(rr,&r);CHKERRQ(ierr); 142949b5e25fSSatish Balay ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1430347d480fSBarry Smith if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 143149b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 143249b5e25fSSatish Balay M = ai[i+1] - ai[i]; 143349b5e25fSSatish Balay v = aa + bs2*ai[i]; 143449b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 143549b5e25fSSatish Balay ri = r + bs*aj[ai[i]+j]; 143649b5e25fSSatish Balay for (k=0; k<bs; k++) { 143749b5e25fSSatish Balay x = ri[k]; 143849b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 143949b5e25fSSatish Balay } 144049b5e25fSSatish Balay } 144149b5e25fSSatish Balay } 1442b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(rr,&r);CHKERRQ(ierr); 14436c6c5352SBarry Smith PetscLogFlops(a->nz); 144449b5e25fSSatish Balay } 144549b5e25fSSatish Balay PetscFunctionReturn(0); 144649b5e25fSSatish Balay } 144749b5e25fSSatish Balay 14484a2ae208SSatish Balay #undef __FUNCT__ 14494a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 145049b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 145149b5e25fSSatish Balay { 145249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 145349b5e25fSSatish Balay 145449b5e25fSSatish Balay PetscFunctionBegin; 1455ef511fbeSHong Zhang info->rows_global = (double)A->m; 1456ef511fbeSHong Zhang info->columns_global = (double)A->m; 1457ef511fbeSHong Zhang info->rows_local = (double)A->m; 1458ef511fbeSHong Zhang info->columns_local = (double)A->m; 145949b5e25fSSatish Balay info->block_size = a->bs2; 14606c6c5352SBarry Smith info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */ 14616c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 146249b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 146349b5e25fSSatish Balay info->assemblies = A->num_ass; 146449b5e25fSSatish Balay info->mallocs = a->reallocs; 146549b5e25fSSatish Balay info->memory = A->mem; 146649b5e25fSSatish Balay if (A->factor) { 146749b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 146849b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 146949b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 147049b5e25fSSatish Balay } else { 147149b5e25fSSatish Balay info->fill_ratio_given = 0; 147249b5e25fSSatish Balay info->fill_ratio_needed = 0; 147349b5e25fSSatish Balay info->factor_mallocs = 0; 147449b5e25fSSatish Balay } 147549b5e25fSSatish Balay PetscFunctionReturn(0); 147649b5e25fSSatish Balay } 147749b5e25fSSatish Balay 147849b5e25fSSatish Balay 14794a2ae208SSatish Balay #undef __FUNCT__ 14804a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 148149b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A) 148249b5e25fSSatish Balay { 148349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148449b5e25fSSatish Balay int ierr; 148549b5e25fSSatish Balay 148649b5e25fSSatish Balay PetscFunctionBegin; 148749b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 148849b5e25fSSatish Balay PetscFunctionReturn(0); 148949b5e25fSSatish Balay } 1490dc354874SHong Zhang 14914a2ae208SSatish Balay #undef __FUNCT__ 14924a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ" 1493dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1494dc354874SHong Zhang { 1495dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1496d0f6400bSHong Zhang int ierr,i,j,n,row,col,bs,*ai,*aj,mbs; 1497c3fca9a7SHong Zhang PetscReal atmp; 1498273d9f13SBarry Smith MatScalar *aa; 149987828ca2SBarry Smith PetscScalar zero = 0.0,*x; 1500d0f6400bSHong Zhang int ncols,brow,bcol,krow,kcol; 1501dc354874SHong Zhang 1502dc354874SHong Zhang PetscFunctionBegin; 1503dc354874SHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1504dc354874SHong Zhang bs = a->bs; 1505dc354874SHong Zhang aa = a->a; 1506dc354874SHong Zhang ai = a->i; 1507dc354874SHong Zhang aj = a->j; 150844117c81SHong Zhang mbs = a->mbs; 1509dc354874SHong Zhang 1510dc354874SHong Zhang ierr = VecSet(&zero,v);CHKERRQ(ierr); 1511b1d4fb26SBarry Smith ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 1512dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1513ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 151444117c81SHong Zhang for (i=0; i<mbs; i++) { 1515d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1516d0f6400bSHong Zhang brow = bs*i; 151744117c81SHong Zhang for (j=0; j<ncols; j++){ 1518d0f6400bSHong Zhang bcol = bs*(*aj); 151944117c81SHong Zhang for (kcol=0; kcol<bs; kcol++){ 1520d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 152144117c81SHong Zhang for (krow=0; krow<bs; krow++){ 1522d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1523d0f6400bSHong Zhang row = brow + krow; /* row index */ 152444117c81SHong Zhang /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 1525c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1526c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 152744117c81SHong Zhang } 152844117c81SHong Zhang } 1529d0f6400bSHong Zhang aj++; 1530dc354874SHong Zhang } 1531dc354874SHong Zhang } 1532b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 1533dc354874SHong Zhang PetscFunctionReturn(0); 1534dc354874SHong Zhang } 1535