173f4d377SMatthew Knepley /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/ 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 449b5e25fSSatish Balay #include "src/inline/spops.h" 549b5e25fSSatish Balay #include "src/inline/ilu.h" 649b5e25fSSatish Balay #include "petscbt.h" 73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 849b5e25fSSatish Balay 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 11268466fbSBarry Smith int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov) 1249b5e25fSSatish Balay { 1349b5e25fSSatish Balay PetscFunctionBegin; 140eaf17bfSBarry Smith SETERRQ(1,"Function not yet written for SBAIJ format"); 155535bfb1SHong Zhang /* PetscFunctionReturn(0); */ 1649b5e25fSSatish Balay } 1749b5e25fSSatish Balay 184a2ae208SSatish Balay #undef __FUNCT__ 194a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 2049b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 2149b5e25fSSatish Balay { 2249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 2349b5e25fSSatish Balay int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 2449b5e25fSSatish Balay int row,mat_i,*mat_j,tcol,*mat_ilen; 2549b5e25fSSatish Balay int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 2649b5e25fSSatish Balay int *aj = a->j,*ai = a->i; 2749b5e25fSSatish Balay MatScalar *mat_a; 2849b5e25fSSatish Balay Mat C; 2949b5e25fSSatish Balay PetscTruth flag; 3049b5e25fSSatish Balay 3149b5e25fSSatish Balay PetscFunctionBegin; 3249b5e25fSSatish Balay 33ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 3449b5e25fSSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 35347d480fSBarry Smith if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 3649b5e25fSSatish Balay 3749b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 3849b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 3949b5e25fSSatish Balay 40b0a32e0cSBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr); 4149b5e25fSSatish Balay ssmap = smap; 42b0a32e0cSBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr); 4349b5e25fSSatish Balay ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 4449b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 4549b5e25fSSatish Balay /* determine lens of each row */ 4649b5e25fSSatish Balay for (i=0; i<nrows; i++) { 4749b5e25fSSatish Balay kstart = ai[irow[i]]; 4849b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 4949b5e25fSSatish Balay lens[i] = 0; 5049b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 5149b5e25fSSatish Balay if (ssmap[aj[k]]) { 5249b5e25fSSatish Balay lens[i]++; 5349b5e25fSSatish Balay } 5449b5e25fSSatish Balay } 5549b5e25fSSatish Balay } 5649b5e25fSSatish Balay /* Create and fill new matrix */ 5749b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 5849b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 5949b5e25fSSatish Balay 60347d480fSBarry Smith if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 6149b5e25fSSatish Balay ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 6249b5e25fSSatish Balay if (flag == PETSC_FALSE) { 63347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 6449b5e25fSSatish Balay } 6549b5e25fSSatish Balay ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 6649b5e25fSSatish Balay C = *B; 6749b5e25fSSatish Balay } else { 68*e2d9671bSKris Buschelman ierr = MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr); 69*e2d9671bSKris Buschelman ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 70*e2d9671bSKris Buschelman ierr = MatSeqSBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr); 7149b5e25fSSatish Balay } 7249b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 7349b5e25fSSatish Balay for (i=0; i<nrows; i++) { 7449b5e25fSSatish Balay row = irow[i]; 7549b5e25fSSatish Balay kstart = ai[row]; 7649b5e25fSSatish Balay kend = kstart + a->ilen[row]; 7749b5e25fSSatish Balay mat_i = c->i[i]; 7849b5e25fSSatish Balay mat_j = c->j + mat_i; 7949b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 8049b5e25fSSatish Balay mat_ilen = c->ilen + i; 8149b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 8249b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 8349b5e25fSSatish Balay *mat_j++ = tcol - 1; 8449b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 8549b5e25fSSatish Balay mat_a += bs2; 8649b5e25fSSatish Balay (*mat_ilen)++; 8749b5e25fSSatish Balay } 8849b5e25fSSatish Balay } 8949b5e25fSSatish Balay } 9049b5e25fSSatish Balay 9149b5e25fSSatish Balay /* Free work space */ 9249b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 9349b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 9449b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9549b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9649b5e25fSSatish Balay 9749b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 9849b5e25fSSatish Balay *B = C; 9949b5e25fSSatish Balay PetscFunctionReturn(0); 10049b5e25fSSatish Balay } 10149b5e25fSSatish Balay 1024a2ae208SSatish Balay #undef __FUNCT__ 1034a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 10449b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 10549b5e25fSSatish Balay { 10649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 10749b5e25fSSatish Balay IS is1; 10849b5e25fSSatish Balay int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 10949b5e25fSSatish Balay 11049b5e25fSSatish Balay PetscFunctionBegin; 111ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 11249b5e25fSSatish Balay 11349b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 11449b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 11549b5e25fSSatish Balay 11649b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 11749b5e25fSSatish Balay and form the IS with compressed IS */ 11882502324SSatish Balay ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr); 11949b5e25fSSatish Balay iary = vary + a->mbs; 12049b5e25fSSatish Balay ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 12149b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 12249b5e25fSSatish Balay 12349b5e25fSSatish Balay count = 0; 12449b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 125ac355199SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks"); 12649b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 12749b5e25fSSatish Balay } 12849b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 12949b5e25fSSatish Balay 13049b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 13149b5e25fSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 13249b5e25fSSatish Balay 13349b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 13449b5e25fSSatish Balay ISDestroy(is1); 13549b5e25fSSatish Balay PetscFunctionReturn(0); 13649b5e25fSSatish Balay } 13749b5e25fSSatish Balay 1384a2ae208SSatish Balay #undef __FUNCT__ 1394a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 140268466fbSBarry Smith int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 14149b5e25fSSatish Balay { 14249b5e25fSSatish Balay int ierr,i; 14349b5e25fSSatish Balay 14449b5e25fSSatish Balay PetscFunctionBegin; 14549b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 14682502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 14749b5e25fSSatish Balay } 14849b5e25fSSatish Balay 14949b5e25fSSatish Balay for (i=0; i<n; i++) { 15049b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 15149b5e25fSSatish Balay } 15249b5e25fSSatish Balay PetscFunctionReturn(0); 15349b5e25fSSatish Balay } 15449b5e25fSSatish Balay 15549b5e25fSSatish Balay /* -------------------------------------------------------*/ 15649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 15749b5e25fSSatish Balay /* -------------------------------------------------------*/ 158d9eff348SSatish Balay #include "petscblaslapack.h" 15949b5e25fSSatish Balay 1604a2ae208SSatish Balay #undef __FUNCT__ 1614a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1" 16249b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 16349b5e25fSSatish Balay { 16449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 16587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,zero=0.0; 16649b5e25fSSatish Balay MatScalar *v; 167831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 16849b5e25fSSatish Balay 16949b5e25fSSatish Balay PetscFunctionBegin; 17049b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 171b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 172b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 17349b5e25fSSatish Balay 17449b5e25fSSatish Balay v = a->a; 17549b5e25fSSatish Balay xb = x; 17649b5e25fSSatish Balay 17749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 17849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 17949b5e25fSSatish Balay x1 = xb[0]; 18049b5e25fSSatish Balay ib = aj + *ai; 181831a3094SHong Zhang jmin = 0; 182831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 183831a3094SHong Zhang z[i] += *v++ * x[*ib++]; 184831a3094SHong Zhang jmin++; 185831a3094SHong Zhang } 186831a3094SHong Zhang for (j=jmin; j<n; j++) { 18749b5e25fSSatish Balay cval = *ib; 18849b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 18949b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 19049b5e25fSSatish Balay } 19149b5e25fSSatish Balay xb++; ai++; 19249b5e25fSSatish Balay } 19349b5e25fSSatish Balay 194b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 195b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 1966c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m) - A->m); /* nz = (nz+m)/2 */ 19749b5e25fSSatish Balay PetscFunctionReturn(0); 19849b5e25fSSatish Balay } 19949b5e25fSSatish Balay 2004a2ae208SSatish Balay #undef __FUNCT__ 2014a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 20249b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 20349b5e25fSSatish Balay { 20449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 20649b5e25fSSatish Balay MatScalar *v; 207831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 20849b5e25fSSatish Balay 20949b5e25fSSatish Balay 21049b5e25fSSatish Balay PetscFunctionBegin; 21149b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 212b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 213b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 21449b5e25fSSatish Balay 21549b5e25fSSatish Balay v = a->a; 21649b5e25fSSatish Balay xb = x; 21749b5e25fSSatish Balay 21849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 21949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 22049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 22149b5e25fSSatish Balay ib = aj + *ai; 222831a3094SHong Zhang jmin = 0; 2237fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 22449b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 22549b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 226831a3094SHong Zhang v += 4; jmin++; 2277fbae186SHong Zhang } 228831a3094SHong Zhang for (j=jmin; j<n; j++) { 22949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 23049b5e25fSSatish Balay cval = ib[j]*2; 23149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 23249b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 23349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 23449b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 23549b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 23649b5e25fSSatish Balay v += 4; 23749b5e25fSSatish Balay } 23849b5e25fSSatish Balay xb +=2; ai++; 23949b5e25fSSatish Balay } 24049b5e25fSSatish Balay 241b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 242b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 2436c6c5352SBarry Smith PetscLogFlops(8*(a->nz*2 - A->m) - A->m); 24449b5e25fSSatish Balay PetscFunctionReturn(0); 24549b5e25fSSatish Balay } 24649b5e25fSSatish Balay 2474a2ae208SSatish Balay #undef __FUNCT__ 2484a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 24949b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 25049b5e25fSSatish Balay { 25149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 25287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 25349b5e25fSSatish Balay MatScalar *v; 254831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 25549b5e25fSSatish Balay 25649b5e25fSSatish Balay 25749b5e25fSSatish Balay PetscFunctionBegin; 25849b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 259b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 260b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 26149b5e25fSSatish Balay 26249b5e25fSSatish Balay v = a->a; 26349b5e25fSSatish Balay xb = x; 26449b5e25fSSatish Balay 26549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 26649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 26749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 26849b5e25fSSatish Balay ib = aj + *ai; 269831a3094SHong Zhang jmin = 0; 2707fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 27149b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 27249b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 27349b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 274831a3094SHong Zhang v += 9; jmin++; 2757fbae186SHong Zhang } 276831a3094SHong Zhang for (j=jmin; j<n; j++) { 27749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 27849b5e25fSSatish Balay cval = ib[j]*3; 27949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 28049b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 28149b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 28249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 28349b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 28449b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 28549b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 28649b5e25fSSatish Balay v += 9; 28749b5e25fSSatish Balay } 28849b5e25fSSatish Balay xb +=3; ai++; 28949b5e25fSSatish Balay } 29049b5e25fSSatish Balay 291b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 292b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 2936c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m) - A->m); 29449b5e25fSSatish Balay PetscFunctionReturn(0); 29549b5e25fSSatish Balay } 29649b5e25fSSatish Balay 2974a2ae208SSatish Balay #undef __FUNCT__ 2984a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 29949b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 30049b5e25fSSatish Balay { 30149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 30287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 30349b5e25fSSatish Balay MatScalar *v; 304831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 30549b5e25fSSatish Balay 30649b5e25fSSatish Balay PetscFunctionBegin; 30749b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 308b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 309b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 31049b5e25fSSatish Balay 31149b5e25fSSatish Balay v = a->a; 31249b5e25fSSatish Balay xb = x; 31349b5e25fSSatish Balay 31449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 31549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 31649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 31749b5e25fSSatish Balay ib = aj + *ai; 318831a3094SHong Zhang jmin = 0; 3197fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 32049b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 32149b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 32249b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 32349b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 324831a3094SHong Zhang v += 16; jmin++; 3257fbae186SHong Zhang } 326831a3094SHong Zhang for (j=jmin; j<n; j++) { 32749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 32849b5e25fSSatish Balay cval = ib[j]*4; 32949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 33049b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 33149b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 33249b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 33349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 33449b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 33549b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 33649b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 33749b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 33849b5e25fSSatish Balay v += 16; 33949b5e25fSSatish Balay } 34049b5e25fSSatish Balay xb +=4; ai++; 34149b5e25fSSatish Balay } 34249b5e25fSSatish Balay 343b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 344b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 3456c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m) - A->m); 34649b5e25fSSatish Balay PetscFunctionReturn(0); 34749b5e25fSSatish Balay } 34849b5e25fSSatish Balay 3494a2ae208SSatish Balay #undef __FUNCT__ 3504a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 35149b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 35249b5e25fSSatish Balay { 35349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 35487828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 35549b5e25fSSatish Balay MatScalar *v; 356831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 35749b5e25fSSatish Balay 35849b5e25fSSatish Balay PetscFunctionBegin; 35949b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 360b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 361b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 36249b5e25fSSatish Balay 36349b5e25fSSatish Balay v = a->a; 36449b5e25fSSatish Balay xb = x; 36549b5e25fSSatish Balay 36649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 36749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 36849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 36949b5e25fSSatish Balay ib = aj + *ai; 370831a3094SHong Zhang jmin = 0; 3717fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 37249b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 37349b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 37449b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 37549b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 37649b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 377831a3094SHong Zhang v += 25; jmin++; 3787fbae186SHong Zhang } 379831a3094SHong Zhang for (j=jmin; j<n; j++) { 38049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 38149b5e25fSSatish Balay cval = ib[j]*5; 38249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 38349b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 38449b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 38549b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 38649b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 38749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 38849b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 38949b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 39049b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 39149b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 39249b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 39349b5e25fSSatish Balay v += 25; 39449b5e25fSSatish Balay } 39549b5e25fSSatish Balay xb +=5; ai++; 39649b5e25fSSatish Balay } 39749b5e25fSSatish Balay 398b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 399b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 4006c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m) - A->m); 40149b5e25fSSatish Balay PetscFunctionReturn(0); 40249b5e25fSSatish Balay } 40349b5e25fSSatish Balay 40449b5e25fSSatish Balay 4054a2ae208SSatish Balay #undef __FUNCT__ 4064a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 40749b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 40849b5e25fSSatish Balay { 40949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41087828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 41149b5e25fSSatish Balay MatScalar *v; 412831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 41349b5e25fSSatish Balay 41449b5e25fSSatish Balay PetscFunctionBegin; 41549b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 416b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 417b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 41849b5e25fSSatish Balay 41949b5e25fSSatish Balay v = a->a; 42049b5e25fSSatish Balay xb = x; 42149b5e25fSSatish Balay 42249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 42349b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 42449b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 42549b5e25fSSatish Balay ib = aj + *ai; 426831a3094SHong Zhang jmin = 0; 4277fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 42849b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 42949b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 43049b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 43149b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 43249b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 43349b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 434831a3094SHong Zhang v += 36; jmin++; 4357fbae186SHong Zhang } 436831a3094SHong Zhang for (j=jmin; j<n; j++) { 43749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 43849b5e25fSSatish Balay cval = ib[j]*6; 43949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 44049b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 44149b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 44249b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 44349b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 44449b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 44549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 44649b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 44749b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 44849b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 44949b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 45049b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 45149b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 45249b5e25fSSatish Balay v += 36; 45349b5e25fSSatish Balay } 45449b5e25fSSatish Balay xb +=6; ai++; 45549b5e25fSSatish Balay } 45649b5e25fSSatish Balay 457b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 458b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 4596c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m) - A->m); 46049b5e25fSSatish Balay PetscFunctionReturn(0); 46149b5e25fSSatish Balay } 4624a2ae208SSatish Balay #undef __FUNCT__ 4634a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 46449b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 46549b5e25fSSatish Balay { 46649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 46787828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 46849b5e25fSSatish Balay MatScalar *v; 469831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 47049b5e25fSSatish Balay 47149b5e25fSSatish Balay PetscFunctionBegin; 47249b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 473b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 474b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 47549b5e25fSSatish Balay 47649b5e25fSSatish Balay v = a->a; 47749b5e25fSSatish Balay xb = x; 47849b5e25fSSatish Balay 47949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 48049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 48149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 48249b5e25fSSatish Balay ib = aj + *ai; 483831a3094SHong Zhang jmin = 0; 4847fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 48549b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 48649b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 48749b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 48849b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 48949b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 49049b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 49149b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 492831a3094SHong Zhang v += 49; jmin++; 4937fbae186SHong Zhang } 494831a3094SHong Zhang for (j=jmin; j<n; j++) { 49549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 49649b5e25fSSatish Balay cval = ib[j]*7; 49749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 49849b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 49949b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 50049b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 50149b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 50249b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 50349b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 50449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 50549b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 50649b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 50749b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 50849b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 50949b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 51049b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 51149b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 51249b5e25fSSatish Balay v += 49; 51349b5e25fSSatish Balay } 51449b5e25fSSatish Balay xb +=7; ai++; 51549b5e25fSSatish Balay } 516b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 517b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 5186c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m) - A->m); 51949b5e25fSSatish Balay PetscFunctionReturn(0); 52049b5e25fSSatish Balay } 52149b5e25fSSatish Balay 52249b5e25fSSatish Balay /* 52349b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 52449b5e25fSSatish Balay */ 5254a2ae208SSatish Balay #undef __FUNCT__ 5264a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 52749b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 52849b5e25fSSatish Balay { 52949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 53087828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 5310b60a74dSHong Zhang MatScalar *v; 532066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 5330b60a74dSHong Zhang int ncols,k; 53449b5e25fSSatish Balay 53549b5e25fSSatish Balay PetscFunctionBegin; 53649b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 537b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x; 538b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z; 53949b5e25fSSatish Balay 54049b5e25fSSatish Balay aj = a->j; 54149b5e25fSSatish Balay v = a->a; 54249b5e25fSSatish Balay ii = a->i; 54349b5e25fSSatish Balay 54449b5e25fSSatish Balay if (!a->mult_work) { 54587828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 54649b5e25fSSatish Balay } 54749b5e25fSSatish Balay work = a->mult_work; 54849b5e25fSSatish Balay 54949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 55049b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 55149b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 55249b5e25fSSatish Balay 55349b5e25fSSatish Balay /* upper triangular part */ 55449b5e25fSSatish Balay for (j=0; j<n; j++) { 55549b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 55649b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 55749b5e25fSSatish Balay workt += bs; 55849b5e25fSSatish Balay } 55949b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 56049b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 56149b5e25fSSatish Balay 56249b5e25fSSatish Balay /* strict lower triangular part */ 563831a3094SHong Zhang idx = aj+ii[0]; 564831a3094SHong Zhang if (*idx == i){ 56596b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 566831a3094SHong Zhang } 56796b9376eSHong Zhang 56849b5e25fSSatish Balay if (ncols > 0){ 56949b5e25fSSatish Balay workt = work; 57087828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 571831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 572831a3094SHong Zhang for (j=0; j<n; j++) { 573831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 57449b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 57549b5e25fSSatish Balay workt += bs; 57649b5e25fSSatish Balay } 57749b5e25fSSatish Balay } 57849b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 57949b5e25fSSatish Balay } 58049b5e25fSSatish Balay 581b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 582b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 5836c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m); 58449b5e25fSSatish Balay PetscFunctionReturn(0); 58549b5e25fSSatish Balay } 58649b5e25fSSatish Balay 5874a2ae208SSatish Balay #undef __FUNCT__ 5884a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 58949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 59049b5e25fSSatish Balay { 59149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1; 59349b5e25fSSatish Balay MatScalar *v; 594831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 59549b5e25fSSatish Balay 59649b5e25fSSatish Balay PetscFunctionBegin; 597b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 59849b5e25fSSatish Balay if (yy != xx) { 599b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 60049b5e25fSSatish Balay } else { 60149b5e25fSSatish Balay y = x; 60249b5e25fSSatish Balay } 60349b5e25fSSatish Balay if (zz != yy) { 604a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 605b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 606a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 60749b5e25fSSatish Balay } else { 60849b5e25fSSatish Balay z = y; 60949b5e25fSSatish Balay } 61049b5e25fSSatish Balay 61149b5e25fSSatish Balay v = a->a; 61249b5e25fSSatish Balay xb = x; 61349b5e25fSSatish Balay 61449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 61549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 61649b5e25fSSatish Balay x1 = xb[0]; 61749b5e25fSSatish Balay ib = aj + *ai; 618831a3094SHong Zhang jmin = 0; 619831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 620831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 621831a3094SHong Zhang } 622831a3094SHong Zhang for (j=jmin; j<n; j++) { 62349b5e25fSSatish Balay cval = *ib; 62449b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 62549b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 62649b5e25fSSatish Balay } 62749b5e25fSSatish Balay xb++; ai++; 62849b5e25fSSatish Balay } 62949b5e25fSSatish Balay 630b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 631b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 632b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 63349b5e25fSSatish Balay 6346c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 63549b5e25fSSatish Balay PetscFunctionReturn(0); 63649b5e25fSSatish Balay } 63749b5e25fSSatish Balay 6384a2ae208SSatish Balay #undef __FUNCT__ 6394a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 64049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 64149b5e25fSSatish Balay { 64249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 64387828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2; 64449b5e25fSSatish Balay MatScalar *v; 645831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 64649b5e25fSSatish Balay 64749b5e25fSSatish Balay PetscFunctionBegin; 648b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 64949b5e25fSSatish Balay if (yy != xx) { 650b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 65149b5e25fSSatish Balay } else { 65249b5e25fSSatish Balay y = x; 65349b5e25fSSatish Balay } 65449b5e25fSSatish Balay if (zz != yy) { 655a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 656b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 657a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 65849b5e25fSSatish Balay } else { 65949b5e25fSSatish Balay z = y; 66049b5e25fSSatish Balay } 66149b5e25fSSatish Balay 66249b5e25fSSatish Balay v = a->a; 66349b5e25fSSatish Balay xb = x; 66449b5e25fSSatish Balay 66549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 66649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 66749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 66849b5e25fSSatish Balay ib = aj + *ai; 669831a3094SHong Zhang jmin = 0; 6707fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 67149b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 67249b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 673831a3094SHong Zhang v += 4; jmin++; 6747fbae186SHong Zhang } 675831a3094SHong Zhang for (j=jmin; j<n; j++) { 67649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 67749b5e25fSSatish Balay cval = ib[j]*2; 67849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 67949b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 68049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 68149b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 68249b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 68349b5e25fSSatish Balay v += 4; 68449b5e25fSSatish Balay } 68549b5e25fSSatish Balay xb +=2; ai++; 68649b5e25fSSatish Balay } 68749b5e25fSSatish Balay 688b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 689b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 690b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 69149b5e25fSSatish Balay 6926c6c5352SBarry Smith PetscLogFlops(4*(a->nz*2 - A->m)); 69349b5e25fSSatish Balay PetscFunctionReturn(0); 69449b5e25fSSatish Balay } 69549b5e25fSSatish Balay 6964a2ae208SSatish Balay #undef __FUNCT__ 6974a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 69849b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 69949b5e25fSSatish Balay { 70049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 70187828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3; 70249b5e25fSSatish Balay MatScalar *v; 703831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 70449b5e25fSSatish Balay 70549b5e25fSSatish Balay PetscFunctionBegin; 706b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 70749b5e25fSSatish Balay if (yy != xx) { 708b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 70949b5e25fSSatish Balay } else { 71049b5e25fSSatish Balay y = x; 71149b5e25fSSatish Balay } 71249b5e25fSSatish Balay if (zz != yy) { 713a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 714b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 715a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 71649b5e25fSSatish Balay } else { 71749b5e25fSSatish Balay z = y; 71849b5e25fSSatish Balay } 71949b5e25fSSatish Balay 72049b5e25fSSatish Balay v = a->a; 72149b5e25fSSatish Balay xb = x; 72249b5e25fSSatish Balay 72349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 72449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 72549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 72649b5e25fSSatish Balay ib = aj + *ai; 727831a3094SHong Zhang jmin = 0; 7287fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 72949b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 73049b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 73149b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 732831a3094SHong Zhang v += 9; jmin++; 7337fbae186SHong Zhang } 734831a3094SHong Zhang for (j=jmin; j<n; j++) { 73549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 73649b5e25fSSatish Balay cval = ib[j]*3; 73749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 73849b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 73949b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 74049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 74149b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 74249b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 74349b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 74449b5e25fSSatish Balay v += 9; 74549b5e25fSSatish Balay } 74649b5e25fSSatish Balay xb +=3; ai++; 74749b5e25fSSatish Balay } 74849b5e25fSSatish Balay 749b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 750b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 751b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 75249b5e25fSSatish Balay 7536c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m)); 75449b5e25fSSatish Balay PetscFunctionReturn(0); 75549b5e25fSSatish Balay } 75649b5e25fSSatish Balay 7574a2ae208SSatish Balay #undef __FUNCT__ 7584a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 75949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 76049b5e25fSSatish Balay { 76149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 76287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4; 76349b5e25fSSatish Balay MatScalar *v; 764831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 76549b5e25fSSatish Balay 76649b5e25fSSatish Balay PetscFunctionBegin; 767b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 76849b5e25fSSatish Balay if (yy != xx) { 769b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 77049b5e25fSSatish Balay } else { 77149b5e25fSSatish Balay y = x; 77249b5e25fSSatish Balay } 77349b5e25fSSatish Balay if (zz != yy) { 774a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 775b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 776a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 77749b5e25fSSatish Balay } else { 77849b5e25fSSatish Balay z = y; 77949b5e25fSSatish Balay } 78049b5e25fSSatish Balay 78149b5e25fSSatish Balay v = a->a; 78249b5e25fSSatish Balay xb = x; 78349b5e25fSSatish Balay 78449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 78549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 78649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 78749b5e25fSSatish Balay ib = aj + *ai; 788831a3094SHong Zhang jmin = 0; 7897fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 79049b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 79149b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 79249b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 79349b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 794831a3094SHong Zhang v += 16; jmin++; 7957fbae186SHong Zhang } 796831a3094SHong Zhang for (j=jmin; j<n; j++) { 79749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 79849b5e25fSSatish Balay cval = ib[j]*4; 79949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 80049b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 80149b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 80249b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 80349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 80449b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 80549b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 80649b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 80749b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 80849b5e25fSSatish Balay v += 16; 80949b5e25fSSatish Balay } 81049b5e25fSSatish Balay xb +=4; ai++; 81149b5e25fSSatish Balay } 81249b5e25fSSatish Balay 813b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 814b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 815b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 81649b5e25fSSatish Balay 8176c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m)); 81849b5e25fSSatish Balay PetscFunctionReturn(0); 81949b5e25fSSatish Balay } 82049b5e25fSSatish Balay 8214a2ae208SSatish Balay #undef __FUNCT__ 8224a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 82349b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 82449b5e25fSSatish Balay { 82549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 82687828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 82749b5e25fSSatish Balay MatScalar *v; 828831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 82949b5e25fSSatish Balay 83049b5e25fSSatish Balay PetscFunctionBegin; 831b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 83249b5e25fSSatish Balay if (yy != xx) { 833b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 83449b5e25fSSatish Balay } else { 83549b5e25fSSatish Balay y = x; 83649b5e25fSSatish Balay } 83749b5e25fSSatish Balay if (zz != yy) { 838a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 839b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 840a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 84149b5e25fSSatish Balay } else { 84249b5e25fSSatish Balay z = y; 84349b5e25fSSatish Balay } 84449b5e25fSSatish Balay 84549b5e25fSSatish Balay v = a->a; 84649b5e25fSSatish Balay xb = x; 84749b5e25fSSatish Balay 84849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 84949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 85049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 85149b5e25fSSatish Balay ib = aj + *ai; 852831a3094SHong Zhang jmin = 0; 8537fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 85449b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 85549b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 85649b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 85749b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 85849b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 859831a3094SHong Zhang v += 25; jmin++; 8607fbae186SHong Zhang } 861831a3094SHong Zhang for (j=jmin; j<n; j++) { 86249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 86349b5e25fSSatish Balay cval = ib[j]*5; 86449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 86549b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 86649b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 86749b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 86849b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 86949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 87049b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 87149b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 87249b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 87349b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 87449b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 87549b5e25fSSatish Balay v += 25; 87649b5e25fSSatish Balay } 87749b5e25fSSatish Balay xb +=5; ai++; 87849b5e25fSSatish Balay } 87949b5e25fSSatish Balay 880b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 881b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 882b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 88349b5e25fSSatish Balay 8846c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m)); 88549b5e25fSSatish Balay PetscFunctionReturn(0); 88649b5e25fSSatish Balay } 8874a2ae208SSatish Balay #undef __FUNCT__ 8884a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 88949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 89049b5e25fSSatish Balay { 89149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 89287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 89349b5e25fSSatish Balay MatScalar *v; 894831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 89549b5e25fSSatish Balay 89649b5e25fSSatish Balay PetscFunctionBegin; 897b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 89849b5e25fSSatish Balay if (yy != xx) { 899b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 90049b5e25fSSatish Balay } else { 90149b5e25fSSatish Balay y = x; 90249b5e25fSSatish Balay } 90349b5e25fSSatish Balay if (zz != yy) { 904a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 905b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 906a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 90749b5e25fSSatish Balay } else { 90849b5e25fSSatish Balay z = y; 90949b5e25fSSatish Balay } 91049b5e25fSSatish Balay 91149b5e25fSSatish Balay v = a->a; 91249b5e25fSSatish Balay xb = x; 91349b5e25fSSatish Balay 91449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 91549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 91649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 91749b5e25fSSatish Balay ib = aj + *ai; 918831a3094SHong Zhang jmin = 0; 9197fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 92049b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 92149b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 92249b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 92349b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 92449b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 92549b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 926831a3094SHong Zhang v += 36; jmin++; 9277fbae186SHong Zhang } 928831a3094SHong Zhang for (j=jmin; j<n; j++) { 92949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 93049b5e25fSSatish Balay cval = ib[j]*6; 93149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 93249b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 93349b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 93449b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 93549b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 93649b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 93749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 93849b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 93949b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 94049b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 94149b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 94249b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 94349b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 94449b5e25fSSatish Balay v += 36; 94549b5e25fSSatish Balay } 94649b5e25fSSatish Balay xb +=6; ai++; 94749b5e25fSSatish Balay } 94849b5e25fSSatish Balay 949b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 950b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 951b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 95249b5e25fSSatish Balay 9536c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m)); 95449b5e25fSSatish Balay PetscFunctionReturn(0); 95549b5e25fSSatish Balay } 95649b5e25fSSatish Balay 9574a2ae208SSatish Balay #undef __FUNCT__ 9584a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 95949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 96049b5e25fSSatish Balay { 96149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96287828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 96349b5e25fSSatish Balay MatScalar *v; 964831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 96549b5e25fSSatish Balay 96649b5e25fSSatish Balay PetscFunctionBegin; 967b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 96849b5e25fSSatish Balay if (yy != xx) { 969b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 97049b5e25fSSatish Balay } else { 97149b5e25fSSatish Balay y = x; 97249b5e25fSSatish Balay } 97349b5e25fSSatish Balay if (zz != yy) { 974a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 975b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 976a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 97749b5e25fSSatish Balay } else { 97849b5e25fSSatish Balay z = y; 97949b5e25fSSatish Balay } 98049b5e25fSSatish Balay 98149b5e25fSSatish Balay v = a->a; 98249b5e25fSSatish Balay xb = x; 98349b5e25fSSatish Balay 98449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 98549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 98649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 98749b5e25fSSatish Balay ib = aj + *ai; 988831a3094SHong Zhang jmin = 0; 9897fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 99049b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 99149b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 99249b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 99349b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 99449b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 99549b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 99649b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 997831a3094SHong Zhang v += 49; jmin++; 9987fbae186SHong Zhang } 999831a3094SHong Zhang for (j=jmin; j<n; j++) { 100049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 100149b5e25fSSatish Balay cval = ib[j]*7; 100249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 100349b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 100449b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 100549b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 100649b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 100749b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 100849b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 100949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 101049b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 101149b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 101249b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 101349b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 101449b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 101549b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 101649b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 101749b5e25fSSatish Balay v += 49; 101849b5e25fSSatish Balay } 101949b5e25fSSatish Balay xb +=7; ai++; 102049b5e25fSSatish Balay } 102149b5e25fSSatish Balay 1022b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1023b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1024b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 102549b5e25fSSatish Balay 10266c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m)); 102749b5e25fSSatish Balay PetscFunctionReturn(0); 102849b5e25fSSatish Balay } 102949b5e25fSSatish Balay 10304a2ae208SSatish Balay #undef __FUNCT__ 10314a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 103249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 103349b5e25fSSatish Balay { 103449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 103587828ca2SBarry Smith PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1036066653e3SSatish Balay MatScalar *v; 1037066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1038066653e3SSatish Balay int ncols,k; 103949b5e25fSSatish Balay 104049b5e25fSSatish Balay PetscFunctionBegin; 1041b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x; 104249b5e25fSSatish Balay if (yy != xx) { 1043b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 104449b5e25fSSatish Balay } else { 104549b5e25fSSatish Balay y = x; 104649b5e25fSSatish Balay } 104749b5e25fSSatish Balay if (zz != yy) { 1048a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1049b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z; 1050a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 105149b5e25fSSatish Balay } else { 105249b5e25fSSatish Balay z = y; 105349b5e25fSSatish Balay } 105449b5e25fSSatish Balay 105549b5e25fSSatish Balay aj = a->j; 105649b5e25fSSatish Balay v = a->a; 105749b5e25fSSatish Balay ii = a->i; 105849b5e25fSSatish Balay 105949b5e25fSSatish Balay if (!a->mult_work) { 106087828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 106149b5e25fSSatish Balay } 106249b5e25fSSatish Balay work = a->mult_work; 106349b5e25fSSatish Balay 106449b5e25fSSatish Balay 106549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 106649b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 106749b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 106849b5e25fSSatish Balay 106949b5e25fSSatish Balay /* upper triangular part */ 107049b5e25fSSatish Balay for (j=0; j<n; j++) { 107149b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 107249b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 107349b5e25fSSatish Balay workt += bs; 107449b5e25fSSatish Balay } 107549b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 107649b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 107749b5e25fSSatish Balay 107849b5e25fSSatish Balay /* strict lower triangular part */ 1079831a3094SHong Zhang idx = aj+ii[0]; 1080831a3094SHong Zhang if (*idx == i){ 108196b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1082831a3094SHong Zhang } 108349b5e25fSSatish Balay if (ncols > 0){ 108449b5e25fSSatish Balay workt = work; 108587828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1086831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1087831a3094SHong Zhang for (j=0; j<n; j++) { 1088831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 1089831a3094SHong Zhang /* idx++; */ 109049b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 109149b5e25fSSatish Balay workt += bs; 109249b5e25fSSatish Balay } 109349b5e25fSSatish Balay } 109449b5e25fSSatish Balay 109549b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 109649b5e25fSSatish Balay } 109749b5e25fSSatish Balay 1098b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1099b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1100b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 110149b5e25fSSatish Balay 11026c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 110349b5e25fSSatish Balay PetscFunctionReturn(0); 110449b5e25fSSatish Balay } 110549b5e25fSSatish Balay 11064a2ae208SSatish Balay #undef __FUNCT__ 11074a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqSBAIJ" 110849b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 110949b5e25fSSatish Balay { 1110eeeff2ecSHong Zhang int ierr; 1111eeeff2ecSHong Zhang 111249b5e25fSSatish Balay PetscFunctionBegin; 1113eeeff2ecSHong Zhang ierr = MatMult(A,xx,zz);CHKERRQ(ierr); 1114eeeff2ecSHong Zhang PetscFunctionReturn(0); 111549b5e25fSSatish Balay } 111649b5e25fSSatish Balay 11174a2ae208SSatish Balay #undef __FUNCT__ 11184a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ" 111949b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 112049b5e25fSSatish Balay 112149b5e25fSSatish Balay { 1122eeeff2ecSHong Zhang int ierr; 1123eeeff2ecSHong Zhang 112449b5e25fSSatish Balay PetscFunctionBegin; 1125eeeff2ecSHong Zhang ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1126eeeff2ecSHong Zhang PetscFunctionReturn(0); 112749b5e25fSSatish Balay } 112849b5e25fSSatish Balay 11294a2ae208SSatish Balay #undef __FUNCT__ 11304a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1131268466fbSBarry Smith int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA) 113249b5e25fSSatish Balay { 113349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 11346c6c5352SBarry Smith int one = 1,totalnz = a->bs2*a->nz; 113549b5e25fSSatish Balay 113649b5e25fSSatish Balay PetscFunctionBegin; 1137268466fbSBarry Smith BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one); 1138b0a32e0cSBarry Smith PetscLogFlops(totalnz); 113949b5e25fSSatish Balay PetscFunctionReturn(0); 114049b5e25fSSatish Balay } 114149b5e25fSSatish Balay 11424a2ae208SSatish Balay #undef __FUNCT__ 11434a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 114449b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 114549b5e25fSSatish Balay { 114649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 114749b5e25fSSatish Balay MatScalar *v = a->a; 114849b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1149831a3094SHong Zhang int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 1150831a3094SHong Zhang int *jl,*il,jmin,jmax,ierr,nexti,ik,*col; 115149b5e25fSSatish Balay 115249b5e25fSSatish Balay PetscFunctionBegin; 115349b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 115449b5e25fSSatish Balay for (k=0; k<mbs; k++){ 115549b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1156831a3094SHong Zhang col = aj + jmin; 1157831a3094SHong Zhang if (*col == k){ /* diagonal block */ 115849b5e25fSSatish Balay for (i=0; i<bs2; i++){ 115949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 116049b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 116149b5e25fSSatish Balay #else 116249b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 116349b5e25fSSatish Balay #endif 116449b5e25fSSatish Balay } 1165831a3094SHong Zhang jmin++; 1166831a3094SHong Zhang } 1167831a3094SHong Zhang for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 116849b5e25fSSatish Balay for (i=0; i<bs2; i++){ 116949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 117049b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 117149b5e25fSSatish Balay #else 117249b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 117349b5e25fSSatish Balay #endif 117449b5e25fSSatish Balay } 117549b5e25fSSatish Balay } 117649b5e25fSSatish Balay } 117749b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 117849b5e25fSSatish Balay 117949b5e25fSSatish Balay } else if (type == NORM_INFINITY) { /* maximum row sum */ 118082502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr); 118182502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 118282502324SSatish Balay ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr); 118349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 118449b5e25fSSatish Balay jl[i] = mbs; il[0] = 0; 118549b5e25fSSatish Balay } 118649b5e25fSSatish Balay 118749b5e25fSSatish Balay *norm = 0.0; 118849b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 118949b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 119049b5e25fSSatish Balay 119149b5e25fSSatish Balay /*-- col sum --*/ 119249b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1193831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1194831a3094SHong Zhang at step k */ 119549b5e25fSSatish Balay while (i<mbs){ 119649b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 119749b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 119849b5e25fSSatish Balay for (j=0; j<bs; j++){ 119949b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 120049b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 120149b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 120249b5e25fSSatish Balay } 120349b5e25fSSatish Balay } 120449b5e25fSSatish Balay /* update il, jl */ 1205831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1206831a3094SHong Zhang jmax = a->i[i+1]; 120749b5e25fSSatish Balay if (jmin < jmax){ 120849b5e25fSSatish Balay il[i] = jmin; 120949b5e25fSSatish Balay j = a->j[jmin]; 121049b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 121149b5e25fSSatish Balay } 121249b5e25fSSatish Balay i = nexti; 121349b5e25fSSatish Balay } 121449b5e25fSSatish Balay 121549b5e25fSSatish Balay /*-- row sum --*/ 121649b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 121749b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 121849b5e25fSSatish Balay for (j=0; j<bs; j++){ 121949b5e25fSSatish Balay v = a->a + i*bs2 + j; 122049b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 122149b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); 122249b5e25fSSatish Balay v += bs; 122349b5e25fSSatish Balay } 122449b5e25fSSatish Balay } 122549b5e25fSSatish Balay } 122649b5e25fSSatish Balay /* add k_th block row to il, jl */ 1227831a3094SHong Zhang col = aj+jmin; 1228831a3094SHong Zhang if (*col == k) jmin++; 122949b5e25fSSatish Balay if (jmin < jmax){ 123049b5e25fSSatish Balay il[k] = jmin; 123149b5e25fSSatish Balay j = a->j[jmin]; 123249b5e25fSSatish Balay jl[k] = jl[j]; jl[j] = k; 123349b5e25fSSatish Balay } 123449b5e25fSSatish Balay for (j=0; j<bs; j++){ 123549b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 123649b5e25fSSatish Balay } 123749b5e25fSSatish Balay } 123849b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 123949b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 124049b5e25fSSatish Balay ierr = PetscFree(sum);CHKERRQ(ierr); 124149b5e25fSSatish Balay } else { 1242347d480fSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 124349b5e25fSSatish Balay } 124449b5e25fSSatish Balay PetscFunctionReturn(0); 124549b5e25fSSatish Balay } 124649b5e25fSSatish Balay 12474a2ae208SSatish Balay #undef __FUNCT__ 12484a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 124949b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 125049b5e25fSSatish Balay { 125149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 125249b5e25fSSatish Balay int ierr; 125349b5e25fSSatish Balay 125449b5e25fSSatish Balay PetscFunctionBegin; 125549b5e25fSSatish Balay 125649b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 12576c6c5352SBarry Smith if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) { 1258ef511fbeSHong Zhang *flg = PETSC_FALSE; 1259ef511fbeSHong Zhang PetscFunctionReturn(0); 126049b5e25fSSatish Balay } 126149b5e25fSSatish Balay 126249b5e25fSSatish Balay /* if the a->i are the same */ 126349b5e25fSSatish Balay ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 126449b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 126549b5e25fSSatish Balay PetscFunctionReturn(0); 126649b5e25fSSatish Balay } 126749b5e25fSSatish Balay 126849b5e25fSSatish Balay /* if a->j are the same */ 12696c6c5352SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr); 127049b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 127149b5e25fSSatish Balay PetscFunctionReturn(0); 127249b5e25fSSatish Balay } 127349b5e25fSSatish Balay /* if a->a are the same */ 12746c6c5352SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 127549b5e25fSSatish Balay 1276935af2e7SHong Zhang PetscFunctionReturn(0); 127749b5e25fSSatish Balay } 127849b5e25fSSatish Balay 12794a2ae208SSatish Balay #undef __FUNCT__ 12804a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 128149b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 128249b5e25fSSatish Balay { 128349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 128449b5e25fSSatish Balay int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 128587828ca2SBarry Smith PetscScalar *x,zero = 0.0; 128649b5e25fSSatish Balay MatScalar *aa,*aa_j; 128749b5e25fSSatish Balay 128849b5e25fSSatish Balay PetscFunctionBegin; 128949b5e25fSSatish Balay bs = a->bs; 129082799104SHong Zhang if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 129182799104SHong Zhang 129249b5e25fSSatish Balay aa = a->a; 129349b5e25fSSatish Balay ai = a->i; 129449b5e25fSSatish Balay aj = a->j; 129549b5e25fSSatish Balay ambs = a->mbs; 129649b5e25fSSatish Balay bs2 = a->bs2; 129749b5e25fSSatish Balay 129849b5e25fSSatish Balay ierr = VecSet(&zero,v);CHKERRQ(ierr); 1299b1d4fb26SBarry Smith ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 130049b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1301ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 130249b5e25fSSatish Balay for (i=0; i<ambs; i++) { 130349b5e25fSSatish Balay j=ai[i]; 130449b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 130549b5e25fSSatish Balay row = i*bs; 130649b5e25fSSatish Balay aa_j = aa + j*bs2; 130782799104SHong Zhang if (A->factor && bs==1){ 130882799104SHong Zhang for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k]; 130982799104SHong Zhang } else { 131049b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 131149b5e25fSSatish Balay } 131249b5e25fSSatish Balay } 131382799104SHong Zhang } 131482799104SHong Zhang 1315b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 131649b5e25fSSatish Balay PetscFunctionReturn(0); 131749b5e25fSSatish Balay } 131849b5e25fSSatish Balay 13194a2ae208SSatish Balay #undef __FUNCT__ 13204a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 132149b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 132249b5e25fSSatish Balay { 132349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 132487828ca2SBarry Smith PetscScalar *l,*r,x,*li,*ri; 132549b5e25fSSatish Balay MatScalar *aa,*v; 1326066653e3SSatish Balay int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 132749b5e25fSSatish Balay 132849b5e25fSSatish Balay PetscFunctionBegin; 132949b5e25fSSatish Balay ai = a->i; 133049b5e25fSSatish Balay aj = a->j; 133149b5e25fSSatish Balay aa = a->a; 1332ef511fbeSHong Zhang m = A->m; 133349b5e25fSSatish Balay bs = a->bs; 133449b5e25fSSatish Balay mbs = a->mbs; 133549b5e25fSSatish Balay bs2 = a->bs2; 133649b5e25fSSatish Balay 133749b5e25fSSatish Balay if (ll != rr) { 1338347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 133949b5e25fSSatish Balay } 134049b5e25fSSatish Balay if (ll) { 1341b1d4fb26SBarry Smith ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr); 134249b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1343347d480fSBarry Smith if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 134449b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 134549b5e25fSSatish Balay M = ai[i+1] - ai[i]; 134649b5e25fSSatish Balay li = l + i*bs; 134749b5e25fSSatish Balay v = aa + bs2*ai[i]; 134849b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 134949b5e25fSSatish Balay for (k=0; k<bs2; k++) { 135049b5e25fSSatish Balay (*v++) *= li[k%bs]; 135149b5e25fSSatish Balay } 135249b5e25fSSatish Balay #ifdef CONT 135349b5e25fSSatish Balay /* will be used to replace the above loop */ 135449b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 135549b5e25fSSatish Balay for (k=0; k<bs; k++) { /* column value */ 135649b5e25fSSatish Balay x = ri[k]; 135749b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 135849b5e25fSSatish Balay } 135949b5e25fSSatish Balay #endif 136049b5e25fSSatish Balay 136149b5e25fSSatish Balay } 136249b5e25fSSatish Balay } 1363b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr); 13646c6c5352SBarry Smith PetscLogFlops(2*a->nz); 136549b5e25fSSatish Balay } 136649b5e25fSSatish Balay /* will be deleted */ 136749b5e25fSSatish Balay if (rr) { 1368b1d4fb26SBarry Smith ierr = VecGetArrayFast(rr,&r);CHKERRQ(ierr); 136949b5e25fSSatish Balay ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1370347d480fSBarry Smith if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 137149b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 137249b5e25fSSatish Balay M = ai[i+1] - ai[i]; 137349b5e25fSSatish Balay v = aa + bs2*ai[i]; 137449b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 137549b5e25fSSatish Balay ri = r + bs*aj[ai[i]+j]; 137649b5e25fSSatish Balay for (k=0; k<bs; k++) { 137749b5e25fSSatish Balay x = ri[k]; 137849b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 137949b5e25fSSatish Balay } 138049b5e25fSSatish Balay } 138149b5e25fSSatish Balay } 1382b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(rr,&r);CHKERRQ(ierr); 13836c6c5352SBarry Smith PetscLogFlops(a->nz); 138449b5e25fSSatish Balay } 138549b5e25fSSatish Balay PetscFunctionReturn(0); 138649b5e25fSSatish Balay } 138749b5e25fSSatish Balay 13884a2ae208SSatish Balay #undef __FUNCT__ 13894a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 139049b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 139149b5e25fSSatish Balay { 139249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 139349b5e25fSSatish Balay 139449b5e25fSSatish Balay PetscFunctionBegin; 1395ef511fbeSHong Zhang info->rows_global = (double)A->m; 1396ef511fbeSHong Zhang info->columns_global = (double)A->m; 1397ef511fbeSHong Zhang info->rows_local = (double)A->m; 1398ef511fbeSHong Zhang info->columns_local = (double)A->m; 139949b5e25fSSatish Balay info->block_size = a->bs2; 14006c6c5352SBarry Smith info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */ 14016c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 140249b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 140349b5e25fSSatish Balay info->assemblies = A->num_ass; 140449b5e25fSSatish Balay info->mallocs = a->reallocs; 140549b5e25fSSatish Balay info->memory = A->mem; 140649b5e25fSSatish Balay if (A->factor) { 140749b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 140849b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 140949b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 141049b5e25fSSatish Balay } else { 141149b5e25fSSatish Balay info->fill_ratio_given = 0; 141249b5e25fSSatish Balay info->fill_ratio_needed = 0; 141349b5e25fSSatish Balay info->factor_mallocs = 0; 141449b5e25fSSatish Balay } 141549b5e25fSSatish Balay PetscFunctionReturn(0); 141649b5e25fSSatish Balay } 141749b5e25fSSatish Balay 141849b5e25fSSatish Balay 14194a2ae208SSatish Balay #undef __FUNCT__ 14204a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 142149b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A) 142249b5e25fSSatish Balay { 142349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 142449b5e25fSSatish Balay int ierr; 142549b5e25fSSatish Balay 142649b5e25fSSatish Balay PetscFunctionBegin; 142749b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 142849b5e25fSSatish Balay PetscFunctionReturn(0); 142949b5e25fSSatish Balay } 1430dc354874SHong Zhang 14314a2ae208SSatish Balay #undef __FUNCT__ 14324a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ" 1433dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1434dc354874SHong Zhang { 1435dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1436d0f6400bSHong Zhang int ierr,i,j,n,row,col,bs,*ai,*aj,mbs; 1437c3fca9a7SHong Zhang PetscReal atmp; 1438273d9f13SBarry Smith MatScalar *aa; 143987828ca2SBarry Smith PetscScalar zero = 0.0,*x; 1440d0f6400bSHong Zhang int ncols,brow,bcol,krow,kcol; 1441dc354874SHong Zhang 1442dc354874SHong Zhang PetscFunctionBegin; 1443dc354874SHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1444dc354874SHong Zhang bs = a->bs; 1445dc354874SHong Zhang aa = a->a; 1446dc354874SHong Zhang ai = a->i; 1447dc354874SHong Zhang aj = a->j; 144844117c81SHong Zhang mbs = a->mbs; 1449dc354874SHong Zhang 1450dc354874SHong Zhang ierr = VecSet(&zero,v);CHKERRQ(ierr); 1451b1d4fb26SBarry Smith ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 1452dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1453ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 145444117c81SHong Zhang for (i=0; i<mbs; i++) { 1455d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1456d0f6400bSHong Zhang brow = bs*i; 145744117c81SHong Zhang for (j=0; j<ncols; j++){ 1458d0f6400bSHong Zhang bcol = bs*(*aj); 145944117c81SHong Zhang for (kcol=0; kcol<bs; kcol++){ 1460d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 146144117c81SHong Zhang for (krow=0; krow<bs; krow++){ 1462d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1463d0f6400bSHong Zhang row = brow + krow; /* row index */ 146444117c81SHong Zhang /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 1465c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1466c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 146744117c81SHong Zhang } 146844117c81SHong Zhang } 1469d0f6400bSHong Zhang aj++; 1470dc354874SHong Zhang } 1471dc354874SHong Zhang } 1472b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 1473dc354874SHong Zhang PetscFunctionReturn(0); 1474dc354874SHong Zhang } 1475