173f4d377SMatthew Knepley /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/ 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 449b5e25fSSatish Balay #include "src/inline/spops.h" 549b5e25fSSatish Balay #include "src/inline/ilu.h" 649b5e25fSSatish Balay #include "petscbt.h" 73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 849b5e25fSSatish Balay 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 11268466fbSBarry Smith int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov) 1249b5e25fSSatish Balay { 135eee224dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 145333f59eSHong Zhang int brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol, 15d94109b8SHong Zhang start,end,*ai,*aj,bs,*nidx2; 16d94109b8SHong Zhang PetscBT table; 17d94109b8SHong Zhang PetscBT table0; 18d94109b8SHong Zhang 19d94109b8SHong Zhang PetscFunctionBegin; 20d94109b8SHong Zhang mbs = a->mbs; 21d94109b8SHong Zhang ai = a->i; 22d94109b8SHong Zhang aj = a->j; 23d94109b8SHong Zhang bs = a->bs; 24d94109b8SHong Zhang 25d94109b8SHong Zhang if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 26d94109b8SHong Zhang 27d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr); 28d94109b8SHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 29d94109b8SHong Zhang ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr); 30d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr); 31d94109b8SHong Zhang 32d94109b8SHong Zhang for (i=0; i<is_max; i++) { /* for each is */ 33d94109b8SHong Zhang isz = 0; 34d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr); 35d94109b8SHong Zhang 36d94109b8SHong Zhang /* Extract the indices, assume there can be duplicate entries */ 37d94109b8SHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 38d94109b8SHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 39d94109b8SHong Zhang 40d94109b8SHong Zhang /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */ 41d94109b8SHong Zhang for (j=0; j<n ; ++j){ 42d94109b8SHong Zhang brow = idx[j]/bs; /* convert the indices into block indices */ 43d94109b8SHong Zhang if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 44d94109b8SHong Zhang if(!PetscBTLookupSet(table,brow)) { nidx[isz++] = brow;} 45d94109b8SHong Zhang } 46d94109b8SHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 47d94109b8SHong Zhang ierr = ISDestroy(is[i]);CHKERRQ(ierr); 48d94109b8SHong Zhang 49d94109b8SHong Zhang k = 0; 50d94109b8SHong Zhang for (j=0; j<ov; j++){ /* for each overlap */ 51d94109b8SHong Zhang /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */ 52d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr); 53d94109b8SHong Zhang for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]); 54d94109b8SHong Zhang 55d94109b8SHong Zhang n = isz; /* length of the updated is[i] */ 56d94109b8SHong Zhang for (brow=0; brow<mbs; brow++){ 57d94109b8SHong Zhang start = ai[brow]; end = ai[brow+1]; 58d94109b8SHong Zhang if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */ 59d94109b8SHong Zhang for (l = start; l<end ; l++){ 60d94109b8SHong Zhang bcol = aj[l]; 61d94109b8SHong Zhang if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;} 62d94109b8SHong Zhang } 63d94109b8SHong Zhang k++; 64d94109b8SHong Zhang if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */ 65d94109b8SHong Zhang } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */ 66d94109b8SHong Zhang for (l = start; l<end ; l++){ 67d94109b8SHong Zhang bcol = aj[l]; 68d94109b8SHong Zhang if (PetscBTLookup(table0,bcol)){ 69d94109b8SHong Zhang if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;} 70d94109b8SHong Zhang break; /* for l = start; l<end ; l++) */ 71d94109b8SHong Zhang } 72d94109b8SHong Zhang } 73d94109b8SHong Zhang } 74d94109b8SHong Zhang } 75d94109b8SHong Zhang } /* for each overlap */ 76d94109b8SHong Zhang 77d94109b8SHong Zhang /* expand the Index Set */ 78d94109b8SHong Zhang for (j=0; j<isz; j++) { 79d94109b8SHong Zhang for (k=0; k<bs; k++) 80d94109b8SHong Zhang nidx2[j*bs+k] = nidx[j]*bs+k; 81d94109b8SHong Zhang } 82d94109b8SHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 83d94109b8SHong Zhang } 84d94109b8SHong Zhang ierr = PetscBTDestroy(table);CHKERRQ(ierr); 85d94109b8SHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 86d94109b8SHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 87d94109b8SHong Zhang ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 885eee224dSHong Zhang PetscFunctionReturn(0); 8949b5e25fSSatish Balay } 9049b5e25fSSatish Balay 914a2ae208SSatish Balay #undef __FUNCT__ 924a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 9349b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 9449b5e25fSSatish Balay { 9549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 9649b5e25fSSatish Balay int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 9749b5e25fSSatish Balay int row,mat_i,*mat_j,tcol,*mat_ilen; 9849b5e25fSSatish Balay int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 9949b5e25fSSatish Balay int *aj = a->j,*ai = a->i; 10049b5e25fSSatish Balay MatScalar *mat_a; 10149b5e25fSSatish Balay Mat C; 10249b5e25fSSatish Balay PetscTruth flag; 10349b5e25fSSatish Balay 10449b5e25fSSatish Balay PetscFunctionBegin; 10549b5e25fSSatish Balay 106ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 10749b5e25fSSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 108347d480fSBarry Smith if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 10949b5e25fSSatish Balay 11049b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 11149b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 11249b5e25fSSatish Balay 113b0a32e0cSBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr); 11449b5e25fSSatish Balay ssmap = smap; 115b0a32e0cSBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr); 11649b5e25fSSatish Balay ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 11749b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 11849b5e25fSSatish Balay /* determine lens of each row */ 11949b5e25fSSatish Balay for (i=0; i<nrows; i++) { 12049b5e25fSSatish Balay kstart = ai[irow[i]]; 12149b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 12249b5e25fSSatish Balay lens[i] = 0; 12349b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 12449b5e25fSSatish Balay if (ssmap[aj[k]]) { 12549b5e25fSSatish Balay lens[i]++; 12649b5e25fSSatish Balay } 12749b5e25fSSatish Balay } 12849b5e25fSSatish Balay } 12949b5e25fSSatish Balay /* Create and fill new matrix */ 13049b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 13149b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 13249b5e25fSSatish Balay 133347d480fSBarry Smith if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 13449b5e25fSSatish Balay ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 13549b5e25fSSatish Balay if (flag == PETSC_FALSE) { 136347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 13749b5e25fSSatish Balay } 13849b5e25fSSatish Balay ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 13949b5e25fSSatish Balay C = *B; 14049b5e25fSSatish Balay } else { 141e2d9671bSKris Buschelman ierr = MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr); 142e2d9671bSKris Buschelman ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 143e2d9671bSKris Buschelman ierr = MatSeqSBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr); 14449b5e25fSSatish Balay } 14549b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 14649b5e25fSSatish Balay for (i=0; i<nrows; i++) { 14749b5e25fSSatish Balay row = irow[i]; 14849b5e25fSSatish Balay kstart = ai[row]; 14949b5e25fSSatish Balay kend = kstart + a->ilen[row]; 15049b5e25fSSatish Balay mat_i = c->i[i]; 15149b5e25fSSatish Balay mat_j = c->j + mat_i; 15249b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 15349b5e25fSSatish Balay mat_ilen = c->ilen + i; 15449b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 15549b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 15649b5e25fSSatish Balay *mat_j++ = tcol - 1; 15749b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 15849b5e25fSSatish Balay mat_a += bs2; 15949b5e25fSSatish Balay (*mat_ilen)++; 16049b5e25fSSatish Balay } 16149b5e25fSSatish Balay } 16249b5e25fSSatish Balay } 16349b5e25fSSatish Balay 16449b5e25fSSatish Balay /* Free work space */ 16549b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 16649b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 16749b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16849b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16949b5e25fSSatish Balay 17049b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 17149b5e25fSSatish Balay *B = C; 17249b5e25fSSatish Balay PetscFunctionReturn(0); 17349b5e25fSSatish Balay } 17449b5e25fSSatish Balay 1754a2ae208SSatish Balay #undef __FUNCT__ 1764a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 17749b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 17849b5e25fSSatish Balay { 17949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 18049b5e25fSSatish Balay IS is1; 18149b5e25fSSatish Balay int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 18249b5e25fSSatish Balay 18349b5e25fSSatish Balay PetscFunctionBegin; 184ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 18549b5e25fSSatish Balay 18649b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 18749b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 18849b5e25fSSatish Balay 18949b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 19049b5e25fSSatish Balay and form the IS with compressed IS */ 19182502324SSatish Balay ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr); 19249b5e25fSSatish Balay iary = vary + a->mbs; 19349b5e25fSSatish Balay ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 19449b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 19549b5e25fSSatish Balay 19649b5e25fSSatish Balay count = 0; 19749b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 198ac355199SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks"); 19949b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 20049b5e25fSSatish Balay } 20149b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 20249b5e25fSSatish Balay 20349b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 20449b5e25fSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 20549b5e25fSSatish Balay 20649b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 20749b5e25fSSatish Balay ISDestroy(is1); 20849b5e25fSSatish Balay PetscFunctionReturn(0); 20949b5e25fSSatish Balay } 21049b5e25fSSatish Balay 2114a2ae208SSatish Balay #undef __FUNCT__ 2124a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 213268466fbSBarry Smith int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 21449b5e25fSSatish Balay { 21549b5e25fSSatish Balay int ierr,i; 21649b5e25fSSatish Balay 21749b5e25fSSatish Balay PetscFunctionBegin; 21849b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 21982502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 22049b5e25fSSatish Balay } 22149b5e25fSSatish Balay 22249b5e25fSSatish Balay for (i=0; i<n; i++) { 22349b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 22449b5e25fSSatish Balay } 22549b5e25fSSatish Balay PetscFunctionReturn(0); 22649b5e25fSSatish Balay } 22749b5e25fSSatish Balay 22849b5e25fSSatish Balay /* -------------------------------------------------------*/ 22949b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 23049b5e25fSSatish Balay /* -------------------------------------------------------*/ 231d9eff348SSatish Balay #include "petscblaslapack.h" 23249b5e25fSSatish Balay 2334a2ae208SSatish Balay #undef __FUNCT__ 2344a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1" 23549b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 23649b5e25fSSatish Balay { 23749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 23887828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,zero=0.0; 23949b5e25fSSatish Balay MatScalar *v; 240831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 24149b5e25fSSatish Balay 24249b5e25fSSatish Balay PetscFunctionBegin; 24349b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 244*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 245*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 24649b5e25fSSatish Balay 24749b5e25fSSatish Balay v = a->a; 24849b5e25fSSatish Balay xb = x; 24949b5e25fSSatish Balay 25049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 25149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 25249b5e25fSSatish Balay x1 = xb[0]; 25349b5e25fSSatish Balay ib = aj + *ai; 254831a3094SHong Zhang jmin = 0; 255831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 256831a3094SHong Zhang z[i] += *v++ * x[*ib++]; 257831a3094SHong Zhang jmin++; 258831a3094SHong Zhang } 259831a3094SHong Zhang for (j=jmin; j<n; j++) { 26049b5e25fSSatish Balay cval = *ib; 26149b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 26249b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 26349b5e25fSSatish Balay } 26449b5e25fSSatish Balay xb++; ai++; 26549b5e25fSSatish Balay } 26649b5e25fSSatish Balay 267*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 268*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 2696c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m) - A->m); /* nz = (nz+m)/2 */ 27049b5e25fSSatish Balay PetscFunctionReturn(0); 27149b5e25fSSatish Balay } 27249b5e25fSSatish Balay 2734a2ae208SSatish Balay #undef __FUNCT__ 2744a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 27549b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 27649b5e25fSSatish Balay { 27749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 27887828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 27949b5e25fSSatish Balay MatScalar *v; 280831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 28149b5e25fSSatish Balay 28249b5e25fSSatish Balay 28349b5e25fSSatish Balay PetscFunctionBegin; 28449b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 285*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 286*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 28749b5e25fSSatish Balay 28849b5e25fSSatish Balay v = a->a; 28949b5e25fSSatish Balay xb = x; 29049b5e25fSSatish Balay 29149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 29249b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 29349b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 29449b5e25fSSatish Balay ib = aj + *ai; 295831a3094SHong Zhang jmin = 0; 2967fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 29749b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 29849b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 299831a3094SHong Zhang v += 4; jmin++; 3007fbae186SHong Zhang } 301831a3094SHong Zhang for (j=jmin; j<n; j++) { 30249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 30349b5e25fSSatish Balay cval = ib[j]*2; 30449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 30549b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 30649b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 30749b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 30849b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 30949b5e25fSSatish Balay v += 4; 31049b5e25fSSatish Balay } 31149b5e25fSSatish Balay xb +=2; ai++; 31249b5e25fSSatish Balay } 31349b5e25fSSatish Balay 314*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 315*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 3166c6c5352SBarry Smith PetscLogFlops(8*(a->nz*2 - A->m) - A->m); 31749b5e25fSSatish Balay PetscFunctionReturn(0); 31849b5e25fSSatish Balay } 31949b5e25fSSatish Balay 3204a2ae208SSatish Balay #undef __FUNCT__ 3214a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 32249b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 32349b5e25fSSatish Balay { 32449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 32587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 32649b5e25fSSatish Balay MatScalar *v; 327831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 32849b5e25fSSatish Balay 32949b5e25fSSatish Balay 33049b5e25fSSatish Balay PetscFunctionBegin; 33149b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 332*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 333*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 33449b5e25fSSatish Balay 33549b5e25fSSatish Balay v = a->a; 33649b5e25fSSatish Balay xb = x; 33749b5e25fSSatish Balay 33849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 33949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 34049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 34149b5e25fSSatish Balay ib = aj + *ai; 342831a3094SHong Zhang jmin = 0; 3437fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 34449b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 34549b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 34649b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 347831a3094SHong Zhang v += 9; jmin++; 3487fbae186SHong Zhang } 349831a3094SHong Zhang for (j=jmin; j<n; j++) { 35049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 35149b5e25fSSatish Balay cval = ib[j]*3; 35249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 35349b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 35449b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 35549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 35649b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 35749b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 35849b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 35949b5e25fSSatish Balay v += 9; 36049b5e25fSSatish Balay } 36149b5e25fSSatish Balay xb +=3; ai++; 36249b5e25fSSatish Balay } 36349b5e25fSSatish Balay 364*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 365*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 3666c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m) - A->m); 36749b5e25fSSatish Balay PetscFunctionReturn(0); 36849b5e25fSSatish Balay } 36949b5e25fSSatish Balay 3704a2ae208SSatish Balay #undef __FUNCT__ 3714a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 37249b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 37349b5e25fSSatish Balay { 37449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 37649b5e25fSSatish Balay MatScalar *v; 377831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 37849b5e25fSSatish Balay 37949b5e25fSSatish Balay PetscFunctionBegin; 38049b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 381*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 382*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 38349b5e25fSSatish Balay 38449b5e25fSSatish Balay v = a->a; 38549b5e25fSSatish Balay xb = x; 38649b5e25fSSatish Balay 38749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 38849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 38949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 39049b5e25fSSatish Balay ib = aj + *ai; 391831a3094SHong Zhang jmin = 0; 3927fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 39349b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 39449b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 39549b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 39649b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 397831a3094SHong Zhang v += 16; jmin++; 3987fbae186SHong Zhang } 399831a3094SHong Zhang for (j=jmin; j<n; j++) { 40049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 40149b5e25fSSatish Balay cval = ib[j]*4; 40249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 40349b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 40449b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 40549b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 40649b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 40749b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 40849b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 40949b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 41049b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 41149b5e25fSSatish Balay v += 16; 41249b5e25fSSatish Balay } 41349b5e25fSSatish Balay xb +=4; ai++; 41449b5e25fSSatish Balay } 41549b5e25fSSatish Balay 416*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 417*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 4186c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m) - A->m); 41949b5e25fSSatish Balay PetscFunctionReturn(0); 42049b5e25fSSatish Balay } 42149b5e25fSSatish Balay 4224a2ae208SSatish Balay #undef __FUNCT__ 4234a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 42449b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 42549b5e25fSSatish Balay { 42649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 42787828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 42849b5e25fSSatish Balay MatScalar *v; 429831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 43049b5e25fSSatish Balay 43149b5e25fSSatish Balay PetscFunctionBegin; 43249b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 433*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 434*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 43549b5e25fSSatish Balay 43649b5e25fSSatish Balay v = a->a; 43749b5e25fSSatish Balay xb = x; 43849b5e25fSSatish Balay 43949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 44049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 44149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 44249b5e25fSSatish Balay ib = aj + *ai; 443831a3094SHong Zhang jmin = 0; 4447fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 44549b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 44649b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 44749b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 44849b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 44949b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 450831a3094SHong Zhang v += 25; jmin++; 4517fbae186SHong Zhang } 452831a3094SHong Zhang for (j=jmin; j<n; j++) { 45349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 45449b5e25fSSatish Balay cval = ib[j]*5; 45549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 45649b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 45749b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 45849b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 45949b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 46049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 46149b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 46249b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 46349b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 46449b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 46549b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 46649b5e25fSSatish Balay v += 25; 46749b5e25fSSatish Balay } 46849b5e25fSSatish Balay xb +=5; ai++; 46949b5e25fSSatish Balay } 47049b5e25fSSatish Balay 471*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 472*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 4736c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m) - A->m); 47449b5e25fSSatish Balay PetscFunctionReturn(0); 47549b5e25fSSatish Balay } 47649b5e25fSSatish Balay 47749b5e25fSSatish Balay 4784a2ae208SSatish Balay #undef __FUNCT__ 4794a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 48049b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 48149b5e25fSSatish Balay { 48249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 48387828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 48449b5e25fSSatish Balay MatScalar *v; 485831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 48649b5e25fSSatish Balay 48749b5e25fSSatish Balay PetscFunctionBegin; 48849b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 489*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 490*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 49149b5e25fSSatish Balay 49249b5e25fSSatish Balay v = a->a; 49349b5e25fSSatish Balay xb = x; 49449b5e25fSSatish Balay 49549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 49649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 49749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 49849b5e25fSSatish Balay ib = aj + *ai; 499831a3094SHong Zhang jmin = 0; 5007fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 50149b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 50249b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 50349b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 50449b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 50549b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 50649b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 507831a3094SHong Zhang v += 36; jmin++; 5087fbae186SHong Zhang } 509831a3094SHong Zhang for (j=jmin; j<n; j++) { 51049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 51149b5e25fSSatish Balay cval = ib[j]*6; 51249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 51349b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 51449b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 51549b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 51649b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 51749b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 51849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 51949b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 52049b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 52149b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 52249b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 52349b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 52449b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 52549b5e25fSSatish Balay v += 36; 52649b5e25fSSatish Balay } 52749b5e25fSSatish Balay xb +=6; ai++; 52849b5e25fSSatish Balay } 52949b5e25fSSatish Balay 530*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 531*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 5326c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m) - A->m); 53349b5e25fSSatish Balay PetscFunctionReturn(0); 53449b5e25fSSatish Balay } 5354a2ae208SSatish Balay #undef __FUNCT__ 5364a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 53749b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 53849b5e25fSSatish Balay { 53949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54087828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 54149b5e25fSSatish Balay MatScalar *v; 542831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 54349b5e25fSSatish Balay 54449b5e25fSSatish Balay PetscFunctionBegin; 54549b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 546*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 547*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 54849b5e25fSSatish Balay 54949b5e25fSSatish Balay v = a->a; 55049b5e25fSSatish Balay xb = x; 55149b5e25fSSatish Balay 55249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 55349b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 55449b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 55549b5e25fSSatish Balay ib = aj + *ai; 556831a3094SHong Zhang jmin = 0; 5577fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 55849b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 55949b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 56049b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 56149b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 56249b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 56349b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 56449b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 565831a3094SHong Zhang v += 49; jmin++; 5667fbae186SHong Zhang } 567831a3094SHong Zhang for (j=jmin; j<n; j++) { 56849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 56949b5e25fSSatish Balay cval = ib[j]*7; 57049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 57149b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 57249b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 57349b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 57449b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 57549b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 57649b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 57749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 57849b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 57949b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 58049b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 58149b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 58249b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 58349b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 58449b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 58549b5e25fSSatish Balay v += 49; 58649b5e25fSSatish Balay } 58749b5e25fSSatish Balay xb +=7; ai++; 58849b5e25fSSatish Balay } 589*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 590*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 5916c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m) - A->m); 59249b5e25fSSatish Balay PetscFunctionReturn(0); 59349b5e25fSSatish Balay } 59449b5e25fSSatish Balay 59549b5e25fSSatish Balay /* 59649b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 59749b5e25fSSatish Balay */ 5984a2ae208SSatish Balay #undef __FUNCT__ 5994a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 60049b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 60149b5e25fSSatish Balay { 60249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 60387828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 6040b60a74dSHong Zhang MatScalar *v; 605066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 6060b60a74dSHong Zhang int ncols,k; 60749b5e25fSSatish Balay 60849b5e25fSSatish Balay PetscFunctionBegin; 60949b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 610*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 611*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 61249b5e25fSSatish Balay 61349b5e25fSSatish Balay aj = a->j; 61449b5e25fSSatish Balay v = a->a; 61549b5e25fSSatish Balay ii = a->i; 61649b5e25fSSatish Balay 61749b5e25fSSatish Balay if (!a->mult_work) { 61887828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 61949b5e25fSSatish Balay } 62049b5e25fSSatish Balay work = a->mult_work; 62149b5e25fSSatish Balay 62249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 62349b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 62449b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 62549b5e25fSSatish Balay 62649b5e25fSSatish Balay /* upper triangular part */ 62749b5e25fSSatish Balay for (j=0; j<n; j++) { 62849b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 62949b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 63049b5e25fSSatish Balay workt += bs; 63149b5e25fSSatish Balay } 63249b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 63349b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 63449b5e25fSSatish Balay 63549b5e25fSSatish Balay /* strict lower triangular part */ 636831a3094SHong Zhang idx = aj+ii[0]; 637831a3094SHong Zhang if (*idx == i){ 63896b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 639831a3094SHong Zhang } 64096b9376eSHong Zhang 64149b5e25fSSatish Balay if (ncols > 0){ 64249b5e25fSSatish Balay workt = work; 64387828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 644831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 645831a3094SHong Zhang for (j=0; j<n; j++) { 646831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 64749b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 64849b5e25fSSatish Balay workt += bs; 64949b5e25fSSatish Balay } 65049b5e25fSSatish Balay } 65149b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 65249b5e25fSSatish Balay } 65349b5e25fSSatish Balay 654*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 655*1ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 6566c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m); 65749b5e25fSSatish Balay PetscFunctionReturn(0); 65849b5e25fSSatish Balay } 65949b5e25fSSatish Balay 6604a2ae208SSatish Balay #undef __FUNCT__ 6614a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 66249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 66349b5e25fSSatish Balay { 66449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 66587828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1; 66649b5e25fSSatish Balay MatScalar *v; 667831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 66849b5e25fSSatish Balay 66949b5e25fSSatish Balay PetscFunctionBegin; 670*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 67149b5e25fSSatish Balay if (yy != xx) { 672*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 67349b5e25fSSatish Balay } else { 67449b5e25fSSatish Balay y = x; 67549b5e25fSSatish Balay } 67649b5e25fSSatish Balay if (zz != yy) { 677a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 678*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 679a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 68049b5e25fSSatish Balay } else { 68149b5e25fSSatish Balay z = y; 68249b5e25fSSatish Balay } 68349b5e25fSSatish Balay 68449b5e25fSSatish Balay v = a->a; 68549b5e25fSSatish Balay xb = x; 68649b5e25fSSatish Balay 68749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 68849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 68949b5e25fSSatish Balay x1 = xb[0]; 69049b5e25fSSatish Balay ib = aj + *ai; 691831a3094SHong Zhang jmin = 0; 692831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 693831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 694831a3094SHong Zhang } 695831a3094SHong Zhang for (j=jmin; j<n; j++) { 69649b5e25fSSatish Balay cval = *ib; 69749b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 69849b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 69949b5e25fSSatish Balay } 70049b5e25fSSatish Balay xb++; ai++; 70149b5e25fSSatish Balay } 70249b5e25fSSatish Balay 703*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 705*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 70649b5e25fSSatish Balay 7076c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 70849b5e25fSSatish Balay PetscFunctionReturn(0); 70949b5e25fSSatish Balay } 71049b5e25fSSatish Balay 7114a2ae208SSatish Balay #undef __FUNCT__ 7124a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 71349b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 71449b5e25fSSatish Balay { 71549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 71687828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2; 71749b5e25fSSatish Balay MatScalar *v; 718831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 71949b5e25fSSatish Balay 72049b5e25fSSatish Balay PetscFunctionBegin; 721*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 72249b5e25fSSatish Balay if (yy != xx) { 723*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 72449b5e25fSSatish Balay } else { 72549b5e25fSSatish Balay y = x; 72649b5e25fSSatish Balay } 72749b5e25fSSatish Balay if (zz != yy) { 728a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 729*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 730a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 73149b5e25fSSatish Balay } else { 73249b5e25fSSatish Balay z = y; 73349b5e25fSSatish Balay } 73449b5e25fSSatish Balay 73549b5e25fSSatish Balay v = a->a; 73649b5e25fSSatish Balay xb = x; 73749b5e25fSSatish Balay 73849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 73949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 74049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 74149b5e25fSSatish Balay ib = aj + *ai; 742831a3094SHong Zhang jmin = 0; 7437fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 74449b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 74549b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 746831a3094SHong Zhang v += 4; jmin++; 7477fbae186SHong Zhang } 748831a3094SHong Zhang for (j=jmin; j<n; j++) { 74949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 75049b5e25fSSatish Balay cval = ib[j]*2; 75149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 75249b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 75349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 75449b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 75549b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 75649b5e25fSSatish Balay v += 4; 75749b5e25fSSatish Balay } 75849b5e25fSSatish Balay xb +=2; ai++; 75949b5e25fSSatish Balay } 76049b5e25fSSatish Balay 761*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 762*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 763*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 76449b5e25fSSatish Balay 7656c6c5352SBarry Smith PetscLogFlops(4*(a->nz*2 - A->m)); 76649b5e25fSSatish Balay PetscFunctionReturn(0); 76749b5e25fSSatish Balay } 76849b5e25fSSatish Balay 7694a2ae208SSatish Balay #undef __FUNCT__ 7704a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 77149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 77249b5e25fSSatish Balay { 77349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77487828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3; 77549b5e25fSSatish Balay MatScalar *v; 776831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 77749b5e25fSSatish Balay 77849b5e25fSSatish Balay PetscFunctionBegin; 779*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 78049b5e25fSSatish Balay if (yy != xx) { 781*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 78249b5e25fSSatish Balay } else { 78349b5e25fSSatish Balay y = x; 78449b5e25fSSatish Balay } 78549b5e25fSSatish Balay if (zz != yy) { 786a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 787*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 788a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 78949b5e25fSSatish Balay } else { 79049b5e25fSSatish Balay z = y; 79149b5e25fSSatish Balay } 79249b5e25fSSatish Balay 79349b5e25fSSatish Balay v = a->a; 79449b5e25fSSatish Balay xb = x; 79549b5e25fSSatish Balay 79649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 79749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 79849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 79949b5e25fSSatish Balay ib = aj + *ai; 800831a3094SHong Zhang jmin = 0; 8017fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 80249b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 80349b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 80449b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 805831a3094SHong Zhang v += 9; jmin++; 8067fbae186SHong Zhang } 807831a3094SHong Zhang for (j=jmin; j<n; j++) { 80849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 80949b5e25fSSatish Balay cval = ib[j]*3; 81049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 81149b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 81249b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 81349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 81449b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 81549b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 81649b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 81749b5e25fSSatish Balay v += 9; 81849b5e25fSSatish Balay } 81949b5e25fSSatish Balay xb +=3; ai++; 82049b5e25fSSatish Balay } 82149b5e25fSSatish Balay 822*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 823*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 824*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 82549b5e25fSSatish Balay 8266c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m)); 82749b5e25fSSatish Balay PetscFunctionReturn(0); 82849b5e25fSSatish Balay } 82949b5e25fSSatish Balay 8304a2ae208SSatish Balay #undef __FUNCT__ 8314a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 83249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 83349b5e25fSSatish Balay { 83449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 83587828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4; 83649b5e25fSSatish Balay MatScalar *v; 837831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 83849b5e25fSSatish Balay 83949b5e25fSSatish Balay PetscFunctionBegin; 840*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 84149b5e25fSSatish Balay if (yy != xx) { 842*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 84349b5e25fSSatish Balay } else { 84449b5e25fSSatish Balay y = x; 84549b5e25fSSatish Balay } 84649b5e25fSSatish Balay if (zz != yy) { 847a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 848*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 849a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 85049b5e25fSSatish Balay } else { 85149b5e25fSSatish Balay z = y; 85249b5e25fSSatish Balay } 85349b5e25fSSatish Balay 85449b5e25fSSatish Balay v = a->a; 85549b5e25fSSatish Balay xb = x; 85649b5e25fSSatish Balay 85749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 85849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 85949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 86049b5e25fSSatish Balay ib = aj + *ai; 861831a3094SHong Zhang jmin = 0; 8627fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 86349b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 86449b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 86549b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 86649b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 867831a3094SHong Zhang v += 16; jmin++; 8687fbae186SHong Zhang } 869831a3094SHong Zhang for (j=jmin; j<n; j++) { 87049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 87149b5e25fSSatish Balay cval = ib[j]*4; 87249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 87349b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 87449b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 87549b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 87649b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 87749b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 87849b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 87949b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 88049b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 88149b5e25fSSatish Balay v += 16; 88249b5e25fSSatish Balay } 88349b5e25fSSatish Balay xb +=4; ai++; 88449b5e25fSSatish Balay } 88549b5e25fSSatish Balay 886*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 887*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 888*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 88949b5e25fSSatish Balay 8906c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m)); 89149b5e25fSSatish Balay PetscFunctionReturn(0); 89249b5e25fSSatish Balay } 89349b5e25fSSatish Balay 8944a2ae208SSatish Balay #undef __FUNCT__ 8954a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 89649b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 89749b5e25fSSatish Balay { 89849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 89987828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 90049b5e25fSSatish Balay MatScalar *v; 901831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 90249b5e25fSSatish Balay 90349b5e25fSSatish Balay PetscFunctionBegin; 904*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 90549b5e25fSSatish Balay if (yy != xx) { 906*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 90749b5e25fSSatish Balay } else { 90849b5e25fSSatish Balay y = x; 90949b5e25fSSatish Balay } 91049b5e25fSSatish Balay if (zz != yy) { 911a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 912*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 913a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 91449b5e25fSSatish Balay } else { 91549b5e25fSSatish Balay z = y; 91649b5e25fSSatish Balay } 91749b5e25fSSatish Balay 91849b5e25fSSatish Balay v = a->a; 91949b5e25fSSatish Balay xb = x; 92049b5e25fSSatish Balay 92149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 92249b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 92349b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 92449b5e25fSSatish Balay ib = aj + *ai; 925831a3094SHong Zhang jmin = 0; 9267fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 92749b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 92849b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 92949b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 93049b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 93149b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 932831a3094SHong Zhang v += 25; jmin++; 9337fbae186SHong Zhang } 934831a3094SHong Zhang for (j=jmin; j<n; j++) { 93549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 93649b5e25fSSatish Balay cval = ib[j]*5; 93749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 93849b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 93949b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 94049b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 94149b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 94249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 94349b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 94449b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 94549b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 94649b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 94749b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 94849b5e25fSSatish Balay v += 25; 94949b5e25fSSatish Balay } 95049b5e25fSSatish Balay xb +=5; ai++; 95149b5e25fSSatish Balay } 95249b5e25fSSatish Balay 953*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 955*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 95649b5e25fSSatish Balay 9576c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m)); 95849b5e25fSSatish Balay PetscFunctionReturn(0); 95949b5e25fSSatish Balay } 9604a2ae208SSatish Balay #undef __FUNCT__ 9614a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 96249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 96349b5e25fSSatish Balay { 96449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96587828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 96649b5e25fSSatish Balay MatScalar *v; 967831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 96849b5e25fSSatish Balay 96949b5e25fSSatish Balay PetscFunctionBegin; 970*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 97149b5e25fSSatish Balay if (yy != xx) { 972*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 97349b5e25fSSatish Balay } else { 97449b5e25fSSatish Balay y = x; 97549b5e25fSSatish Balay } 97649b5e25fSSatish Balay if (zz != yy) { 977a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 978*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 979a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 98049b5e25fSSatish Balay } else { 98149b5e25fSSatish Balay z = y; 98249b5e25fSSatish Balay } 98349b5e25fSSatish Balay 98449b5e25fSSatish Balay v = a->a; 98549b5e25fSSatish Balay xb = x; 98649b5e25fSSatish Balay 98749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 98849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 98949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 99049b5e25fSSatish Balay ib = aj + *ai; 991831a3094SHong Zhang jmin = 0; 9927fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 99349b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 99449b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 99549b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 99649b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 99749b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 99849b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 999831a3094SHong Zhang v += 36; jmin++; 10007fbae186SHong Zhang } 1001831a3094SHong Zhang for (j=jmin; j<n; j++) { 100249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 100349b5e25fSSatish Balay cval = ib[j]*6; 100449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 100549b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 100649b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 100749b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 100849b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 100949b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 101049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 101149b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 101249b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 101349b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 101449b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 101549b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 101649b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 101749b5e25fSSatish Balay v += 36; 101849b5e25fSSatish Balay } 101949b5e25fSSatish Balay xb +=6; ai++; 102049b5e25fSSatish Balay } 102149b5e25fSSatish Balay 1022*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1023*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1024*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 102549b5e25fSSatish Balay 10266c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m)); 102749b5e25fSSatish Balay PetscFunctionReturn(0); 102849b5e25fSSatish Balay } 102949b5e25fSSatish Balay 10304a2ae208SSatish Balay #undef __FUNCT__ 10314a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 103249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 103349b5e25fSSatish Balay { 103449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 103587828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 103649b5e25fSSatish Balay MatScalar *v; 1037831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 103849b5e25fSSatish Balay 103949b5e25fSSatish Balay PetscFunctionBegin; 1040*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 104149b5e25fSSatish Balay if (yy != xx) { 1042*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 104349b5e25fSSatish Balay } else { 104449b5e25fSSatish Balay y = x; 104549b5e25fSSatish Balay } 104649b5e25fSSatish Balay if (zz != yy) { 1047a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1048*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1049a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 105049b5e25fSSatish Balay } else { 105149b5e25fSSatish Balay z = y; 105249b5e25fSSatish Balay } 105349b5e25fSSatish Balay 105449b5e25fSSatish Balay v = a->a; 105549b5e25fSSatish Balay xb = x; 105649b5e25fSSatish Balay 105749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 105849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 105949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 106049b5e25fSSatish Balay ib = aj + *ai; 1061831a3094SHong Zhang jmin = 0; 10627fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 106349b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 106449b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 106549b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 106649b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 106749b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 106849b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 106949b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1070831a3094SHong Zhang v += 49; jmin++; 10717fbae186SHong Zhang } 1072831a3094SHong Zhang for (j=jmin; j<n; j++) { 107349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 107449b5e25fSSatish Balay cval = ib[j]*7; 107549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 107649b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 107749b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 107849b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 107949b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 108049b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 108149b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 108249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 108349b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 108449b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 108549b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 108649b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 108749b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 108849b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 108949b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 109049b5e25fSSatish Balay v += 49; 109149b5e25fSSatish Balay } 109249b5e25fSSatish Balay xb +=7; ai++; 109349b5e25fSSatish Balay } 109449b5e25fSSatish Balay 1095*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1096*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1097*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 109849b5e25fSSatish Balay 10996c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m)); 110049b5e25fSSatish Balay PetscFunctionReturn(0); 110149b5e25fSSatish Balay } 110249b5e25fSSatish Balay 11034a2ae208SSatish Balay #undef __FUNCT__ 11044a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 110549b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 110649b5e25fSSatish Balay { 110749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 110887828ca2SBarry Smith PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1109066653e3SSatish Balay MatScalar *v; 1110066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1111066653e3SSatish Balay int ncols,k; 111249b5e25fSSatish Balay 111349b5e25fSSatish Balay PetscFunctionBegin; 1114*1ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 111549b5e25fSSatish Balay if (yy != xx) { 1116*1ebc52fbSHong Zhang ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 111749b5e25fSSatish Balay } else { 111849b5e25fSSatish Balay y = x; 111949b5e25fSSatish Balay } 112049b5e25fSSatish Balay if (zz != yy) { 1121a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1122*1ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1123a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 112449b5e25fSSatish Balay } else { 112549b5e25fSSatish Balay z = y; 112649b5e25fSSatish Balay } 112749b5e25fSSatish Balay 112849b5e25fSSatish Balay aj = a->j; 112949b5e25fSSatish Balay v = a->a; 113049b5e25fSSatish Balay ii = a->i; 113149b5e25fSSatish Balay 113249b5e25fSSatish Balay if (!a->mult_work) { 113387828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 113449b5e25fSSatish Balay } 113549b5e25fSSatish Balay work = a->mult_work; 113649b5e25fSSatish Balay 113749b5e25fSSatish Balay 113849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 113949b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 114049b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 114149b5e25fSSatish Balay 114249b5e25fSSatish Balay /* upper triangular part */ 114349b5e25fSSatish Balay for (j=0; j<n; j++) { 114449b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 114549b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 114649b5e25fSSatish Balay workt += bs; 114749b5e25fSSatish Balay } 114849b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 114949b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 115049b5e25fSSatish Balay 115149b5e25fSSatish Balay /* strict lower triangular part */ 1152831a3094SHong Zhang idx = aj+ii[0]; 1153831a3094SHong Zhang if (*idx == i){ 115496b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1155831a3094SHong Zhang } 115649b5e25fSSatish Balay if (ncols > 0){ 115749b5e25fSSatish Balay workt = work; 115887828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1159831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1160831a3094SHong Zhang for (j=0; j<n; j++) { 1161831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 1162831a3094SHong Zhang /* idx++; */ 116349b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 116449b5e25fSSatish Balay workt += bs; 116549b5e25fSSatish Balay } 116649b5e25fSSatish Balay } 116749b5e25fSSatish Balay 116849b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 116949b5e25fSSatish Balay } 117049b5e25fSSatish Balay 1171*1ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1172*1ebc52fbSHong Zhang if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1173*1ebc52fbSHong Zhang if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 117449b5e25fSSatish Balay 11756c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 117649b5e25fSSatish Balay PetscFunctionReturn(0); 117749b5e25fSSatish Balay } 117849b5e25fSSatish Balay 11794a2ae208SSatish Balay #undef __FUNCT__ 11804a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqSBAIJ" 118149b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 118249b5e25fSSatish Balay { 1183eeeff2ecSHong Zhang int ierr; 1184eeeff2ecSHong Zhang 118549b5e25fSSatish Balay PetscFunctionBegin; 1186eeeff2ecSHong Zhang ierr = MatMult(A,xx,zz);CHKERRQ(ierr); 1187eeeff2ecSHong Zhang PetscFunctionReturn(0); 118849b5e25fSSatish Balay } 118949b5e25fSSatish Balay 11904a2ae208SSatish Balay #undef __FUNCT__ 11914a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ" 119249b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 119349b5e25fSSatish Balay 119449b5e25fSSatish Balay { 1195eeeff2ecSHong Zhang int ierr; 1196eeeff2ecSHong Zhang 119749b5e25fSSatish Balay PetscFunctionBegin; 1198eeeff2ecSHong Zhang ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1199eeeff2ecSHong Zhang PetscFunctionReturn(0); 120049b5e25fSSatish Balay } 120149b5e25fSSatish Balay 12024a2ae208SSatish Balay #undef __FUNCT__ 12034a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1204268466fbSBarry Smith int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA) 120549b5e25fSSatish Balay { 120649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 12076c6c5352SBarry Smith int one = 1,totalnz = a->bs2*a->nz; 120849b5e25fSSatish Balay 120949b5e25fSSatish Balay PetscFunctionBegin; 1210268466fbSBarry Smith BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one); 1211b0a32e0cSBarry Smith PetscLogFlops(totalnz); 121249b5e25fSSatish Balay PetscFunctionReturn(0); 121349b5e25fSSatish Balay } 121449b5e25fSSatish Balay 12154a2ae208SSatish Balay #undef __FUNCT__ 12164a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 121749b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 121849b5e25fSSatish Balay { 121949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 122049b5e25fSSatish Balay MatScalar *v = a->a; 122149b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1222831a3094SHong Zhang int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 1223831a3094SHong Zhang int *jl,*il,jmin,jmax,ierr,nexti,ik,*col; 122449b5e25fSSatish Balay 122549b5e25fSSatish Balay PetscFunctionBegin; 122649b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 122749b5e25fSSatish Balay for (k=0; k<mbs; k++){ 122849b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1229831a3094SHong Zhang col = aj + jmin; 1230831a3094SHong Zhang if (*col == k){ /* diagonal block */ 123149b5e25fSSatish Balay for (i=0; i<bs2; i++){ 123249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 123349b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 123449b5e25fSSatish Balay #else 123549b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 123649b5e25fSSatish Balay #endif 123749b5e25fSSatish Balay } 1238831a3094SHong Zhang jmin++; 1239831a3094SHong Zhang } 1240831a3094SHong Zhang for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 124149b5e25fSSatish Balay for (i=0; i<bs2; i++){ 124249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 124349b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 124449b5e25fSSatish Balay #else 124549b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 124649b5e25fSSatish Balay #endif 124749b5e25fSSatish Balay } 124849b5e25fSSatish Balay } 124949b5e25fSSatish Balay } 125049b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 125149b5e25fSSatish Balay 125249b5e25fSSatish Balay } else if (type == NORM_INFINITY) { /* maximum row sum */ 125382502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr); 125482502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 125582502324SSatish Balay ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr); 125649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 125749b5e25fSSatish Balay jl[i] = mbs; il[0] = 0; 125849b5e25fSSatish Balay } 125949b5e25fSSatish Balay 126049b5e25fSSatish Balay *norm = 0.0; 126149b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 126249b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 126349b5e25fSSatish Balay 126449b5e25fSSatish Balay /*-- col sum --*/ 126549b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1266831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1267831a3094SHong Zhang at step k */ 126849b5e25fSSatish Balay while (i<mbs){ 126949b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 127049b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 127149b5e25fSSatish Balay for (j=0; j<bs; j++){ 127249b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 127349b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 127449b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 127549b5e25fSSatish Balay } 127649b5e25fSSatish Balay } 127749b5e25fSSatish Balay /* update il, jl */ 1278831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1279831a3094SHong Zhang jmax = a->i[i+1]; 128049b5e25fSSatish Balay if (jmin < jmax){ 128149b5e25fSSatish Balay il[i] = jmin; 128249b5e25fSSatish Balay j = a->j[jmin]; 128349b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 128449b5e25fSSatish Balay } 128549b5e25fSSatish Balay i = nexti; 128649b5e25fSSatish Balay } 128749b5e25fSSatish Balay 128849b5e25fSSatish Balay /*-- row sum --*/ 128949b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 129049b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 129149b5e25fSSatish Balay for (j=0; j<bs; j++){ 129249b5e25fSSatish Balay v = a->a + i*bs2 + j; 129349b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 129449b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); 129549b5e25fSSatish Balay v += bs; 129649b5e25fSSatish Balay } 129749b5e25fSSatish Balay } 129849b5e25fSSatish Balay } 129949b5e25fSSatish Balay /* add k_th block row to il, jl */ 1300831a3094SHong Zhang col = aj+jmin; 1301831a3094SHong Zhang if (*col == k) jmin++; 130249b5e25fSSatish Balay if (jmin < jmax){ 130349b5e25fSSatish Balay il[k] = jmin; 130449b5e25fSSatish Balay j = a->j[jmin]; 130549b5e25fSSatish Balay jl[k] = jl[j]; jl[j] = k; 130649b5e25fSSatish Balay } 130749b5e25fSSatish Balay for (j=0; j<bs; j++){ 130849b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 130949b5e25fSSatish Balay } 131049b5e25fSSatish Balay } 131149b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 131249b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 131349b5e25fSSatish Balay ierr = PetscFree(sum);CHKERRQ(ierr); 131449b5e25fSSatish Balay } else { 1315347d480fSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 131649b5e25fSSatish Balay } 131749b5e25fSSatish Balay PetscFunctionReturn(0); 131849b5e25fSSatish Balay } 131949b5e25fSSatish Balay 13204a2ae208SSatish Balay #undef __FUNCT__ 13214a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 132249b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 132349b5e25fSSatish Balay { 132449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 132549b5e25fSSatish Balay int ierr; 132649b5e25fSSatish Balay 132749b5e25fSSatish Balay PetscFunctionBegin; 132849b5e25fSSatish Balay 132949b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 13306c6c5352SBarry Smith if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) { 1331ef511fbeSHong Zhang *flg = PETSC_FALSE; 1332ef511fbeSHong Zhang PetscFunctionReturn(0); 133349b5e25fSSatish Balay } 133449b5e25fSSatish Balay 133549b5e25fSSatish Balay /* if the a->i are the same */ 133649b5e25fSSatish Balay ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 133749b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 133849b5e25fSSatish Balay PetscFunctionReturn(0); 133949b5e25fSSatish Balay } 134049b5e25fSSatish Balay 134149b5e25fSSatish Balay /* if a->j are the same */ 13426c6c5352SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr); 134349b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 134449b5e25fSSatish Balay PetscFunctionReturn(0); 134549b5e25fSSatish Balay } 134649b5e25fSSatish Balay /* if a->a are the same */ 13476c6c5352SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 134849b5e25fSSatish Balay 1349935af2e7SHong Zhang PetscFunctionReturn(0); 135049b5e25fSSatish Balay } 135149b5e25fSSatish Balay 13524a2ae208SSatish Balay #undef __FUNCT__ 13534a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 135449b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 135549b5e25fSSatish Balay { 135649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 135749b5e25fSSatish Balay int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 135887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 135949b5e25fSSatish Balay MatScalar *aa,*aa_j; 136049b5e25fSSatish Balay 136149b5e25fSSatish Balay PetscFunctionBegin; 136249b5e25fSSatish Balay bs = a->bs; 136382799104SHong Zhang if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 136482799104SHong Zhang 136549b5e25fSSatish Balay aa = a->a; 136649b5e25fSSatish Balay ai = a->i; 136749b5e25fSSatish Balay aj = a->j; 136849b5e25fSSatish Balay ambs = a->mbs; 136949b5e25fSSatish Balay bs2 = a->bs2; 137049b5e25fSSatish Balay 137149b5e25fSSatish Balay ierr = VecSet(&zero,v);CHKERRQ(ierr); 1372*1ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 137349b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1374ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 137549b5e25fSSatish Balay for (i=0; i<ambs; i++) { 137649b5e25fSSatish Balay j=ai[i]; 137749b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 137849b5e25fSSatish Balay row = i*bs; 137949b5e25fSSatish Balay aa_j = aa + j*bs2; 138082799104SHong Zhang if (A->factor && bs==1){ 138182799104SHong Zhang for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k]; 138282799104SHong Zhang } else { 138349b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 138449b5e25fSSatish Balay } 138549b5e25fSSatish Balay } 138682799104SHong Zhang } 138782799104SHong Zhang 1388*1ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 138949b5e25fSSatish Balay PetscFunctionReturn(0); 139049b5e25fSSatish Balay } 139149b5e25fSSatish Balay 13924a2ae208SSatish Balay #undef __FUNCT__ 13934a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 139449b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 139549b5e25fSSatish Balay { 139649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 139787828ca2SBarry Smith PetscScalar *l,*r,x,*li,*ri; 139849b5e25fSSatish Balay MatScalar *aa,*v; 1399066653e3SSatish Balay int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 140049b5e25fSSatish Balay 140149b5e25fSSatish Balay PetscFunctionBegin; 140249b5e25fSSatish Balay ai = a->i; 140349b5e25fSSatish Balay aj = a->j; 140449b5e25fSSatish Balay aa = a->a; 1405ef511fbeSHong Zhang m = A->m; 140649b5e25fSSatish Balay bs = a->bs; 140749b5e25fSSatish Balay mbs = a->mbs; 140849b5e25fSSatish Balay bs2 = a->bs2; 140949b5e25fSSatish Balay 141049b5e25fSSatish Balay if (ll != rr) { 1411347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 141249b5e25fSSatish Balay } 141349b5e25fSSatish Balay if (ll) { 1414*1ebc52fbSHong Zhang ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 141549b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1416347d480fSBarry Smith if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 141749b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 141849b5e25fSSatish Balay M = ai[i+1] - ai[i]; 141949b5e25fSSatish Balay li = l + i*bs; 142049b5e25fSSatish Balay v = aa + bs2*ai[i]; 142149b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 142249b5e25fSSatish Balay for (k=0; k<bs2; k++) { 142349b5e25fSSatish Balay (*v++) *= li[k%bs]; 142449b5e25fSSatish Balay } 142549b5e25fSSatish Balay #ifdef CONT 142649b5e25fSSatish Balay /* will be used to replace the above loop */ 142749b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 142849b5e25fSSatish Balay for (k=0; k<bs; k++) { /* column value */ 142949b5e25fSSatish Balay x = ri[k]; 143049b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 143149b5e25fSSatish Balay } 143249b5e25fSSatish Balay #endif 143349b5e25fSSatish Balay 143449b5e25fSSatish Balay } 143549b5e25fSSatish Balay } 1436*1ebc52fbSHong Zhang ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 14376c6c5352SBarry Smith PetscLogFlops(2*a->nz); 143849b5e25fSSatish Balay } 143949b5e25fSSatish Balay /* will be deleted */ 144049b5e25fSSatish Balay if (rr) { 1441*1ebc52fbSHong Zhang ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 144249b5e25fSSatish Balay ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1443347d480fSBarry Smith if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 144449b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 144549b5e25fSSatish Balay M = ai[i+1] - ai[i]; 144649b5e25fSSatish Balay v = aa + bs2*ai[i]; 144749b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 144849b5e25fSSatish Balay ri = r + bs*aj[ai[i]+j]; 144949b5e25fSSatish Balay for (k=0; k<bs; k++) { 145049b5e25fSSatish Balay x = ri[k]; 145149b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 145249b5e25fSSatish Balay } 145349b5e25fSSatish Balay } 145449b5e25fSSatish Balay } 1455*1ebc52fbSHong Zhang ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 14566c6c5352SBarry Smith PetscLogFlops(a->nz); 145749b5e25fSSatish Balay } 145849b5e25fSSatish Balay PetscFunctionReturn(0); 145949b5e25fSSatish Balay } 146049b5e25fSSatish Balay 14614a2ae208SSatish Balay #undef __FUNCT__ 14624a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 146349b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 146449b5e25fSSatish Balay { 146549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 146649b5e25fSSatish Balay 146749b5e25fSSatish Balay PetscFunctionBegin; 1468ef511fbeSHong Zhang info->rows_global = (double)A->m; 1469ef511fbeSHong Zhang info->columns_global = (double)A->m; 1470ef511fbeSHong Zhang info->rows_local = (double)A->m; 1471ef511fbeSHong Zhang info->columns_local = (double)A->m; 147249b5e25fSSatish Balay info->block_size = a->bs2; 14736c6c5352SBarry Smith info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */ 14746c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 147549b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 147649b5e25fSSatish Balay info->assemblies = A->num_ass; 147749b5e25fSSatish Balay info->mallocs = a->reallocs; 147849b5e25fSSatish Balay info->memory = A->mem; 147949b5e25fSSatish Balay if (A->factor) { 148049b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 148149b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 148249b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 148349b5e25fSSatish Balay } else { 148449b5e25fSSatish Balay info->fill_ratio_given = 0; 148549b5e25fSSatish Balay info->fill_ratio_needed = 0; 148649b5e25fSSatish Balay info->factor_mallocs = 0; 148749b5e25fSSatish Balay } 148849b5e25fSSatish Balay PetscFunctionReturn(0); 148949b5e25fSSatish Balay } 149049b5e25fSSatish Balay 149149b5e25fSSatish Balay 14924a2ae208SSatish Balay #undef __FUNCT__ 14934a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 149449b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A) 149549b5e25fSSatish Balay { 149649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 149749b5e25fSSatish Balay int ierr; 149849b5e25fSSatish Balay 149949b5e25fSSatish Balay PetscFunctionBegin; 150049b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 150149b5e25fSSatish Balay PetscFunctionReturn(0); 150249b5e25fSSatish Balay } 1503dc354874SHong Zhang 15044a2ae208SSatish Balay #undef __FUNCT__ 15054a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ" 1506dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1507dc354874SHong Zhang { 1508dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1509d0f6400bSHong Zhang int ierr,i,j,n,row,col,bs,*ai,*aj,mbs; 1510c3fca9a7SHong Zhang PetscReal atmp; 1511273d9f13SBarry Smith MatScalar *aa; 151287828ca2SBarry Smith PetscScalar zero = 0.0,*x; 1513d0f6400bSHong Zhang int ncols,brow,bcol,krow,kcol; 1514dc354874SHong Zhang 1515dc354874SHong Zhang PetscFunctionBegin; 1516dc354874SHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1517dc354874SHong Zhang bs = a->bs; 1518dc354874SHong Zhang aa = a->a; 1519dc354874SHong Zhang ai = a->i; 1520dc354874SHong Zhang aj = a->j; 152144117c81SHong Zhang mbs = a->mbs; 1522dc354874SHong Zhang 1523dc354874SHong Zhang ierr = VecSet(&zero,v);CHKERRQ(ierr); 1524*1ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1525dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1526ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 152744117c81SHong Zhang for (i=0; i<mbs; i++) { 1528d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1529d0f6400bSHong Zhang brow = bs*i; 153044117c81SHong Zhang for (j=0; j<ncols; j++){ 1531d0f6400bSHong Zhang bcol = bs*(*aj); 153244117c81SHong Zhang for (kcol=0; kcol<bs; kcol++){ 1533d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 153444117c81SHong Zhang for (krow=0; krow<bs; krow++){ 1535d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1536d0f6400bSHong Zhang row = brow + krow; /* row index */ 153744117c81SHong Zhang /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 1538c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1539c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 154044117c81SHong Zhang } 154144117c81SHong Zhang } 1542d0f6400bSHong Zhang aj++; 1543dc354874SHong Zhang } 1544dc354874SHong Zhang } 1545*1ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1546dc354874SHong Zhang PetscFunctionReturn(0); 1547dc354874SHong Zhang } 1548