1*d9eff348SSatish Balay /*$Id: sbaij2.c,v 1.5 2000/08/13 15:03:03 bsmith Exp balay $*/ 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "petscsys.h" 449b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 549b5e25fSSatish Balay #include "src/vec/vecimpl.h" 649b5e25fSSatish Balay #include "src/inline/spops.h" 749b5e25fSSatish Balay #include "src/inline/ilu.h" 849b5e25fSSatish Balay #include "petscbt.h" 949b5e25fSSatish Balay #include "sbaij.h" 1049b5e25fSSatish Balay 1149b5e25fSSatish Balay #undef __FUNC__ 1249b5e25fSSatish Balay #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ" 1349b5e25fSSatish Balay int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 1449b5e25fSSatish Balay { 1549b5e25fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1649b5e25fSSatish Balay int row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival; 1749b5e25fSSatish Balay int start,end,*ai,*aj,bs,*nidx2; 1849b5e25fSSatish Balay PetscBT table; 1949b5e25fSSatish Balay 2049b5e25fSSatish Balay PetscFunctionBegin; 2149b5e25fSSatish Balay m = a->mbs; 2249b5e25fSSatish Balay ai = a->i; 2349b5e25fSSatish Balay aj = a->j; 2449b5e25fSSatish Balay bs = a->bs; 2549b5e25fSSatish Balay 2649b5e25fSSatish Balay if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative overlap specified"); 2749b5e25fSSatish Balay 2849b5e25fSSatish Balay ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 2949b5e25fSSatish Balay nidx = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(nidx); 3049b5e25fSSatish Balay nidx2 = (int *)PetscMalloc((a->m+1)*sizeof(int));CHKPTRQ(nidx2); 3149b5e25fSSatish Balay 3249b5e25fSSatish Balay for (i=0; i<is_max; i++) { 3349b5e25fSSatish Balay /* Initialise the two local arrays */ 3449b5e25fSSatish Balay isz = 0; 3549b5e25fSSatish Balay ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 3649b5e25fSSatish Balay 3749b5e25fSSatish Balay /* Extract the indices, assume there can be duplicate entries */ 3849b5e25fSSatish Balay ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 3949b5e25fSSatish Balay ierr = ISGetSize(is[i],&n);CHKERRQ(ierr); 4049b5e25fSSatish Balay 4149b5e25fSSatish Balay /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 4249b5e25fSSatish Balay for (j=0; j<n ; ++j){ 4349b5e25fSSatish Balay ival = idx[j]/bs; /* convert the indices into block indices */ 4449b5e25fSSatish Balay if (ival>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"index greater than mat-dim"); 4549b5e25fSSatish Balay if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 4649b5e25fSSatish Balay } 4749b5e25fSSatish Balay ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 4849b5e25fSSatish Balay ierr = ISDestroy(is[i]);CHKERRQ(ierr); 4949b5e25fSSatish Balay k = 0; 5049b5e25fSSatish Balay for (j=0; j<ov; j++){ /* for each overlap*/ 5149b5e25fSSatish Balay n = isz; 5249b5e25fSSatish Balay for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 5349b5e25fSSatish Balay row = nidx[k]; 5449b5e25fSSatish Balay start = ai[row]; 5549b5e25fSSatish Balay end = ai[row+1]; 5649b5e25fSSatish Balay for (l = start; l<end ; l++){ 5749b5e25fSSatish Balay val = aj[l]; 5849b5e25fSSatish Balay if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 5949b5e25fSSatish Balay } 6049b5e25fSSatish Balay } 6149b5e25fSSatish Balay } 6249b5e25fSSatish Balay /* expand the Index Set */ 6349b5e25fSSatish Balay for (j=0; j<isz; j++) { 6449b5e25fSSatish Balay for (k=0; k<bs; k++) 6549b5e25fSSatish Balay nidx2[j*bs+k] = nidx[j]*bs+k; 6649b5e25fSSatish Balay } 6749b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 6849b5e25fSSatish Balay } 6949b5e25fSSatish Balay ierr = PetscBTDestroy(table);CHKERRQ(ierr); 7049b5e25fSSatish Balay ierr = PetscFree(nidx);CHKERRQ(ierr); 7149b5e25fSSatish Balay ierr = PetscFree(nidx2);CHKERRQ(ierr); 7249b5e25fSSatish Balay PetscFunctionReturn(0); 7349b5e25fSSatish Balay } 7449b5e25fSSatish Balay 7549b5e25fSSatish Balay #undef __FUNC__ 7649b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private" 7749b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 7849b5e25fSSatish Balay { 7949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 8049b5e25fSSatish Balay int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 8149b5e25fSSatish Balay int row,mat_i,*mat_j,tcol,*mat_ilen; 8249b5e25fSSatish Balay int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 8349b5e25fSSatish Balay int *aj = a->j,*ai = a->i; 8449b5e25fSSatish Balay MatScalar *mat_a; 8549b5e25fSSatish Balay Mat C; 8649b5e25fSSatish Balay PetscTruth flag; 8749b5e25fSSatish Balay 8849b5e25fSSatish Balay PetscFunctionBegin; 8949b5e25fSSatish Balay 9049b5e25fSSatish Balay if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 9149b5e25fSSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 9249b5e25fSSatish Balay if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IS is not sorted"); 9349b5e25fSSatish Balay 9449b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 9549b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 9649b5e25fSSatish Balay 9749b5e25fSSatish Balay smap = (int*)PetscMalloc((1+oldcols)*sizeof(int));CHKPTRQ(smap); 9849b5e25fSSatish Balay ssmap = smap; 9949b5e25fSSatish Balay lens = (int*)PetscMalloc((1+nrows)*sizeof(int));CHKPTRQ(lens); 10049b5e25fSSatish Balay ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 10149b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 10249b5e25fSSatish Balay /* determine lens of each row */ 10349b5e25fSSatish Balay for (i=0; i<nrows; i++) { 10449b5e25fSSatish Balay kstart = ai[irow[i]]; 10549b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 10649b5e25fSSatish Balay lens[i] = 0; 10749b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 10849b5e25fSSatish Balay if (ssmap[aj[k]]) { 10949b5e25fSSatish Balay lens[i]++; 11049b5e25fSSatish Balay } 11149b5e25fSSatish Balay } 11249b5e25fSSatish Balay } 11349b5e25fSSatish Balay /* Create and fill new matrix */ 11449b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 11549b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 11649b5e25fSSatish Balay 11749b5e25fSSatish Balay if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Submatrix wrong size"); 11849b5e25fSSatish Balay ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 11949b5e25fSSatish Balay if (flag == PETSC_FALSE) { 12049b5e25fSSatish Balay SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong no of nonzeros"); 12149b5e25fSSatish Balay } 12249b5e25fSSatish Balay ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 12349b5e25fSSatish Balay C = *B; 12449b5e25fSSatish Balay } else { 12549b5e25fSSatish Balay ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 12649b5e25fSSatish Balay } 12749b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 12849b5e25fSSatish Balay for (i=0; i<nrows; i++) { 12949b5e25fSSatish Balay row = irow[i]; 13049b5e25fSSatish Balay kstart = ai[row]; 13149b5e25fSSatish Balay kend = kstart + a->ilen[row]; 13249b5e25fSSatish Balay mat_i = c->i[i]; 13349b5e25fSSatish Balay mat_j = c->j + mat_i; 13449b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 13549b5e25fSSatish Balay mat_ilen = c->ilen + i; 13649b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 13749b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 13849b5e25fSSatish Balay *mat_j++ = tcol - 1; 13949b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 14049b5e25fSSatish Balay mat_a += bs2; 14149b5e25fSSatish Balay (*mat_ilen)++; 14249b5e25fSSatish Balay } 14349b5e25fSSatish Balay } 14449b5e25fSSatish Balay } 14549b5e25fSSatish Balay 14649b5e25fSSatish Balay /* Free work space */ 14749b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 14849b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 14949b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15049b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15149b5e25fSSatish Balay 15249b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 15349b5e25fSSatish Balay *B = C; 15449b5e25fSSatish Balay PetscFunctionReturn(0); 15549b5e25fSSatish Balay } 15649b5e25fSSatish Balay 15749b5e25fSSatish Balay #undef __FUNC__ 15849b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ" 15949b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 16049b5e25fSSatish Balay { 16149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 16249b5e25fSSatish Balay IS is1; 16349b5e25fSSatish Balay int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 16449b5e25fSSatish Balay 16549b5e25fSSatish Balay PetscFunctionBegin; 16649b5e25fSSatish Balay if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 16749b5e25fSSatish Balay 16849b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 16949b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 17049b5e25fSSatish Balay 17149b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 17249b5e25fSSatish Balay and form the IS with compressed IS */ 17349b5e25fSSatish Balay vary = (int*)PetscMalloc(2*(a->mbs+1)*sizeof(int));CHKPTRQ(vary); 17449b5e25fSSatish Balay iary = vary + a->mbs; 17549b5e25fSSatish Balay ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 17649b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 17749b5e25fSSatish Balay 17849b5e25fSSatish Balay count = 0; 17949b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 18049b5e25fSSatish Balay if (vary[i]!=0 && vary[i]!=bs) SETERRA(1,0,"Index set does not match blocks"); 18149b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 18249b5e25fSSatish Balay } 18349b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 18449b5e25fSSatish Balay 18549b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 18649b5e25fSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 18749b5e25fSSatish Balay 18849b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 18949b5e25fSSatish Balay ISDestroy(is1); 19049b5e25fSSatish Balay PetscFunctionReturn(0); 19149b5e25fSSatish Balay } 19249b5e25fSSatish Balay 19349b5e25fSSatish Balay #undef __FUNC__ 19449b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ" 19549b5e25fSSatish Balay int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 19649b5e25fSSatish Balay { 19749b5e25fSSatish Balay int ierr,i; 19849b5e25fSSatish Balay 19949b5e25fSSatish Balay PetscFunctionBegin; 20049b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 20149b5e25fSSatish Balay *B = (Mat*)PetscMalloc((n+1)*sizeof(Mat));CHKPTRQ(*B); 20249b5e25fSSatish Balay } 20349b5e25fSSatish Balay 20449b5e25fSSatish Balay for (i=0; i<n; i++) { 20549b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 20649b5e25fSSatish Balay } 20749b5e25fSSatish Balay PetscFunctionReturn(0); 20849b5e25fSSatish Balay } 20949b5e25fSSatish Balay 21049b5e25fSSatish Balay /* -------------------------------------------------------*/ 21149b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 21249b5e25fSSatish Balay /* -------------------------------------------------------*/ 213*d9eff348SSatish Balay #include "petscblaslapack.h" 21449b5e25fSSatish Balay 21549b5e25fSSatish Balay #undef __FUNC__ 21649b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_1" 21749b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 21849b5e25fSSatish Balay { 21949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22049b5e25fSSatish Balay Scalar *x,*z,*xb,x1,zero=0.0; 22149b5e25fSSatish Balay MatScalar *v; 22249b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 22349b5e25fSSatish Balay 22449b5e25fSSatish Balay PetscFunctionBegin; 22549b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 22649b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 22749b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 22849b5e25fSSatish Balay 22949b5e25fSSatish Balay v = a->a; 23049b5e25fSSatish Balay xb = x; 23149b5e25fSSatish Balay 23249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 23349b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 23449b5e25fSSatish Balay x1 = xb[0]; 23549b5e25fSSatish Balay ib = aj + *ai; 23649b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 23749b5e25fSSatish Balay for (j=1; j<n; j++) { 23849b5e25fSSatish Balay cval = *ib; 23949b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 24049b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 24149b5e25fSSatish Balay } 24249b5e25fSSatish Balay xb++; ai++; 24349b5e25fSSatish Balay } 24449b5e25fSSatish Balay 24549b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 24649b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 24749b5e25fSSatish Balay PLogFlops(2*(a->s_nz*2 - a->m) - a->m); /* s_nz = (nz+m)/2 */ 24849b5e25fSSatish Balay PetscFunctionReturn(0); 24949b5e25fSSatish Balay } 25049b5e25fSSatish Balay 25149b5e25fSSatish Balay #undef __FUNC__ 25249b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_2" 25349b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 25449b5e25fSSatish Balay { 25549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 25649b5e25fSSatish Balay Scalar *x,*z,*xb,x1,x2,zero=0.0; 25749b5e25fSSatish Balay MatScalar *v; 25849b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 25949b5e25fSSatish Balay 26049b5e25fSSatish Balay 26149b5e25fSSatish Balay PetscFunctionBegin; 26249b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 26349b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 26449b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 26549b5e25fSSatish Balay 26649b5e25fSSatish Balay v = a->a; 26749b5e25fSSatish Balay xb = x; 26849b5e25fSSatish Balay 26949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 27049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 27149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 27249b5e25fSSatish Balay ib = aj + *ai; 27349b5e25fSSatish Balay /* (diag of A)*x */ 27449b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 27549b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 27649b5e25fSSatish Balay v += 4; 27749b5e25fSSatish Balay for (j=1; j<n; j++) { 27849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 27949b5e25fSSatish Balay cval = ib[j]*2; 28049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 28149b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 28249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 28349b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 28449b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 28549b5e25fSSatish Balay v += 4; 28649b5e25fSSatish Balay } 28749b5e25fSSatish Balay xb +=2; ai++; 28849b5e25fSSatish Balay } 28949b5e25fSSatish Balay 29049b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 29149b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 29249b5e25fSSatish Balay PLogFlops(8*(a->s_nz*2 - a->m) - a->m); 29349b5e25fSSatish Balay PetscFunctionReturn(0); 29449b5e25fSSatish Balay } 29549b5e25fSSatish Balay 29649b5e25fSSatish Balay #undef __FUNC__ 29749b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_3" 29849b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 29949b5e25fSSatish Balay { 30049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 30149b5e25fSSatish Balay Scalar *x,*z,*xb,x1,x2,x3,zero=0.0; 30249b5e25fSSatish Balay MatScalar *v; 30349b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 30449b5e25fSSatish Balay 30549b5e25fSSatish Balay 30649b5e25fSSatish Balay PetscFunctionBegin; 30749b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 30849b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 30949b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 31049b5e25fSSatish Balay 31149b5e25fSSatish Balay v = a->a; 31249b5e25fSSatish Balay xb = x; 31349b5e25fSSatish Balay 31449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 31549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 31649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 31749b5e25fSSatish Balay ib = aj + *ai; 31849b5e25fSSatish Balay /* (diag of A)*x */ 31949b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 32049b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 32149b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 32249b5e25fSSatish Balay v += 9; 32349b5e25fSSatish Balay for (j=1; j<n; j++) { 32449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 32549b5e25fSSatish Balay cval = ib[j]*3; 32649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 32749b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 32849b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 32949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 33049b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 33149b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 33249b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 33349b5e25fSSatish Balay v += 9; 33449b5e25fSSatish Balay } 33549b5e25fSSatish Balay xb +=3; ai++; 33649b5e25fSSatish Balay } 33749b5e25fSSatish Balay 33849b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 33949b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 34049b5e25fSSatish Balay PLogFlops(18*(a->s_nz*2 - a->m) - a->m); 34149b5e25fSSatish Balay PetscFunctionReturn(0); 34249b5e25fSSatish Balay } 34349b5e25fSSatish Balay 34449b5e25fSSatish Balay #undef __FUNC__ 34549b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_4" 34649b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 34749b5e25fSSatish Balay { 34849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 34949b5e25fSSatish Balay Scalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 35049b5e25fSSatish Balay MatScalar *v; 35149b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 35249b5e25fSSatish Balay 35349b5e25fSSatish Balay PetscFunctionBegin; 35449b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 35549b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 35649b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 35749b5e25fSSatish Balay 35849b5e25fSSatish Balay v = a->a; 35949b5e25fSSatish Balay xb = x; 36049b5e25fSSatish Balay 36149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 36249b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 36349b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 36449b5e25fSSatish Balay ib = aj + *ai; 36549b5e25fSSatish Balay /* (diag of A)*x */ 36649b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 36749b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 36849b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 36949b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 37049b5e25fSSatish Balay v += 16; 37149b5e25fSSatish Balay for (j=1; j<n; j++) { 37249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 37349b5e25fSSatish Balay cval = ib[j]*4; 37449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 37549b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 37649b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 37749b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 37849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 37949b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 38049b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 38149b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 38249b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 38349b5e25fSSatish Balay v += 16; 38449b5e25fSSatish Balay } 38549b5e25fSSatish Balay xb +=4; ai++; 38649b5e25fSSatish Balay } 38749b5e25fSSatish Balay 38849b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 38949b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 39049b5e25fSSatish Balay PLogFlops(32*(a->s_nz*2 - a->m) - a->m); 39149b5e25fSSatish Balay PetscFunctionReturn(0); 39249b5e25fSSatish Balay } 39349b5e25fSSatish Balay 39449b5e25fSSatish Balay #undef __FUNC__ 39549b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_5" 39649b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 39749b5e25fSSatish Balay { 39849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 39949b5e25fSSatish Balay Scalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 40049b5e25fSSatish Balay MatScalar *v; 40149b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 40249b5e25fSSatish Balay 40349b5e25fSSatish Balay PetscFunctionBegin; 40449b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 40549b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 40649b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 40749b5e25fSSatish Balay 40849b5e25fSSatish Balay v = a->a; 40949b5e25fSSatish Balay xb = x; 41049b5e25fSSatish Balay 41149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 41249b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 41349b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 41449b5e25fSSatish Balay ib = aj + *ai; 41549b5e25fSSatish Balay /* (diag of A)*x */ 41649b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 41749b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 41849b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 41949b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 42049b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 42149b5e25fSSatish Balay v += 25; 42249b5e25fSSatish Balay for (j=1; j<n; j++) { 42349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 42449b5e25fSSatish Balay cval = ib[j]*5; 42549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 42649b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 42749b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 42849b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 42949b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 43049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 43149b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 43249b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 43349b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 43449b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 43549b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 43649b5e25fSSatish Balay v += 25; 43749b5e25fSSatish Balay } 43849b5e25fSSatish Balay xb +=5; ai++; 43949b5e25fSSatish Balay } 44049b5e25fSSatish Balay 44149b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 44249b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 44349b5e25fSSatish Balay PLogFlops(50*(a->s_nz*2 - a->m) - a->m); 44449b5e25fSSatish Balay PetscFunctionReturn(0); 44549b5e25fSSatish Balay } 44649b5e25fSSatish Balay 44749b5e25fSSatish Balay 44849b5e25fSSatish Balay #undef __FUNC__ 44949b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_6" 45049b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 45149b5e25fSSatish Balay { 45249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 45349b5e25fSSatish Balay Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 45449b5e25fSSatish Balay MatScalar *v; 45549b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 45649b5e25fSSatish Balay 45749b5e25fSSatish Balay PetscFunctionBegin; 45849b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 45949b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 46049b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 46149b5e25fSSatish Balay 46249b5e25fSSatish Balay v = a->a; 46349b5e25fSSatish Balay xb = x; 46449b5e25fSSatish Balay 46549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 46649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 46749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 46849b5e25fSSatish Balay ib = aj + *ai; 46949b5e25fSSatish Balay /* (diag of A)*x */ 47049b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 47149b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 47249b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 47349b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 47449b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 47549b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 47649b5e25fSSatish Balay v += 36; 47749b5e25fSSatish Balay for (j=1; j<n; j++) { 47849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 47949b5e25fSSatish Balay cval = ib[j]*6; 48049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 48149b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 48249b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 48349b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 48449b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 48549b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 48649b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 48749b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 48849b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 48949b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 49049b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 49149b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 49249b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 49349b5e25fSSatish Balay v += 36; 49449b5e25fSSatish Balay } 49549b5e25fSSatish Balay xb +=6; ai++; 49649b5e25fSSatish Balay } 49749b5e25fSSatish Balay 49849b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 49949b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 50049b5e25fSSatish Balay PLogFlops(72*(a->s_nz*2 - a->m) - a->m); 50149b5e25fSSatish Balay PetscFunctionReturn(0); 50249b5e25fSSatish Balay } 50349b5e25fSSatish Balay #undef __FUNC__ 50449b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_7" 50549b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 50649b5e25fSSatish Balay { 50749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 50849b5e25fSSatish Balay Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 50949b5e25fSSatish Balay MatScalar *v; 51049b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 51149b5e25fSSatish Balay 51249b5e25fSSatish Balay PetscFunctionBegin; 51349b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 51449b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 51549b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 51649b5e25fSSatish Balay 51749b5e25fSSatish Balay v = a->a; 51849b5e25fSSatish Balay xb = x; 51949b5e25fSSatish Balay 52049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 52149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 52249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 52349b5e25fSSatish Balay ib = aj + *ai; 52449b5e25fSSatish Balay /* (diag of A)*x */ 52549b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 52649b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 52749b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 52849b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 52949b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 53049b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 53149b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 53249b5e25fSSatish Balay v += 49; 53349b5e25fSSatish Balay for (j=1; j<n; j++) { 53449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 53549b5e25fSSatish Balay cval = ib[j]*7; 53649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 53749b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 53849b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 53949b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 54049b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 54149b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 54249b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 54349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 54449b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 54549b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 54649b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 54749b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 54849b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 54949b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 55049b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 55149b5e25fSSatish Balay v += 49; 55249b5e25fSSatish Balay } 55349b5e25fSSatish Balay xb +=7; ai++; 55449b5e25fSSatish Balay } 55549b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 55649b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 55749b5e25fSSatish Balay PLogFlops(98*(a->s_nz*2 - a->m) - a->m); 55849b5e25fSSatish Balay PetscFunctionReturn(0); 55949b5e25fSSatish Balay } 56049b5e25fSSatish Balay 56149b5e25fSSatish Balay /* 56249b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 56349b5e25fSSatish Balay */ 56449b5e25fSSatish Balay #undef __FUNC__ 56549b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_N" 56649b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 56749b5e25fSSatish Balay { 56849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 569066653e3SSatish Balay Scalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 570066653e3SSatish Balay MatScalar *v; 571066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 572066653e3SSatish Balay int ncols,k; 57349b5e25fSSatish Balay 57449b5e25fSSatish Balay PetscFunctionBegin; 57549b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 57649b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 57749b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 57849b5e25fSSatish Balay 57949b5e25fSSatish Balay aj = a->j; 58049b5e25fSSatish Balay v = a->a; 58149b5e25fSSatish Balay ii = a->i; 58249b5e25fSSatish Balay 58349b5e25fSSatish Balay if (!a->mult_work) { 58449b5e25fSSatish Balay k = a->m; 58549b5e25fSSatish Balay a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 58649b5e25fSSatish Balay } 58749b5e25fSSatish Balay work = a->mult_work; 58849b5e25fSSatish Balay 58949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 59049b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 59149b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 59249b5e25fSSatish Balay 59349b5e25fSSatish Balay /* upper triangular part */ 59449b5e25fSSatish Balay for (j=0; j<n; j++) { 595066653e3SSatish Balay /* col = bs*(*idx); */ 59649b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 59749b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 59849b5e25fSSatish Balay workt += bs; 59949b5e25fSSatish Balay } 60049b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 60149b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 60249b5e25fSSatish Balay 60349b5e25fSSatish Balay /* strict lower triangular part */ 60449b5e25fSSatish Balay ncols -= bs; 60549b5e25fSSatish Balay if (ncols > 0){ 60649b5e25fSSatish Balay workt = work; 60749b5e25fSSatish Balay ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 60849b5e25fSSatish Balay Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 60949b5e25fSSatish Balay 61049b5e25fSSatish Balay idx=aj+ii[0]+1; 61149b5e25fSSatish Balay for (j=1; j<n; j++) { 61249b5e25fSSatish Balay zb = z_ptr + bs*(*idx); 61349b5e25fSSatish Balay idx++; 61449b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 61549b5e25fSSatish Balay workt += bs; 61649b5e25fSSatish Balay } 61749b5e25fSSatish Balay } 61849b5e25fSSatish Balay 61949b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 62049b5e25fSSatish Balay } 62149b5e25fSSatish Balay 62249b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 62349b5e25fSSatish Balay ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 62449b5e25fSSatish Balay PLogFlops(2*(a->s_nz*2 - a->m)*bs2 - a->m); 62549b5e25fSSatish Balay PetscFunctionReturn(0); 62649b5e25fSSatish Balay } 62749b5e25fSSatish Balay 62849b5e25fSSatish Balay #undef __FUNC__ 62949b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_1" 63049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 63149b5e25fSSatish Balay { 63249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 63349b5e25fSSatish Balay Scalar *x,*y,*z,*xb,x1; 63449b5e25fSSatish Balay MatScalar *v; 63549b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 63649b5e25fSSatish Balay 63749b5e25fSSatish Balay PetscFunctionBegin; 63849b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 63949b5e25fSSatish Balay if (yy != xx) { 64049b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 64149b5e25fSSatish Balay } else { 64249b5e25fSSatish Balay y = x; 64349b5e25fSSatish Balay } 64449b5e25fSSatish Balay if (zz != yy) { 64549b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 64649b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 64749b5e25fSSatish Balay } else { 64849b5e25fSSatish Balay z = y; 64949b5e25fSSatish Balay } 65049b5e25fSSatish Balay 65149b5e25fSSatish Balay v = a->a; 65249b5e25fSSatish Balay xb = x; 65349b5e25fSSatish Balay 65449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 65549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 65649b5e25fSSatish Balay x1 = xb[0]; 65749b5e25fSSatish Balay ib = aj + *ai; 65849b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 65949b5e25fSSatish Balay for (j=1; j<n; j++) { 66049b5e25fSSatish Balay cval = *ib; 66149b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 66249b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 66349b5e25fSSatish Balay } 66449b5e25fSSatish Balay xb++; ai++; 66549b5e25fSSatish Balay } 66649b5e25fSSatish Balay 66749b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 66849b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 66949b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 67049b5e25fSSatish Balay 67149b5e25fSSatish Balay PLogFlops(2*(a->s_nz*2 - a->m)); 67249b5e25fSSatish Balay PetscFunctionReturn(0); 67349b5e25fSSatish Balay } 67449b5e25fSSatish Balay 67549b5e25fSSatish Balay #undef __FUNC__ 67649b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_2" 67749b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 67849b5e25fSSatish Balay { 67949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 68049b5e25fSSatish Balay Scalar *x,*y,*z,*xb,x1,x2; 68149b5e25fSSatish Balay MatScalar *v; 68249b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 68349b5e25fSSatish Balay 68449b5e25fSSatish Balay PetscFunctionBegin; 68549b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 68649b5e25fSSatish Balay if (yy != xx) { 68749b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 68849b5e25fSSatish Balay } else { 68949b5e25fSSatish Balay y = x; 69049b5e25fSSatish Balay } 69149b5e25fSSatish Balay if (zz != yy) { 69249b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 69349b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 69449b5e25fSSatish Balay } else { 69549b5e25fSSatish Balay z = y; 69649b5e25fSSatish Balay } 69749b5e25fSSatish Balay 69849b5e25fSSatish Balay v = a->a; 69949b5e25fSSatish Balay xb = x; 70049b5e25fSSatish Balay 70149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 70249b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 70349b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 70449b5e25fSSatish Balay ib = aj + *ai; 70549b5e25fSSatish Balay /* (diag of A)*x */ 70649b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 70749b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 70849b5e25fSSatish Balay v += 4; 70949b5e25fSSatish Balay for (j=1; j<n; j++) { 71049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 71149b5e25fSSatish Balay cval = ib[j]*2; 71249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 71349b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 71449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 71549b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 71649b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 71749b5e25fSSatish Balay v += 4; 71849b5e25fSSatish Balay } 71949b5e25fSSatish Balay xb +=2; ai++; 72049b5e25fSSatish Balay } 72149b5e25fSSatish Balay 72249b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 72349b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 72449b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 72549b5e25fSSatish Balay 72649b5e25fSSatish Balay PLogFlops(4*(a->s_nz*2 - a->m)); 72749b5e25fSSatish Balay PetscFunctionReturn(0); 72849b5e25fSSatish Balay } 72949b5e25fSSatish Balay 73049b5e25fSSatish Balay #undef __FUNC__ 73149b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_3" 73249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 73349b5e25fSSatish Balay { 73449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 73549b5e25fSSatish Balay Scalar *x,*y,*z,*xb,x1,x2,x3; 73649b5e25fSSatish Balay MatScalar *v; 73749b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 73849b5e25fSSatish Balay 73949b5e25fSSatish Balay PetscFunctionBegin; 74049b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 74149b5e25fSSatish Balay if (yy != xx) { 74249b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 74349b5e25fSSatish Balay } else { 74449b5e25fSSatish Balay y = x; 74549b5e25fSSatish Balay } 74649b5e25fSSatish Balay if (zz != yy) { 74749b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 74849b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 74949b5e25fSSatish Balay } else { 75049b5e25fSSatish Balay z = y; 75149b5e25fSSatish Balay } 75249b5e25fSSatish Balay 75349b5e25fSSatish Balay v = a->a; 75449b5e25fSSatish Balay xb = x; 75549b5e25fSSatish Balay 75649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 75749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 75849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 75949b5e25fSSatish Balay ib = aj + *ai; 76049b5e25fSSatish Balay /* (diag of A)*x */ 76149b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 76249b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 76349b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 76449b5e25fSSatish Balay v += 9; 76549b5e25fSSatish Balay for (j=1; j<n; j++) { 76649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 76749b5e25fSSatish Balay cval = ib[j]*3; 76849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 76949b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 77049b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 77149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 77249b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 77349b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 77449b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 77549b5e25fSSatish Balay v += 9; 77649b5e25fSSatish Balay } 77749b5e25fSSatish Balay xb +=3; ai++; 77849b5e25fSSatish Balay } 77949b5e25fSSatish Balay 78049b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 78149b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 78249b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 78349b5e25fSSatish Balay 78449b5e25fSSatish Balay PLogFlops(18*(a->s_nz*2 - a->m)); 78549b5e25fSSatish Balay PetscFunctionReturn(0); 78649b5e25fSSatish Balay } 78749b5e25fSSatish Balay 78849b5e25fSSatish Balay #undef __FUNC__ 78949b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_4" 79049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 79149b5e25fSSatish Balay { 79249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 79349b5e25fSSatish Balay Scalar *x,*y,*z,*xb,x1,x2,x3,x4; 79449b5e25fSSatish Balay MatScalar *v; 79549b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 79649b5e25fSSatish Balay 79749b5e25fSSatish Balay PetscFunctionBegin; 79849b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 79949b5e25fSSatish Balay if (yy != xx) { 80049b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 80149b5e25fSSatish Balay } else { 80249b5e25fSSatish Balay y = x; 80349b5e25fSSatish Balay } 80449b5e25fSSatish Balay if (zz != yy) { 80549b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 80649b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 80749b5e25fSSatish Balay } else { 80849b5e25fSSatish Balay z = y; 80949b5e25fSSatish Balay } 81049b5e25fSSatish Balay 81149b5e25fSSatish Balay v = a->a; 81249b5e25fSSatish Balay xb = x; 81349b5e25fSSatish Balay 81449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 81549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 81649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 81749b5e25fSSatish Balay ib = aj + *ai; 81849b5e25fSSatish Balay /* (diag of A)*x */ 81949b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 82049b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 82149b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 82249b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 82349b5e25fSSatish Balay v += 16; 82449b5e25fSSatish Balay for (j=1; j<n; j++) { 82549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 82649b5e25fSSatish Balay cval = ib[j]*4; 82749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 82849b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 82949b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 83049b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 83149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 83249b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 83349b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 83449b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 83549b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 83649b5e25fSSatish Balay v += 16; 83749b5e25fSSatish Balay } 83849b5e25fSSatish Balay xb +=4; ai++; 83949b5e25fSSatish Balay } 84049b5e25fSSatish Balay 84149b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 84249b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 84349b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 84449b5e25fSSatish Balay 84549b5e25fSSatish Balay PLogFlops(32*(a->s_nz*2 - a->m)); 84649b5e25fSSatish Balay PetscFunctionReturn(0); 84749b5e25fSSatish Balay } 84849b5e25fSSatish Balay 84949b5e25fSSatish Balay #undef __FUNC__ 85049b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_5" 85149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 85249b5e25fSSatish Balay { 85349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 85449b5e25fSSatish Balay Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 85549b5e25fSSatish Balay MatScalar *v; 85649b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 85749b5e25fSSatish Balay 85849b5e25fSSatish Balay PetscFunctionBegin; 85949b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 86049b5e25fSSatish Balay if (yy != xx) { 86149b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 86249b5e25fSSatish Balay } else { 86349b5e25fSSatish Balay y = x; 86449b5e25fSSatish Balay } 86549b5e25fSSatish Balay if (zz != yy) { 86649b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 86749b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 86849b5e25fSSatish Balay } else { 86949b5e25fSSatish Balay z = y; 87049b5e25fSSatish Balay } 87149b5e25fSSatish Balay 87249b5e25fSSatish Balay v = a->a; 87349b5e25fSSatish Balay xb = x; 87449b5e25fSSatish Balay 87549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 87649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 87749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 87849b5e25fSSatish Balay ib = aj + *ai; 87949b5e25fSSatish Balay /* (diag of A)*x */ 88049b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 88149b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 88249b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 88349b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 88449b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 88549b5e25fSSatish Balay v += 25; 88649b5e25fSSatish Balay for (j=1; j<n; j++) { 88749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 88849b5e25fSSatish Balay cval = ib[j]*5; 88949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 89049b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 89149b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 89249b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 89349b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 89449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 89549b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 89649b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 89749b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 89849b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 89949b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 90049b5e25fSSatish Balay v += 25; 90149b5e25fSSatish Balay } 90249b5e25fSSatish Balay xb +=5; ai++; 90349b5e25fSSatish Balay } 90449b5e25fSSatish Balay 90549b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 90649b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 90749b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 90849b5e25fSSatish Balay 90949b5e25fSSatish Balay PLogFlops(50*(a->s_nz*2 - a->m)); 91049b5e25fSSatish Balay PetscFunctionReturn(0); 91149b5e25fSSatish Balay } 91249b5e25fSSatish Balay #undef __FUNC__ 91349b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_6" 91449b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 91549b5e25fSSatish Balay { 91649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 91749b5e25fSSatish Balay Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 91849b5e25fSSatish Balay MatScalar *v; 91949b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 92049b5e25fSSatish Balay 92149b5e25fSSatish Balay PetscFunctionBegin; 92249b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 92349b5e25fSSatish Balay if (yy != xx) { 92449b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 92549b5e25fSSatish Balay } else { 92649b5e25fSSatish Balay y = x; 92749b5e25fSSatish Balay } 92849b5e25fSSatish Balay if (zz != yy) { 92949b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 93049b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 93149b5e25fSSatish Balay } else { 93249b5e25fSSatish Balay z = y; 93349b5e25fSSatish Balay } 93449b5e25fSSatish Balay 93549b5e25fSSatish Balay v = a->a; 93649b5e25fSSatish Balay xb = x; 93749b5e25fSSatish Balay 93849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 93949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 94049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 94149b5e25fSSatish Balay ib = aj + *ai; 94249b5e25fSSatish Balay /* (diag of A)*x */ 94349b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 94449b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 94549b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 94649b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 94749b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 94849b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 94949b5e25fSSatish Balay v += 36; 95049b5e25fSSatish Balay for (j=1; j<n; j++) { 95149b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 95249b5e25fSSatish Balay cval = ib[j]*6; 95349b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 95449b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 95549b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 95649b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 95749b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 95849b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 95949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 96049b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 96149b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 96249b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 96349b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 96449b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 96549b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 96649b5e25fSSatish Balay v += 36; 96749b5e25fSSatish Balay } 96849b5e25fSSatish Balay xb +=6; ai++; 96949b5e25fSSatish Balay } 97049b5e25fSSatish Balay 97149b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 97249b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 97349b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 97449b5e25fSSatish Balay 97549b5e25fSSatish Balay PLogFlops(72*(a->s_nz*2 - a->m)); 97649b5e25fSSatish Balay PetscFunctionReturn(0); 97749b5e25fSSatish Balay } 97849b5e25fSSatish Balay 97949b5e25fSSatish Balay #undef __FUNC__ 98049b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_7" 98149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 98249b5e25fSSatish Balay { 98349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 98449b5e25fSSatish Balay Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 98549b5e25fSSatish Balay MatScalar *v; 98649b5e25fSSatish Balay int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 98749b5e25fSSatish Balay 98849b5e25fSSatish Balay PetscFunctionBegin; 98949b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 99049b5e25fSSatish Balay if (yy != xx) { 99149b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 99249b5e25fSSatish Balay } else { 99349b5e25fSSatish Balay y = x; 99449b5e25fSSatish Balay } 99549b5e25fSSatish Balay if (zz != yy) { 99649b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 99749b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 99849b5e25fSSatish Balay } else { 99949b5e25fSSatish Balay z = y; 100049b5e25fSSatish Balay } 100149b5e25fSSatish Balay 100249b5e25fSSatish Balay v = a->a; 100349b5e25fSSatish Balay xb = x; 100449b5e25fSSatish Balay 100549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 100649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 100749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 100849b5e25fSSatish Balay ib = aj + *ai; 100949b5e25fSSatish Balay /* (diag of A)*x */ 101049b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 101149b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 101249b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 101349b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 101449b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 101549b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 101649b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 101749b5e25fSSatish Balay v += 49; 101849b5e25fSSatish Balay for (j=1; j<n; j++) { 101949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 102049b5e25fSSatish Balay cval = ib[j]*7; 102149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 102249b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 102349b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 102449b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 102549b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 102649b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 102749b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 102849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 102949b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 103049b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 103149b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 103249b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 103349b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 103449b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 103549b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 103649b5e25fSSatish Balay v += 49; 103749b5e25fSSatish Balay } 103849b5e25fSSatish Balay xb +=7; ai++; 103949b5e25fSSatish Balay } 104049b5e25fSSatish Balay 104149b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 104249b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 104349b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 104449b5e25fSSatish Balay 104549b5e25fSSatish Balay PLogFlops(98*(a->s_nz*2 - a->m)); 104649b5e25fSSatish Balay PetscFunctionReturn(0); 104749b5e25fSSatish Balay } 104849b5e25fSSatish Balay 104949b5e25fSSatish Balay #undef __FUNC__ 105049b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_N" 105149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 105249b5e25fSSatish Balay { 105349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1054ecfa4421SSatish Balay Scalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1055066653e3SSatish Balay MatScalar *v; 1056066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1057066653e3SSatish Balay int ncols,k; 105849b5e25fSSatish Balay 105949b5e25fSSatish Balay PetscFunctionBegin; 106049b5e25fSSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 106149b5e25fSSatish Balay if (yy != xx) { 106249b5e25fSSatish Balay ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 106349b5e25fSSatish Balay } else { 106449b5e25fSSatish Balay y = x; 106549b5e25fSSatish Balay } 106649b5e25fSSatish Balay if (zz != yy) { 106749b5e25fSSatish Balay ierr = VecCopy(yy,zz);CHKERRQ(ierr); 106849b5e25fSSatish Balay ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 106949b5e25fSSatish Balay } else { 107049b5e25fSSatish Balay z = y; 107149b5e25fSSatish Balay } 107249b5e25fSSatish Balay 107349b5e25fSSatish Balay aj = a->j; 107449b5e25fSSatish Balay v = a->a; 107549b5e25fSSatish Balay ii = a->i; 107649b5e25fSSatish Balay 107749b5e25fSSatish Balay if (!a->mult_work) { 107849b5e25fSSatish Balay k = a->m; 107949b5e25fSSatish Balay a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 108049b5e25fSSatish Balay } 108149b5e25fSSatish Balay work = a->mult_work; 108249b5e25fSSatish Balay 108349b5e25fSSatish Balay 108449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 108549b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 108649b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 108749b5e25fSSatish Balay 108849b5e25fSSatish Balay /* upper triangular part */ 108949b5e25fSSatish Balay for (j=0; j<n; j++) { 1090066653e3SSatish Balay /* col = bs*(*idx); */ 109149b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 109249b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 109349b5e25fSSatish Balay workt += bs; 109449b5e25fSSatish Balay } 109549b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 109649b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 109749b5e25fSSatish Balay 109849b5e25fSSatish Balay /* strict lower triangular part */ 109949b5e25fSSatish Balay ncols -= bs; 110049b5e25fSSatish Balay if (ncols > 0){ 110149b5e25fSSatish Balay workt = work; 110249b5e25fSSatish Balay ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 110349b5e25fSSatish Balay Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 110449b5e25fSSatish Balay 110549b5e25fSSatish Balay idx=aj+ii[0]+1; 110649b5e25fSSatish Balay for (j=1; j<n; j++) { 110749b5e25fSSatish Balay zb = z_ptr + bs*(*idx); 110849b5e25fSSatish Balay idx++; 110949b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 111049b5e25fSSatish Balay workt += bs; 111149b5e25fSSatish Balay } 111249b5e25fSSatish Balay } 111349b5e25fSSatish Balay 111449b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 111549b5e25fSSatish Balay } 111649b5e25fSSatish Balay 111749b5e25fSSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 111849b5e25fSSatish Balay if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 111949b5e25fSSatish Balay if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 112049b5e25fSSatish Balay 112149b5e25fSSatish Balay PLogFlops(2*(a->s_nz*2 - a->m)); 112249b5e25fSSatish Balay PetscFunctionReturn(0); 112349b5e25fSSatish Balay } 112449b5e25fSSatish Balay 112549b5e25fSSatish Balay #undef __FUNC__ 112649b5e25fSSatish Balay #define __FUNC__ "MatMultTranspose_SeqSBAIJ" 112749b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 112849b5e25fSSatish Balay { 112949b5e25fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 113049b5e25fSSatish Balay Scalar *xg,*zg,*zb,zero = 0.0; 113149b5e25fSSatish Balay Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 113249b5e25fSSatish Balay MatScalar *v; 113349b5e25fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval; 113449b5e25fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 113549b5e25fSSatish Balay 113649b5e25fSSatish Balay PetscFunctionBegin; 113749b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 113849b5e25fSSatish Balay ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg; 113949b5e25fSSatish Balay ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg; 114049b5e25fSSatish Balay 114149b5e25fSSatish Balay idx = a->j; 114249b5e25fSSatish Balay v = a->a; 114349b5e25fSSatish Balay ii = a->i; 114449b5e25fSSatish Balay xb = x; 114549b5e25fSSatish Balay switch (bs) { 114649b5e25fSSatish Balay case 1: 114749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 114849b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 114949b5e25fSSatish Balay x1 = xb[0]; 115049b5e25fSSatish Balay ib = idx + ai[i]; 115149b5e25fSSatish Balay for (j=0; j<n; j++) { 115249b5e25fSSatish Balay rval = ib[j]; 115349b5e25fSSatish Balay z[rval] += *v * x1; 115449b5e25fSSatish Balay v++; 115549b5e25fSSatish Balay } 115649b5e25fSSatish Balay xb++; 115749b5e25fSSatish Balay } 115849b5e25fSSatish Balay break; 115949b5e25fSSatish Balay case 2: 116049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 116149b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 116249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 116349b5e25fSSatish Balay ib = idx + ai[i]; 116449b5e25fSSatish Balay for (j=0; j<n; j++) { 116549b5e25fSSatish Balay rval = ib[j]*2; 116649b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 116749b5e25fSSatish Balay z[rval] += v[2]*x1 + v[3]*x2; 116849b5e25fSSatish Balay v += 4; 116949b5e25fSSatish Balay } 117049b5e25fSSatish Balay xb += 2; 117149b5e25fSSatish Balay } 117249b5e25fSSatish Balay break; 117349b5e25fSSatish Balay case 3: 117449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 117549b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 117649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 117749b5e25fSSatish Balay ib = idx + ai[i]; 117849b5e25fSSatish Balay for (j=0; j<n; j++) { 117949b5e25fSSatish Balay rval = ib[j]*3; 118049b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 118149b5e25fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 118249b5e25fSSatish Balay z[rval] += v[6]*x1 + v[7]*x2 + v[8]*x3; 118349b5e25fSSatish Balay v += 9; 118449b5e25fSSatish Balay } 118549b5e25fSSatish Balay xb += 3; 118649b5e25fSSatish Balay } 118749b5e25fSSatish Balay break; 118849b5e25fSSatish Balay case 4: 118949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 119049b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 119149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 119249b5e25fSSatish Balay ib = idx + ai[i]; 119349b5e25fSSatish Balay for (j=0; j<n; j++) { 119449b5e25fSSatish Balay rval = ib[j]*4; 119549b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 119649b5e25fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 119749b5e25fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 119849b5e25fSSatish Balay z[rval] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 119949b5e25fSSatish Balay v += 16; 120049b5e25fSSatish Balay } 120149b5e25fSSatish Balay xb += 4; 120249b5e25fSSatish Balay } 120349b5e25fSSatish Balay break; 120449b5e25fSSatish Balay case 5: 120549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 120649b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 120749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 120849b5e25fSSatish Balay x4 = xb[3]; x5 = xb[4]; 120949b5e25fSSatish Balay ib = idx + ai[i]; 121049b5e25fSSatish Balay for (j=0; j<n; j++) { 121149b5e25fSSatish Balay rval = ib[j]*5; 121249b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 121349b5e25fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 121449b5e25fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 121549b5e25fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 121649b5e25fSSatish Balay z[rval] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 121749b5e25fSSatish Balay v += 25; 121849b5e25fSSatish Balay } 121949b5e25fSSatish Balay xb += 5; 122049b5e25fSSatish Balay } 122149b5e25fSSatish Balay break; 122249b5e25fSSatish Balay case 6: 122349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 122449b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 122549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 122649b5e25fSSatish Balay x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 122749b5e25fSSatish Balay ib = idx + ai[i]; 122849b5e25fSSatish Balay for (j=0; j<n; j++) { 122949b5e25fSSatish Balay rval = ib[j]*6; 123049b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6; 123149b5e25fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6; 123249b5e25fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6; 123349b5e25fSSatish Balay z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6; 123449b5e25fSSatish Balay z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6; 123549b5e25fSSatish Balay z[rval] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6; 123649b5e25fSSatish Balay v += 36; 123749b5e25fSSatish Balay } 123849b5e25fSSatish Balay xb += 6; 123949b5e25fSSatish Balay } 124049b5e25fSSatish Balay break; 124149b5e25fSSatish Balay case 7: 124249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 124349b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 124449b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 124549b5e25fSSatish Balay x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 124649b5e25fSSatish Balay ib = idx + ai[i]; 124749b5e25fSSatish Balay for (j=0; j<n; j++) { 124849b5e25fSSatish Balay rval = ib[j]*7; 124949b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7; 125049b5e25fSSatish Balay z[rval++] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7; 125149b5e25fSSatish Balay z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7; 125249b5e25fSSatish Balay z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7; 125349b5e25fSSatish Balay z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7; 125449b5e25fSSatish Balay z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7; 125549b5e25fSSatish Balay z[rval] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7; 125649b5e25fSSatish Balay v += 49; 125749b5e25fSSatish Balay } 125849b5e25fSSatish Balay xb += 7; 125949b5e25fSSatish Balay } 126049b5e25fSSatish Balay break; 126149b5e25fSSatish Balay default: { /* block sizes larger then 7 by 7 are handled by BLAS */ 126249b5e25fSSatish Balay int ncols,k; 126349b5e25fSSatish Balay Scalar *work,*workt; 126449b5e25fSSatish Balay 126549b5e25fSSatish Balay if (!a->mult_work) { 126649b5e25fSSatish Balay k = PetscMax(a->m,a->n); 126749b5e25fSSatish Balay a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 126849b5e25fSSatish Balay } 126949b5e25fSSatish Balay work = a->mult_work; 127049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 127149b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 127249b5e25fSSatish Balay ncols = n*bs; 127349b5e25fSSatish Balay ierr = PetscMemzero(work,ncols*sizeof(Scalar));CHKERRQ(ierr); 127449b5e25fSSatish Balay Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work); 127549b5e25fSSatish Balay /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */ 127649b5e25fSSatish Balay v += n*bs2; 127749b5e25fSSatish Balay x += bs; 127849b5e25fSSatish Balay workt = work; 127949b5e25fSSatish Balay for (j=0; j<n; j++) { 128049b5e25fSSatish Balay zb = z + bs*(*idx++); 128149b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 128249b5e25fSSatish Balay workt += bs; 128349b5e25fSSatish Balay } 128449b5e25fSSatish Balay } 128549b5e25fSSatish Balay } 128649b5e25fSSatish Balay } 128749b5e25fSSatish Balay ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr); 128849b5e25fSSatish Balay ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr); 128949b5e25fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 129049b5e25fSSatish Balay PetscFunctionReturn(0); 129149b5e25fSSatish Balay } 129249b5e25fSSatish Balay 129349b5e25fSSatish Balay #undef __FUNC__ 129449b5e25fSSatish Balay #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ" 129549b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 129649b5e25fSSatish Balay 129749b5e25fSSatish Balay { 129849b5e25fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 129949b5e25fSSatish Balay Scalar *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5; 130049b5e25fSSatish Balay MatScalar *v; 130149b5e25fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 130249b5e25fSSatish Balay 130349b5e25fSSatish Balay PetscFunctionBegin; 130449b5e25fSSatish Balay ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg; 130549b5e25fSSatish Balay ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg; 130649b5e25fSSatish Balay 130749b5e25fSSatish Balay if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); } 130849b5e25fSSatish Balay 130949b5e25fSSatish Balay idx = a->j; 131049b5e25fSSatish Balay v = a->a; 131149b5e25fSSatish Balay ii = a->i; 131249b5e25fSSatish Balay xb = x; 131349b5e25fSSatish Balay 131449b5e25fSSatish Balay switch (bs) { 131549b5e25fSSatish Balay case 1: 131649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 131749b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 131849b5e25fSSatish Balay x1 = xb[0]; 131949b5e25fSSatish Balay ib = idx + ai[i]; 132049b5e25fSSatish Balay for (j=0; j<n; j++) { 132149b5e25fSSatish Balay rval = ib[j]; 132249b5e25fSSatish Balay z[rval] += *v * x1; 132349b5e25fSSatish Balay v++; 132449b5e25fSSatish Balay } 132549b5e25fSSatish Balay xb++; 132649b5e25fSSatish Balay } 132749b5e25fSSatish Balay break; 132849b5e25fSSatish Balay case 2: 132949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 133049b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 133149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 133249b5e25fSSatish Balay ib = idx + ai[i]; 133349b5e25fSSatish Balay for (j=0; j<n; j++) { 133449b5e25fSSatish Balay rval = ib[j]*2; 133549b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 133649b5e25fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 133749b5e25fSSatish Balay v += 4; 133849b5e25fSSatish Balay } 133949b5e25fSSatish Balay xb += 2; 134049b5e25fSSatish Balay } 134149b5e25fSSatish Balay break; 134249b5e25fSSatish Balay case 3: 134349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 134449b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 134549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 134649b5e25fSSatish Balay ib = idx + ai[i]; 134749b5e25fSSatish Balay for (j=0; j<n; j++) { 134849b5e25fSSatish Balay rval = ib[j]*3; 134949b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 135049b5e25fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 135149b5e25fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 135249b5e25fSSatish Balay v += 9; 135349b5e25fSSatish Balay } 135449b5e25fSSatish Balay xb += 3; 135549b5e25fSSatish Balay } 135649b5e25fSSatish Balay break; 135749b5e25fSSatish Balay case 4: 135849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 135949b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 136049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 136149b5e25fSSatish Balay ib = idx + ai[i]; 136249b5e25fSSatish Balay for (j=0; j<n; j++) { 136349b5e25fSSatish Balay rval = ib[j]*4; 136449b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 136549b5e25fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 136649b5e25fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 136749b5e25fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 136849b5e25fSSatish Balay v += 16; 136949b5e25fSSatish Balay } 137049b5e25fSSatish Balay xb += 4; 137149b5e25fSSatish Balay } 137249b5e25fSSatish Balay break; 137349b5e25fSSatish Balay case 5: 137449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 137549b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 137649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 137749b5e25fSSatish Balay x4 = xb[3]; x5 = xb[4]; 137849b5e25fSSatish Balay ib = idx + ai[i]; 137949b5e25fSSatish Balay for (j=0; j<n; j++) { 138049b5e25fSSatish Balay rval = ib[j]*5; 138149b5e25fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 138249b5e25fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 138349b5e25fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 138449b5e25fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 138549b5e25fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 138649b5e25fSSatish Balay v += 25; 138749b5e25fSSatish Balay } 138849b5e25fSSatish Balay xb += 5; 138949b5e25fSSatish Balay } 139049b5e25fSSatish Balay break; 139149b5e25fSSatish Balay default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 139249b5e25fSSatish Balay int ncols,k; 139349b5e25fSSatish Balay Scalar *work,*workt; 139449b5e25fSSatish Balay 139549b5e25fSSatish Balay if (!a->mult_work) { 139649b5e25fSSatish Balay k = PetscMax(a->m,a->n); 139749b5e25fSSatish Balay a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 139849b5e25fSSatish Balay } 139949b5e25fSSatish Balay work = a->mult_work; 140049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 140149b5e25fSSatish Balay n = ii[1] - ii[0]; ii++; 140249b5e25fSSatish Balay ncols = n*bs; 140349b5e25fSSatish Balay ierr = PetscMemzero(work,ncols*sizeof(Scalar));CHKERRQ(ierr); 140449b5e25fSSatish Balay Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work); 140549b5e25fSSatish Balay /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */ 140649b5e25fSSatish Balay v += n*bs2; 140749b5e25fSSatish Balay x += bs; 140849b5e25fSSatish Balay workt = work; 140949b5e25fSSatish Balay for (j=0; j<n; j++) { 141049b5e25fSSatish Balay zb = z + bs*(*idx++); 141149b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 141249b5e25fSSatish Balay workt += bs; 141349b5e25fSSatish Balay } 141449b5e25fSSatish Balay } 141549b5e25fSSatish Balay } 141649b5e25fSSatish Balay } 141749b5e25fSSatish Balay ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr); 141849b5e25fSSatish Balay ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr); 141949b5e25fSSatish Balay PLogFlops(2*a->nz*a->bs2); 142049b5e25fSSatish Balay PetscFunctionReturn(0); 142149b5e25fSSatish Balay } 142249b5e25fSSatish Balay 142349b5e25fSSatish Balay #undef __FUNC__ 142449b5e25fSSatish Balay #define __FUNC__ "MatScale_SeqSBAIJ" 142549b5e25fSSatish Balay int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA) 142649b5e25fSSatish Balay { 142749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 142849b5e25fSSatish Balay int one = 1,totalnz = a->bs2*a->s_nz; 142949b5e25fSSatish Balay 143049b5e25fSSatish Balay PetscFunctionBegin; 143149b5e25fSSatish Balay BLscal_(&totalnz,alpha,a->a,&one); 143249b5e25fSSatish Balay PLogFlops(totalnz); 143349b5e25fSSatish Balay PetscFunctionReturn(0); 143449b5e25fSSatish Balay } 143549b5e25fSSatish Balay 143649b5e25fSSatish Balay #undef __FUNC__ 143749b5e25fSSatish Balay #define __FUNC__ "MatNorm_SeqSBAIJ" 143849b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 143949b5e25fSSatish Balay { 144049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 144149b5e25fSSatish Balay MatScalar *v = a->a; 144249b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1443066653e3SSatish Balay int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs; 144449b5e25fSSatish Balay int *jl,*il,jmin,jmax,ierr,nexti,ik; 144549b5e25fSSatish Balay 144649b5e25fSSatish Balay PetscFunctionBegin; 144749b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 144849b5e25fSSatish Balay for (k=0; k<mbs; k++){ 144949b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 145049b5e25fSSatish Balay /* diagonal block */ 145149b5e25fSSatish Balay for (i=0; i<bs2; i++){ 145249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 145349b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 145449b5e25fSSatish Balay #else 145549b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 145649b5e25fSSatish Balay #endif 145749b5e25fSSatish Balay } 145849b5e25fSSatish Balay for (j=jmin+1; j<jmax; j++){ /* off-diagonal blocks */ 145949b5e25fSSatish Balay for (i=0; i<bs2; i++){ 146049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 146149b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 146249b5e25fSSatish Balay #else 146349b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 146449b5e25fSSatish Balay #endif 146549b5e25fSSatish Balay } 146649b5e25fSSatish Balay } 146749b5e25fSSatish Balay } 146849b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 146949b5e25fSSatish Balay 147049b5e25fSSatish Balay } else if (type == NORM_INFINITY) { /* maximum row sum */ 147149b5e25fSSatish Balay il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il); 147249b5e25fSSatish Balay jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl); 1473f3dd7b2dSKris Buschelman sum = (PetscReal*)PetscMalloc(bs*sizeof(PetscReal));CHKPTRQ(sum); 147449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 147549b5e25fSSatish Balay jl[i] = mbs; il[0] = 0; 147649b5e25fSSatish Balay } 147749b5e25fSSatish Balay 147849b5e25fSSatish Balay *norm = 0.0; 147949b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 148049b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 148149b5e25fSSatish Balay 148249b5e25fSSatish Balay /*-- col sum --*/ 148349b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 148449b5e25fSSatish Balay while (i<mbs){ 148549b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 148649b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 148749b5e25fSSatish Balay for (j=0; j<bs; j++){ 148849b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 148949b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 149049b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 149149b5e25fSSatish Balay } 149249b5e25fSSatish Balay } 149349b5e25fSSatish Balay /* update il, jl */ 149449b5e25fSSatish Balay jmin = ik + 1; jmax = a->i[i+1]; 149549b5e25fSSatish Balay if (jmin < jmax){ 149649b5e25fSSatish Balay il[i] = jmin; 149749b5e25fSSatish Balay j = a->j[jmin]; 149849b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 149949b5e25fSSatish Balay } 150049b5e25fSSatish Balay i = nexti; 150149b5e25fSSatish Balay } 150249b5e25fSSatish Balay 150349b5e25fSSatish Balay /*-- row sum --*/ 150449b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 150549b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 150649b5e25fSSatish Balay for (j=0; j<bs; j++){ 150749b5e25fSSatish Balay v = a->a + i*bs2 + j; 150849b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 150949b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); 151049b5e25fSSatish Balay v += bs; 151149b5e25fSSatish Balay } 151249b5e25fSSatish Balay } 151349b5e25fSSatish Balay } 151449b5e25fSSatish Balay /* add k_th block row to il, jl */ 151549b5e25fSSatish Balay jmin++; 151649b5e25fSSatish Balay if (jmin < jmax){ 151749b5e25fSSatish Balay il[k] = jmin; 151849b5e25fSSatish Balay j = a->j[jmin]; 151949b5e25fSSatish Balay jl[k] = jl[j]; jl[j] = k; 152049b5e25fSSatish Balay } 152149b5e25fSSatish Balay for (j=0; j<bs; j++){ 152249b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 152349b5e25fSSatish Balay } 152449b5e25fSSatish Balay } 152549b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 152649b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 152749b5e25fSSatish Balay ierr = PetscFree(sum);CHKERRQ(ierr); 152849b5e25fSSatish Balay } else { 152949b5e25fSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 153049b5e25fSSatish Balay } 153149b5e25fSSatish Balay PetscFunctionReturn(0); 153249b5e25fSSatish Balay } 153349b5e25fSSatish Balay 153449b5e25fSSatish Balay #ifdef MatNorm_SeqBAIJ 153549b5e25fSSatish Balay /* This is modified MatNorm_SeqBAIJ. 153649b5e25fSSatish Balay MatNorm_SeqBAIJ in baij/seq/baij2.c is not correct for bs>1 */ 153749b5e25fSSatish Balay 153849b5e25fSSatish Balay #undef __FUNC__ 153949b5e25fSSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ" 154049b5e25fSSatish Balay int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 154149b5e25fSSatish Balay { 154249b5e25fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 154349b5e25fSSatish Balay MatScalar *v = a->a; 154449b5e25fSSatish Balay PetscReal sum = 0.0; 154549b5e25fSSatish Balay int i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1; 154649b5e25fSSatish Balay 154749b5e25fSSatish Balay PetscFunctionBegin; 154849b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 154949b5e25fSSatish Balay for (i=0; i< bs2*nz; i++) { 155049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 155149b5e25fSSatish Balay sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 155249b5e25fSSatish Balay #else 155349b5e25fSSatish Balay sum += (*v)*(*v); v++; 155449b5e25fSSatish Balay #endif 155549b5e25fSSatish Balay } 155649b5e25fSSatish Balay *norm = sqrt(sum); 155749b5e25fSSatish Balay } else if (type == NORM_INFINITY) { /* maximum row sum */ 155849b5e25fSSatish Balay *norm = 0.0; 155949b5e25fSSatish Balay for (j=0; j<a->mbs; j++) { 156049b5e25fSSatish Balay for (k=0; k<bs; k++) { 156149b5e25fSSatish Balay v = a->a + bs2*a->i[j] + k; 156249b5e25fSSatish Balay sum = 0.0; 156349b5e25fSSatish Balay for (i=0; i<a->i[j+1]-a->i[j]; i++) { 156449b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ /* this loop was missing in the original code*/ 156549b5e25fSSatish Balay sum += PetscAbsScalar(*v); 156649b5e25fSSatish Balay v += bs; 156749b5e25fSSatish Balay } 156849b5e25fSSatish Balay } 156949b5e25fSSatish Balay if (sum > *norm) *norm = sum; 157049b5e25fSSatish Balay } 157149b5e25fSSatish Balay } 157249b5e25fSSatish Balay } else { 157349b5e25fSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 157449b5e25fSSatish Balay } 157549b5e25fSSatish Balay PetscFunctionReturn(0); 157649b5e25fSSatish Balay } 157749b5e25fSSatish Balay #endif 157849b5e25fSSatish Balay 157949b5e25fSSatish Balay #undef __FUNC__ 158049b5e25fSSatish Balay #define __FUNC__ "MatEqual_SeqSBAIJ" 158149b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 158249b5e25fSSatish Balay { 158349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 158449b5e25fSSatish Balay int ierr; 158549b5e25fSSatish Balay 158649b5e25fSSatish Balay PetscFunctionBegin; 158749b5e25fSSatish Balay if (B->type !=MATSEQSBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 158849b5e25fSSatish Balay 158949b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 159049b5e25fSSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 159149b5e25fSSatish Balay *flg = PETSC_FALSE; PetscFunctionReturn(0); 159249b5e25fSSatish Balay } 159349b5e25fSSatish Balay 159449b5e25fSSatish Balay /* if the a->i are the same */ 159549b5e25fSSatish Balay ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 159649b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 159749b5e25fSSatish Balay PetscFunctionReturn(0); 159849b5e25fSSatish Balay } 159949b5e25fSSatish Balay 160049b5e25fSSatish Balay /* if a->j are the same */ 160149b5e25fSSatish Balay ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 160249b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 160349b5e25fSSatish Balay PetscFunctionReturn(0); 160449b5e25fSSatish Balay } 160549b5e25fSSatish Balay /* if a->a are the same */ 160649b5e25fSSatish Balay ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr); 160749b5e25fSSatish Balay PetscFunctionReturn(0); 160849b5e25fSSatish Balay 160949b5e25fSSatish Balay } 161049b5e25fSSatish Balay 161149b5e25fSSatish Balay #undef __FUNC__ 161249b5e25fSSatish Balay #define __FUNC__ "MatGetDiagonal_SeqSBAIJ" 161349b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 161449b5e25fSSatish Balay { 161549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 161649b5e25fSSatish Balay int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 161749b5e25fSSatish Balay Scalar *x,zero = 0.0; 161849b5e25fSSatish Balay MatScalar *aa,*aa_j; 161949b5e25fSSatish Balay 162049b5e25fSSatish Balay PetscFunctionBegin; 162149b5e25fSSatish Balay if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix"); 162249b5e25fSSatish Balay bs = a->bs; 162349b5e25fSSatish Balay aa = a->a; 162449b5e25fSSatish Balay ai = a->i; 162549b5e25fSSatish Balay aj = a->j; 162649b5e25fSSatish Balay ambs = a->mbs; 162749b5e25fSSatish Balay bs2 = a->bs2; 162849b5e25fSSatish Balay 162949b5e25fSSatish Balay ierr = VecSet(&zero,v);CHKERRQ(ierr); 163049b5e25fSSatish Balay ierr = VecGetArray(v,&x);CHKERRQ(ierr); 163149b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 163249b5e25fSSatish Balay /* if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector");*/ 163349b5e25fSSatish Balay for (i=0; i<ambs; i++) { 163449b5e25fSSatish Balay j=ai[i]; 163549b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 163649b5e25fSSatish Balay row = i*bs; 163749b5e25fSSatish Balay aa_j = aa + j*bs2; 163849b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 163949b5e25fSSatish Balay } 164049b5e25fSSatish Balay } 164149b5e25fSSatish Balay ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 164249b5e25fSSatish Balay PetscFunctionReturn(0); 164349b5e25fSSatish Balay } 164449b5e25fSSatish Balay 164549b5e25fSSatish Balay #undef __FUNC__ 164649b5e25fSSatish Balay #define __FUNC__ "MatDiagonalScale_SeqSBAIJ" 164749b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 164849b5e25fSSatish Balay { 164949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 165049b5e25fSSatish Balay Scalar *l,*r,x,*li,*ri; 165149b5e25fSSatish Balay MatScalar *aa,*v; 1652066653e3SSatish Balay int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 165349b5e25fSSatish Balay 165449b5e25fSSatish Balay PetscFunctionBegin; 165549b5e25fSSatish Balay ai = a->i; 165649b5e25fSSatish Balay aj = a->j; 165749b5e25fSSatish Balay aa = a->a; 165849b5e25fSSatish Balay m = a->m; 165949b5e25fSSatish Balay bs = a->bs; 166049b5e25fSSatish Balay mbs = a->mbs; 166149b5e25fSSatish Balay bs2 = a->bs2; 166249b5e25fSSatish Balay 166349b5e25fSSatish Balay if (ll != rr) { 166449b5e25fSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 166549b5e25fSSatish Balay } 166649b5e25fSSatish Balay if (ll) { 166749b5e25fSSatish Balay ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 166849b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 166949b5e25fSSatish Balay if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length"); 167049b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 167149b5e25fSSatish Balay M = ai[i+1] - ai[i]; 167249b5e25fSSatish Balay li = l + i*bs; 167349b5e25fSSatish Balay v = aa + bs2*ai[i]; 167449b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 167549b5e25fSSatish Balay for (k=0; k<bs2; k++) { 167649b5e25fSSatish Balay (*v++) *= li[k%bs]; 167749b5e25fSSatish Balay } 167849b5e25fSSatish Balay #ifdef CONT 167949b5e25fSSatish Balay /* will be used to replace the above loop */ 168049b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 168149b5e25fSSatish Balay for (k=0; k<bs; k++) { /* column value */ 168249b5e25fSSatish Balay x = ri[k]; 168349b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 168449b5e25fSSatish Balay } 168549b5e25fSSatish Balay #endif 168649b5e25fSSatish Balay 168749b5e25fSSatish Balay } 168849b5e25fSSatish Balay } 168949b5e25fSSatish Balay ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 169049b5e25fSSatish Balay PLogFlops(2*a->s_nz); 169149b5e25fSSatish Balay } 169249b5e25fSSatish Balay /* will be deleted */ 169349b5e25fSSatish Balay if (rr) { 169449b5e25fSSatish Balay ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 169549b5e25fSSatish Balay ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 169649b5e25fSSatish Balay if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length"); 169749b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 169849b5e25fSSatish Balay M = ai[i+1] - ai[i]; 169949b5e25fSSatish Balay v = aa + bs2*ai[i]; 170049b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 170149b5e25fSSatish Balay ri = r + bs*aj[ai[i]+j]; 170249b5e25fSSatish Balay for (k=0; k<bs; k++) { 170349b5e25fSSatish Balay x = ri[k]; 170449b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 170549b5e25fSSatish Balay } 170649b5e25fSSatish Balay } 170749b5e25fSSatish Balay } 170849b5e25fSSatish Balay ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 170949b5e25fSSatish Balay PLogFlops(a->s_nz); 171049b5e25fSSatish Balay } 171149b5e25fSSatish Balay PetscFunctionReturn(0); 171249b5e25fSSatish Balay } 171349b5e25fSSatish Balay 171449b5e25fSSatish Balay #undef __FUNC__ 171549b5e25fSSatish Balay #define __FUNC__ "MatGetInfo_SeqSBAIJ" 171649b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 171749b5e25fSSatish Balay { 171849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 171949b5e25fSSatish Balay 172049b5e25fSSatish Balay PetscFunctionBegin; 172149b5e25fSSatish Balay info->rows_global = (double)a->m; 172249b5e25fSSatish Balay info->columns_global = (double)a->m; 172349b5e25fSSatish Balay info->rows_local = (double)a->m; 172449b5e25fSSatish Balay info->columns_local = (double)a->m; 172549b5e25fSSatish Balay info->block_size = a->bs2; 172649b5e25fSSatish Balay info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 172749b5e25fSSatish Balay info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 172849b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 172949b5e25fSSatish Balay info->assemblies = A->num_ass; 173049b5e25fSSatish Balay info->mallocs = a->reallocs; 173149b5e25fSSatish Balay info->memory = A->mem; 173249b5e25fSSatish Balay if (A->factor) { 173349b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 173449b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 173549b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 173649b5e25fSSatish Balay } else { 173749b5e25fSSatish Balay info->fill_ratio_given = 0; 173849b5e25fSSatish Balay info->fill_ratio_needed = 0; 173949b5e25fSSatish Balay info->factor_mallocs = 0; 174049b5e25fSSatish Balay } 174149b5e25fSSatish Balay PetscFunctionReturn(0); 174249b5e25fSSatish Balay } 174349b5e25fSSatish Balay 174449b5e25fSSatish Balay 174549b5e25fSSatish Balay #undef __FUNC__ 174649b5e25fSSatish Balay #define __FUNC__ "MatZeroEntries_SeqSBAIJ" 174749b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A) 174849b5e25fSSatish Balay { 174949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 175049b5e25fSSatish Balay int ierr; 175149b5e25fSSatish Balay 175249b5e25fSSatish Balay PetscFunctionBegin; 175349b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 175449b5e25fSSatish Balay PetscFunctionReturn(0); 175549b5e25fSSatish Balay } 1756