1be1d678aSKris Buschelman #define PETSCMAT_DLL 249b5e25fSSatish Balay 37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h" 4c60f0209SBarry Smith #include "../src/mat/blockinvert.h" 549b5e25fSSatish Balay #include "petscbt.h" 67c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h" 7f3da1532SBarry Smith #include "petscblaslapack.h" 849b5e25fSSatish Balay 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 1249b5e25fSSatish Balay { 135eee224dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 146849ba73SBarry Smith PetscErrorCode ierr; 155d0c19d7SBarry Smith PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2; 165d0c19d7SBarry Smith const PetscInt *idx; 17521d7252SBarry Smith PetscBT table,table0; 18d94109b8SHong Zhang 19d94109b8SHong Zhang PetscFunctionBegin; 20e32f2f54SBarry Smith if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 21d94109b8SHong Zhang mbs = a->mbs; 22d94109b8SHong Zhang ai = a->i; 23d94109b8SHong Zhang aj = a->j; 24d0f46423SBarry Smith bs = A->rmap->bs; 25d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr); 2613f74950SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 27d0f46423SBarry Smith ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 28d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr); 29d94109b8SHong Zhang 30d94109b8SHong Zhang for (i=0; i<is_max; i++) { /* for each is */ 31d94109b8SHong Zhang isz = 0; 32d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr); 33d94109b8SHong Zhang 34d94109b8SHong Zhang /* Extract the indices, assume there can be duplicate entries */ 35d94109b8SHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 36d94109b8SHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 37d94109b8SHong Zhang 38d94109b8SHong Zhang /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */ 39dbe03f88SHong Zhang bcol_max = 0; 40d94109b8SHong Zhang for (j=0; j<n ; ++j){ 41d94109b8SHong Zhang brow = idx[j]/bs; /* convert the indices into block indices */ 42e32f2f54SBarry Smith if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43dbe03f88SHong Zhang if(!PetscBTLookupSet(table,brow)) { 44dbe03f88SHong Zhang nidx[isz++] = brow; 45dbe03f88SHong Zhang if (bcol_max < brow) bcol_max = brow; 46dbe03f88SHong Zhang } 47d94109b8SHong Zhang } 48d94109b8SHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 49d94109b8SHong Zhang ierr = ISDestroy(is[i]);CHKERRQ(ierr); 50d94109b8SHong Zhang 51d94109b8SHong Zhang k = 0; 52d94109b8SHong Zhang for (j=0; j<ov; j++){ /* for each overlap */ 53d94109b8SHong Zhang /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */ 54d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr); 55efee365bSSatish Balay for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); } 56d94109b8SHong Zhang 57d94109b8SHong Zhang n = isz; /* length of the updated is[i] */ 58d94109b8SHong Zhang for (brow=0; brow<mbs; brow++){ 59d94109b8SHong Zhang start = ai[brow]; end = ai[brow+1]; 60d94109b8SHong Zhang if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */ 61d94109b8SHong Zhang for (l = start; l<end ; l++){ 62d94109b8SHong Zhang bcol = aj[l]; 63d94109b8SHong Zhang if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;} 64d94109b8SHong Zhang } 65d94109b8SHong Zhang k++; 66d94109b8SHong Zhang if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */ 67d94109b8SHong Zhang } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */ 68d94109b8SHong Zhang for (l = start; l<end ; l++){ 69d94109b8SHong Zhang bcol = aj[l]; 70dbe03f88SHong Zhang if (bcol > bcol_max) break; 71d94109b8SHong Zhang if (PetscBTLookup(table0,bcol)){ 72d94109b8SHong Zhang if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;} 73d94109b8SHong Zhang break; /* for l = start; l<end ; l++) */ 74d94109b8SHong Zhang } 75d94109b8SHong Zhang } 76d94109b8SHong Zhang } 77d94109b8SHong Zhang } 78d94109b8SHong Zhang } /* for each overlap */ 79d94109b8SHong Zhang 80d94109b8SHong Zhang /* expand the Index Set */ 81d94109b8SHong Zhang for (j=0; j<isz; j++) { 82d94109b8SHong Zhang for (k=0; k<bs; k++) 83d94109b8SHong Zhang nidx2[j*bs+k] = nidx[j]*bs+k; 84d94109b8SHong Zhang } 8570b3c8c7SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr); 86d94109b8SHong Zhang } 87d94109b8SHong Zhang ierr = PetscBTDestroy(table);CHKERRQ(ierr); 88d94109b8SHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 89d94109b8SHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 90d94109b8SHong Zhang ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 915eee224dSHong Zhang PetscFunctionReturn(0); 9249b5e25fSSatish Balay } 9349b5e25fSSatish Balay 944a2ae208SSatish Balay #undef __FUNCT__ 954a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 964aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 9749b5e25fSSatish Balay { 9849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 996849ba73SBarry Smith PetscErrorCode ierr; 10013f74950SBarry Smith PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens; 10113f74950SBarry Smith PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 1025d0c19d7SBarry Smith PetscInt nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2; 1035d0c19d7SBarry Smith const PetscInt *irow,*aj = a->j,*ai = a->i; 10449b5e25fSSatish Balay MatScalar *mat_a; 10549b5e25fSSatish Balay Mat C; 106ace3abfcSBarry Smith PetscBool flag,sorted; 10749b5e25fSSatish Balay 10849b5e25fSSatish Balay PetscFunctionBegin; 109e32f2f54SBarry Smith if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 11014ca34e6SBarry Smith ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr); 111e32f2f54SBarry Smith if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 11249b5e25fSSatish Balay 11349b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 11449b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 11549b5e25fSSatish Balay 11674ed9c26SBarry Smith ierr = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr); 11774ed9c26SBarry Smith ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 11849b5e25fSSatish Balay ssmap = smap; 11913f74950SBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 12049b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 12149b5e25fSSatish Balay /* determine lens of each row */ 12249b5e25fSSatish Balay for (i=0; i<nrows; i++) { 12349b5e25fSSatish Balay kstart = ai[irow[i]]; 12449b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 12549b5e25fSSatish Balay lens[i] = 0; 12649b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 12749b5e25fSSatish Balay if (ssmap[aj[k]]) { 12849b5e25fSSatish Balay lens[i]++; 12949b5e25fSSatish Balay } 13049b5e25fSSatish Balay } 13149b5e25fSSatish Balay } 13249b5e25fSSatish Balay /* Create and fill new matrix */ 13349b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 13449b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 13549b5e25fSSatish Balay 136e32f2f54SBarry Smith if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 13713f74950SBarry Smith ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 138e7e72b3dSBarry Smith if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 13913f74950SBarry Smith ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 14049b5e25fSSatish Balay C = *B; 14149b5e25fSSatish Balay } else { 1427adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 143f69a0ea3SMatthew Knepley ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1447adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 145ab93d7beSBarry Smith ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr); 14649b5e25fSSatish Balay } 14749b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 14849b5e25fSSatish Balay for (i=0; i<nrows; i++) { 14949b5e25fSSatish Balay row = irow[i]; 15049b5e25fSSatish Balay kstart = ai[row]; 15149b5e25fSSatish Balay kend = kstart + a->ilen[row]; 15249b5e25fSSatish Balay mat_i = c->i[i]; 15349b5e25fSSatish Balay mat_j = c->j + mat_i; 15449b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 15549b5e25fSSatish Balay mat_ilen = c->ilen + i; 15649b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 15749b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 15849b5e25fSSatish Balay *mat_j++ = tcol - 1; 15949b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 16049b5e25fSSatish Balay mat_a += bs2; 16149b5e25fSSatish Balay (*mat_ilen)++; 16249b5e25fSSatish Balay } 16349b5e25fSSatish Balay } 16449b5e25fSSatish Balay } 16549b5e25fSSatish Balay 16649b5e25fSSatish Balay /* Free work space */ 16749b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 16849b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 16949b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17049b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17149b5e25fSSatish Balay 17249b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 17349b5e25fSSatish Balay *B = C; 17449b5e25fSSatish Balay PetscFunctionReturn(0); 17549b5e25fSSatish Balay } 17649b5e25fSSatish Balay 1774a2ae208SSatish Balay #undef __FUNCT__ 1784a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 1794aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 18049b5e25fSSatish Balay { 18149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 18249b5e25fSSatish Balay IS is1; 1836849ba73SBarry Smith PetscErrorCode ierr; 1845d0c19d7SBarry Smith PetscInt *vary,*iary,nrows,i,bs=A->rmap->bs,count; 1855d0c19d7SBarry Smith const PetscInt *irow; 18649b5e25fSSatish Balay 18749b5e25fSSatish Balay PetscFunctionBegin; 188e32f2f54SBarry Smith if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 18949b5e25fSSatish Balay 19049b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 19149b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 19249b5e25fSSatish Balay 19349b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 19449b5e25fSSatish Balay and form the IS with compressed IS */ 19574ed9c26SBarry Smith ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr); 19674ed9c26SBarry Smith ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr); 19749b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 19849b5e25fSSatish Balay 19949b5e25fSSatish Balay count = 0; 20049b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 201e32f2f54SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks"); 20249b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 20349b5e25fSSatish Balay } 20449b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 20570b3c8c7SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr); 20674ed9c26SBarry Smith ierr = PetscFree2(vary,iary);CHKERRQ(ierr); 20749b5e25fSSatish Balay 2084aa3045dSJed Brown ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr); 20974ed9c26SBarry Smith ierr = ISDestroy(is1);CHKERRQ(ierr); 21049b5e25fSSatish Balay PetscFunctionReturn(0); 21149b5e25fSSatish Balay } 21249b5e25fSSatish Balay 2134a2ae208SSatish Balay #undef __FUNCT__ 2144a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 21513f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 21649b5e25fSSatish Balay { 2176849ba73SBarry Smith PetscErrorCode ierr; 21813f74950SBarry Smith PetscInt i; 21949b5e25fSSatish Balay 22049b5e25fSSatish Balay PetscFunctionBegin; 22149b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 22282502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 22349b5e25fSSatish Balay } 22449b5e25fSSatish Balay 22549b5e25fSSatish Balay for (i=0; i<n; i++) { 2264aa3045dSJed Brown ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr); 22749b5e25fSSatish Balay } 22849b5e25fSSatish Balay PetscFunctionReturn(0); 22949b5e25fSSatish Balay } 23049b5e25fSSatish Balay 23149b5e25fSSatish Balay /* -------------------------------------------------------*/ 23249b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 23349b5e25fSSatish Balay /* -------------------------------------------------------*/ 23449b5e25fSSatish Balay 2354a2ae208SSatish Balay #undef __FUNCT__ 2364a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 237dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 23849b5e25fSSatish Balay { 23949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 24087828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 24149b5e25fSSatish Balay MatScalar *v; 2426849ba73SBarry Smith PetscErrorCode ierr; 24313f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 24498c9bda7SSatish Balay PetscInt nonzerorow=0; 24549b5e25fSSatish Balay 24649b5e25fSSatish Balay PetscFunctionBegin; 2472dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2481ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2491ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 25049b5e25fSSatish Balay 25149b5e25fSSatish Balay v = a->a; 25249b5e25fSSatish Balay xb = x; 25349b5e25fSSatish Balay 25449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 25549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 25649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 25749b5e25fSSatish Balay ib = aj + *ai; 258831a3094SHong Zhang jmin = 0; 25998c9bda7SSatish Balay nonzerorow += (n>0); 2607fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 26149b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 26249b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 263831a3094SHong Zhang v += 4; jmin++; 2647fbae186SHong Zhang } 265831a3094SHong Zhang for (j=jmin; j<n; j++) { 26649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 26749b5e25fSSatish Balay cval = ib[j]*2; 26849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 26949b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 27049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 27149b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 27249b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 27349b5e25fSSatish Balay v += 4; 27449b5e25fSSatish Balay } 27549b5e25fSSatish Balay xb +=2; ai++; 27649b5e25fSSatish Balay } 27749b5e25fSSatish Balay 2781ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2791ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 280dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 28149b5e25fSSatish Balay PetscFunctionReturn(0); 28249b5e25fSSatish Balay } 28349b5e25fSSatish Balay 2844a2ae208SSatish Balay #undef __FUNCT__ 2854a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 286dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 28749b5e25fSSatish Balay { 28849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 28987828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 29049b5e25fSSatish Balay MatScalar *v; 2916849ba73SBarry Smith PetscErrorCode ierr; 29213f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 29398c9bda7SSatish Balay PetscInt nonzerorow=0; 29449b5e25fSSatish Balay 29549b5e25fSSatish Balay PetscFunctionBegin; 2962dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2971ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2981ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 29949b5e25fSSatish Balay 30049b5e25fSSatish Balay v = a->a; 30149b5e25fSSatish Balay xb = x; 30249b5e25fSSatish Balay 30349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 30449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 30549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 30649b5e25fSSatish Balay ib = aj + *ai; 307831a3094SHong Zhang jmin = 0; 30898c9bda7SSatish Balay nonzerorow += (n>0); 3097fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 31049b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 31149b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 31249b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 313831a3094SHong Zhang v += 9; jmin++; 3147fbae186SHong Zhang } 315831a3094SHong Zhang for (j=jmin; j<n; j++) { 31649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 31749b5e25fSSatish Balay cval = ib[j]*3; 31849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 31949b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 32049b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 32149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 32249b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 32349b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 32449b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 32549b5e25fSSatish Balay v += 9; 32649b5e25fSSatish Balay } 32749b5e25fSSatish Balay xb +=3; ai++; 32849b5e25fSSatish Balay } 32949b5e25fSSatish Balay 3301ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3311ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 332dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 33349b5e25fSSatish Balay PetscFunctionReturn(0); 33449b5e25fSSatish Balay } 33549b5e25fSSatish Balay 3364a2ae208SSatish Balay #undef __FUNCT__ 3374a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 338dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 33949b5e25fSSatish Balay { 34049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 34187828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 34249b5e25fSSatish Balay MatScalar *v; 3436849ba73SBarry Smith PetscErrorCode ierr; 34413f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 34598c9bda7SSatish Balay PetscInt nonzerorow=0; 34649b5e25fSSatish Balay 34749b5e25fSSatish Balay PetscFunctionBegin; 3482dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 3491ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3501ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 35149b5e25fSSatish Balay 35249b5e25fSSatish Balay v = a->a; 35349b5e25fSSatish Balay xb = x; 35449b5e25fSSatish Balay 35549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 35649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 35749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 35849b5e25fSSatish Balay ib = aj + *ai; 359831a3094SHong Zhang jmin = 0; 36098c9bda7SSatish Balay nonzerorow += (n>0); 3617fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 36249b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 36349b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 36449b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 36549b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 366831a3094SHong Zhang v += 16; jmin++; 3677fbae186SHong Zhang } 368831a3094SHong Zhang for (j=jmin; j<n; j++) { 36949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 37049b5e25fSSatish Balay cval = ib[j]*4; 37149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 37249b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 37349b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 37449b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 37549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 37649b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 37749b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 37849b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 37949b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 38049b5e25fSSatish Balay v += 16; 38149b5e25fSSatish Balay } 38249b5e25fSSatish Balay xb +=4; ai++; 38349b5e25fSSatish Balay } 38449b5e25fSSatish Balay 3851ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3861ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 387dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 38849b5e25fSSatish Balay PetscFunctionReturn(0); 38949b5e25fSSatish Balay } 39049b5e25fSSatish Balay 3914a2ae208SSatish Balay #undef __FUNCT__ 3924a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 393dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 39449b5e25fSSatish Balay { 39549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 39687828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 39749b5e25fSSatish Balay MatScalar *v; 3986849ba73SBarry Smith PetscErrorCode ierr; 39913f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 40098c9bda7SSatish Balay PetscInt nonzerorow=0; 40149b5e25fSSatish Balay 40249b5e25fSSatish Balay PetscFunctionBegin; 4032dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4041ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4051ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 40649b5e25fSSatish Balay 40749b5e25fSSatish Balay v = a->a; 40849b5e25fSSatish Balay xb = x; 40949b5e25fSSatish Balay 41049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 41149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 41249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 41349b5e25fSSatish Balay ib = aj + *ai; 414831a3094SHong Zhang jmin = 0; 41598c9bda7SSatish Balay nonzerorow += (n>0); 4167fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 41749b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 41849b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 41949b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 42049b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 42149b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 422831a3094SHong Zhang v += 25; jmin++; 4237fbae186SHong Zhang } 424831a3094SHong Zhang for (j=jmin; j<n; j++) { 42549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 42649b5e25fSSatish Balay cval = ib[j]*5; 42749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 42849b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 42949b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 43049b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 43149b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 43249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 43349b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 43449b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 43549b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 43649b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 43749b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 43849b5e25fSSatish Balay v += 25; 43949b5e25fSSatish Balay } 44049b5e25fSSatish Balay xb +=5; ai++; 44149b5e25fSSatish Balay } 44249b5e25fSSatish Balay 4431ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4441ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 445dc0b31edSSatish Balay ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 44649b5e25fSSatish Balay PetscFunctionReturn(0); 44749b5e25fSSatish Balay } 44849b5e25fSSatish Balay 44949b5e25fSSatish Balay 4504a2ae208SSatish Balay #undef __FUNCT__ 4514a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 452dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 45349b5e25fSSatish Balay { 45449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 45587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 45649b5e25fSSatish Balay MatScalar *v; 4576849ba73SBarry Smith PetscErrorCode ierr; 45813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 45998c9bda7SSatish Balay PetscInt nonzerorow=0; 46049b5e25fSSatish Balay 46149b5e25fSSatish Balay PetscFunctionBegin; 4622dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4631ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4641ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 46549b5e25fSSatish Balay 46649b5e25fSSatish Balay v = a->a; 46749b5e25fSSatish Balay xb = x; 46849b5e25fSSatish Balay 46949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 47049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 47149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 47249b5e25fSSatish Balay ib = aj + *ai; 473831a3094SHong Zhang jmin = 0; 47498c9bda7SSatish Balay nonzerorow += (n>0); 4757fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 47649b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 47749b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 47849b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 47949b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 48049b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 48149b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 482831a3094SHong Zhang v += 36; jmin++; 4837fbae186SHong Zhang } 484831a3094SHong Zhang for (j=jmin; j<n; j++) { 48549b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 48649b5e25fSSatish Balay cval = ib[j]*6; 48749b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 48849b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 48949b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 49049b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 49149b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 49249b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 49349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 49449b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 49549b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 49649b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 49749b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 49849b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 49949b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 50049b5e25fSSatish Balay v += 36; 50149b5e25fSSatish Balay } 50249b5e25fSSatish Balay xb +=6; ai++; 50349b5e25fSSatish Balay } 50449b5e25fSSatish Balay 5051ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5061ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 507dc0b31edSSatish Balay ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 50849b5e25fSSatish Balay PetscFunctionReturn(0); 50949b5e25fSSatish Balay } 5104a2ae208SSatish Balay #undef __FUNCT__ 5114a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 512dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 51349b5e25fSSatish Balay { 51449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 51587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 51649b5e25fSSatish Balay MatScalar *v; 5176849ba73SBarry Smith PetscErrorCode ierr; 51813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 51998c9bda7SSatish Balay PetscInt nonzerorow=0; 52049b5e25fSSatish Balay 52149b5e25fSSatish Balay PetscFunctionBegin; 5222dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 5231ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5241ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 52549b5e25fSSatish Balay 52649b5e25fSSatish Balay v = a->a; 52749b5e25fSSatish Balay xb = x; 52849b5e25fSSatish Balay 52949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 53049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 53149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 53249b5e25fSSatish Balay ib = aj + *ai; 533831a3094SHong Zhang jmin = 0; 53498c9bda7SSatish Balay nonzerorow += (n>0); 5357fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 53649b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 53749b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 53849b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 53949b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 54049b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 54149b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 54249b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 543831a3094SHong Zhang v += 49; jmin++; 5447fbae186SHong Zhang } 545831a3094SHong Zhang for (j=jmin; j<n; j++) { 54649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 54749b5e25fSSatish Balay cval = ib[j]*7; 54849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 54949b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 55049b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 55149b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 55249b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 55349b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 55449b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 55549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 55649b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 55749b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 55849b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 55949b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 56049b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 56149b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 56249b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 56349b5e25fSSatish Balay v += 49; 56449b5e25fSSatish Balay } 56549b5e25fSSatish Balay xb +=7; ai++; 56649b5e25fSSatish Balay } 5671ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5681ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 569dc0b31edSSatish Balay ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 57049b5e25fSSatish Balay PetscFunctionReturn(0); 57149b5e25fSSatish Balay } 57249b5e25fSSatish Balay 57349b5e25fSSatish Balay /* 57449b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 57549b5e25fSSatish Balay */ 5764a2ae208SSatish Balay #undef __FUNCT__ 5774a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 578dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 57949b5e25fSSatish Balay { 58049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 58187828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 5820b60a74dSHong Zhang MatScalar *v; 583dfbe8321SBarry Smith PetscErrorCode ierr; 584d0f46423SBarry Smith PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; 58598c9bda7SSatish Balay PetscInt nonzerorow=0; 58649b5e25fSSatish Balay 58749b5e25fSSatish Balay PetscFunctionBegin; 5882dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 5891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 5901ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 59149b5e25fSSatish Balay 59249b5e25fSSatish Balay aj = a->j; 59349b5e25fSSatish Balay v = a->a; 59449b5e25fSSatish Balay ii = a->i; 59549b5e25fSSatish Balay 59649b5e25fSSatish Balay if (!a->mult_work) { 597d0f46423SBarry Smith ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 59849b5e25fSSatish Balay } 59949b5e25fSSatish Balay work = a->mult_work; 60049b5e25fSSatish Balay 60149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 60249b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 60349b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 60498c9bda7SSatish Balay nonzerorow += (n>0); 60549b5e25fSSatish Balay 60649b5e25fSSatish Balay /* upper triangular part */ 60749b5e25fSSatish Balay for (j=0; j<n; j++) { 60849b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 60949b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 61049b5e25fSSatish Balay workt += bs; 61149b5e25fSSatish Balay } 61249b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 61349b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 61449b5e25fSSatish Balay 61549b5e25fSSatish Balay /* strict lower triangular part */ 616831a3094SHong Zhang idx = aj+ii[0]; 617831a3094SHong Zhang if (*idx == i){ 61896b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 619831a3094SHong Zhang } 62096b9376eSHong Zhang 62149b5e25fSSatish Balay if (ncols > 0){ 62249b5e25fSSatish Balay workt = work; 62387828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 624831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 625831a3094SHong Zhang for (j=0; j<n; j++) { 626831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 62749b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 62849b5e25fSSatish Balay workt += bs; 62949b5e25fSSatish Balay } 63049b5e25fSSatish Balay } 63149b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 63249b5e25fSSatish Balay } 63349b5e25fSSatish Balay 6341ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6351ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 636dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr); 63749b5e25fSSatish Balay PetscFunctionReturn(0); 63849b5e25fSSatish Balay } 63949b5e25fSSatish Balay 6404a2ae208SSatish Balay #undef __FUNCT__ 6414a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 642dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 64349b5e25fSSatish Balay { 64449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 645bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1; 64649b5e25fSSatish Balay MatScalar *v; 6476849ba73SBarry Smith PetscErrorCode ierr; 64813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 64998c9bda7SSatish Balay PetscInt nonzerorow=0; 65049b5e25fSSatish Balay 65149b5e25fSSatish Balay PetscFunctionBegin; 652*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 6531ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6541ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 65549b5e25fSSatish Balay v = a->a; 65649b5e25fSSatish Balay xb = x; 65749b5e25fSSatish Balay 65849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 65949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 66049b5e25fSSatish Balay x1 = xb[0]; 66149b5e25fSSatish Balay ib = aj + *ai; 662831a3094SHong Zhang jmin = 0; 66398c9bda7SSatish Balay nonzerorow += (n>0); 664831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 665831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 666831a3094SHong Zhang } 667831a3094SHong Zhang for (j=jmin; j<n; j++) { 66849b5e25fSSatish Balay cval = *ib; 66949b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 67049b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 67149b5e25fSSatish Balay } 67249b5e25fSSatish Balay xb++; ai++; 67349b5e25fSSatish Balay } 67449b5e25fSSatish Balay 6751ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 676bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 67749b5e25fSSatish Balay 678dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 67949b5e25fSSatish Balay PetscFunctionReturn(0); 68049b5e25fSSatish Balay } 68149b5e25fSSatish Balay 6824a2ae208SSatish Balay #undef __FUNCT__ 6834a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 684dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 68549b5e25fSSatish Balay { 68649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 687bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2; 68849b5e25fSSatish Balay MatScalar *v; 6896849ba73SBarry Smith PetscErrorCode ierr; 69013f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 69198c9bda7SSatish Balay PetscInt nonzerorow=0; 69249b5e25fSSatish Balay 69349b5e25fSSatish Balay PetscFunctionBegin; 694*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 6951ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6961ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 69749b5e25fSSatish Balay 69849b5e25fSSatish Balay v = a->a; 69949b5e25fSSatish Balay xb = x; 70049b5e25fSSatish Balay 70149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 70249b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 70349b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 70449b5e25fSSatish Balay ib = aj + *ai; 705831a3094SHong Zhang jmin = 0; 70698c9bda7SSatish Balay nonzerorow += (n>0); 7077fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 70849b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 70949b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 710831a3094SHong Zhang v += 4; jmin++; 7117fbae186SHong Zhang } 712831a3094SHong Zhang for (j=jmin; j<n; j++) { 71349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 71449b5e25fSSatish Balay cval = ib[j]*2; 71549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 71649b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 71749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 71849b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 71949b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 72049b5e25fSSatish Balay v += 4; 72149b5e25fSSatish Balay } 72249b5e25fSSatish Balay xb +=2; ai++; 72349b5e25fSSatish Balay } 7241ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 725bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 72649b5e25fSSatish Balay 727dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 72849b5e25fSSatish Balay PetscFunctionReturn(0); 72949b5e25fSSatish Balay } 73049b5e25fSSatish Balay 7314a2ae208SSatish Balay #undef __FUNCT__ 7324a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 733dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 73449b5e25fSSatish Balay { 73549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 736bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3; 73749b5e25fSSatish Balay MatScalar *v; 7386849ba73SBarry Smith PetscErrorCode ierr; 73913f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 74098c9bda7SSatish Balay PetscInt nonzerorow=0; 74149b5e25fSSatish Balay 74249b5e25fSSatish Balay PetscFunctionBegin; 743*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 7441ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7451ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 74649b5e25fSSatish Balay 74749b5e25fSSatish Balay v = a->a; 74849b5e25fSSatish Balay xb = x; 74949b5e25fSSatish Balay 75049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 75149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 75249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 75349b5e25fSSatish Balay ib = aj + *ai; 754831a3094SHong Zhang jmin = 0; 75598c9bda7SSatish Balay nonzerorow += (n>0); 7567fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 75749b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 75849b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 75949b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 760831a3094SHong Zhang v += 9; jmin++; 7617fbae186SHong Zhang } 762831a3094SHong Zhang for (j=jmin; j<n; j++) { 76349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 76449b5e25fSSatish Balay cval = ib[j]*3; 76549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 76649b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 76749b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 76849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 76949b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 77049b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 77149b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 77249b5e25fSSatish Balay v += 9; 77349b5e25fSSatish Balay } 77449b5e25fSSatish Balay xb +=3; ai++; 77549b5e25fSSatish Balay } 77649b5e25fSSatish Balay 7771ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 778bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 77949b5e25fSSatish Balay 780dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 78149b5e25fSSatish Balay PetscFunctionReturn(0); 78249b5e25fSSatish Balay } 78349b5e25fSSatish Balay 7844a2ae208SSatish Balay #undef __FUNCT__ 7854a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 786dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 78749b5e25fSSatish Balay { 78849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 789bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4; 79049b5e25fSSatish Balay MatScalar *v; 7916849ba73SBarry Smith PetscErrorCode ierr; 79213f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 79398c9bda7SSatish Balay PetscInt nonzerorow=0; 79449b5e25fSSatish Balay 79549b5e25fSSatish Balay PetscFunctionBegin; 796*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 7971ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7981ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 79949b5e25fSSatish Balay 80049b5e25fSSatish Balay v = a->a; 80149b5e25fSSatish Balay xb = x; 80249b5e25fSSatish Balay 80349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 80449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 80549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 80649b5e25fSSatish Balay ib = aj + *ai; 807831a3094SHong Zhang jmin = 0; 80898c9bda7SSatish Balay nonzerorow += (n>0); 8097fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 81049b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 81149b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 81249b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 81349b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 814831a3094SHong Zhang v += 16; jmin++; 8157fbae186SHong Zhang } 816831a3094SHong Zhang for (j=jmin; j<n; j++) { 81749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 81849b5e25fSSatish Balay cval = ib[j]*4; 81949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 82049b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 82149b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 82249b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 82349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 82449b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 82549b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 82649b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 82749b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 82849b5e25fSSatish Balay v += 16; 82949b5e25fSSatish Balay } 83049b5e25fSSatish Balay xb +=4; ai++; 83149b5e25fSSatish Balay } 83249b5e25fSSatish Balay 8331ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 834bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 83549b5e25fSSatish Balay 836dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 83749b5e25fSSatish Balay PetscFunctionReturn(0); 83849b5e25fSSatish Balay } 83949b5e25fSSatish Balay 8404a2ae208SSatish Balay #undef __FUNCT__ 8414a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 842dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 84349b5e25fSSatish Balay { 84449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 845bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5; 84649b5e25fSSatish Balay MatScalar *v; 8476849ba73SBarry Smith PetscErrorCode ierr; 84813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 84998c9bda7SSatish Balay PetscInt nonzerorow=0; 85049b5e25fSSatish Balay 85149b5e25fSSatish Balay PetscFunctionBegin; 852*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 8531ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8541ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 85549b5e25fSSatish Balay 85649b5e25fSSatish Balay v = a->a; 85749b5e25fSSatish Balay xb = x; 85849b5e25fSSatish Balay 85949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 86049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 86149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 86249b5e25fSSatish Balay ib = aj + *ai; 863831a3094SHong Zhang jmin = 0; 86498c9bda7SSatish Balay nonzerorow += (n>0); 8657fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 86649b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 86749b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 86849b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 86949b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 87049b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 871831a3094SHong Zhang v += 25; jmin++; 8727fbae186SHong Zhang } 873831a3094SHong Zhang for (j=jmin; j<n; j++) { 87449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 87549b5e25fSSatish Balay cval = ib[j]*5; 87649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 87749b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 87849b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 87949b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 88049b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 88149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 88249b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 88349b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 88449b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 88549b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 88649b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 88749b5e25fSSatish Balay v += 25; 88849b5e25fSSatish Balay } 88949b5e25fSSatish Balay xb +=5; ai++; 89049b5e25fSSatish Balay } 89149b5e25fSSatish Balay 8921ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 893bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 89449b5e25fSSatish Balay 895dc0b31edSSatish Balay ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 89649b5e25fSSatish Balay PetscFunctionReturn(0); 89749b5e25fSSatish Balay } 8984a2ae208SSatish Balay #undef __FUNCT__ 8994a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 900dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 90149b5e25fSSatish Balay { 90249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 903bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6; 90449b5e25fSSatish Balay MatScalar *v; 9056849ba73SBarry Smith PetscErrorCode ierr; 90613f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 90798c9bda7SSatish Balay PetscInt nonzerorow=0; 90849b5e25fSSatish Balay 90949b5e25fSSatish Balay PetscFunctionBegin; 910*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 9111ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9121ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 91349b5e25fSSatish Balay 91449b5e25fSSatish Balay v = a->a; 91549b5e25fSSatish Balay xb = x; 91649b5e25fSSatish Balay 91749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 91849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 91949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 92049b5e25fSSatish Balay ib = aj + *ai; 921831a3094SHong Zhang jmin = 0; 92298c9bda7SSatish Balay nonzerorow += (n>0); 9237fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 92449b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 92549b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 92649b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 92749b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 92849b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 92949b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 930831a3094SHong Zhang v += 36; jmin++; 9317fbae186SHong Zhang } 932831a3094SHong Zhang for (j=jmin; j<n; j++) { 93349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 93449b5e25fSSatish Balay cval = ib[j]*6; 93549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 93649b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 93749b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 93849b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 93949b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 94049b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 94149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 94249b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 94349b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 94449b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 94549b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 94649b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 94749b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 94849b5e25fSSatish Balay v += 36; 94949b5e25fSSatish Balay } 95049b5e25fSSatish Balay xb +=6; ai++; 95149b5e25fSSatish Balay } 95249b5e25fSSatish Balay 9531ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 95549b5e25fSSatish Balay 956dc0b31edSSatish Balay ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 95749b5e25fSSatish Balay PetscFunctionReturn(0); 95849b5e25fSSatish Balay } 95949b5e25fSSatish Balay 9604a2ae208SSatish Balay #undef __FUNCT__ 9614a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 962dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 96349b5e25fSSatish Balay { 96449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 965bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 96649b5e25fSSatish Balay MatScalar *v; 9676849ba73SBarry Smith PetscErrorCode ierr; 96813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 96998c9bda7SSatish Balay PetscInt nonzerorow=0; 97049b5e25fSSatish Balay 97149b5e25fSSatish Balay PetscFunctionBegin; 972*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 9731ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9741ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 97549b5e25fSSatish Balay 97649b5e25fSSatish Balay v = a->a; 97749b5e25fSSatish Balay xb = x; 97849b5e25fSSatish Balay 97949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 98049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 98149b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 98249b5e25fSSatish Balay ib = aj + *ai; 983831a3094SHong Zhang jmin = 0; 98498c9bda7SSatish Balay nonzerorow += (n>0); 9857fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 98649b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 98749b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 98849b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 98949b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 99049b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 99149b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 99249b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 993831a3094SHong Zhang v += 49; jmin++; 9947fbae186SHong Zhang } 995831a3094SHong Zhang for (j=jmin; j<n; j++) { 99649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 99749b5e25fSSatish Balay cval = ib[j]*7; 99849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 99949b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 100049b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 100149b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 100249b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 100349b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 100449b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 100549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 100649b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 100749b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 100849b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 100949b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 101049b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 101149b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 101249b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 101349b5e25fSSatish Balay v += 49; 101449b5e25fSSatish Balay } 101549b5e25fSSatish Balay xb +=7; ai++; 101649b5e25fSSatish Balay } 101749b5e25fSSatish Balay 10181ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1019bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 102049b5e25fSSatish Balay 1021dc0b31edSSatish Balay ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 102249b5e25fSSatish Balay PetscFunctionReturn(0); 102349b5e25fSSatish Balay } 102449b5e25fSSatish Balay 10254a2ae208SSatish Balay #undef __FUNCT__ 10264a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 1027dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 102849b5e25fSSatish Balay { 102949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1030bba805e6SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1031066653e3SSatish Balay MatScalar *v; 1032dfbe8321SBarry Smith PetscErrorCode ierr; 1033d0f46423SBarry Smith PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; 103498c9bda7SSatish Balay PetscInt nonzerorow=0; 103549b5e25fSSatish Balay 103649b5e25fSSatish Balay PetscFunctionBegin; 1037*343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 10381ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 10391ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 104049b5e25fSSatish Balay 104149b5e25fSSatish Balay aj = a->j; 104249b5e25fSSatish Balay v = a->a; 104349b5e25fSSatish Balay ii = a->i; 104449b5e25fSSatish Balay 104549b5e25fSSatish Balay if (!a->mult_work) { 1046d0f46423SBarry Smith ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 104749b5e25fSSatish Balay } 104849b5e25fSSatish Balay work = a->mult_work; 104949b5e25fSSatish Balay 105049b5e25fSSatish Balay 105149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 105249b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 105349b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 105498c9bda7SSatish Balay nonzerorow += (n>0); 105549b5e25fSSatish Balay 105649b5e25fSSatish Balay /* upper triangular part */ 105749b5e25fSSatish Balay for (j=0; j<n; j++) { 105849b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 105949b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 106049b5e25fSSatish Balay workt += bs; 106149b5e25fSSatish Balay } 106249b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 106349b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 106449b5e25fSSatish Balay 106549b5e25fSSatish Balay /* strict lower triangular part */ 1066831a3094SHong Zhang idx = aj+ii[0]; 1067831a3094SHong Zhang if (*idx == i){ 106896b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1069831a3094SHong Zhang } 107049b5e25fSSatish Balay if (ncols > 0){ 107149b5e25fSSatish Balay workt = work; 107287828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1073831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1074831a3094SHong Zhang for (j=0; j<n; j++) { 1075831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 107649b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 107749b5e25fSSatish Balay workt += bs; 107849b5e25fSSatish Balay } 107949b5e25fSSatish Balay } 108049b5e25fSSatish Balay 108149b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 108249b5e25fSSatish Balay } 108349b5e25fSSatish Balay 10841ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1085bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 108649b5e25fSSatish Balay 1087dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 108849b5e25fSSatish Balay PetscFunctionReturn(0); 108949b5e25fSSatish Balay } 109049b5e25fSSatish Balay 10914a2ae208SSatish Balay #undef __FUNCT__ 10924a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1093f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha) 109449b5e25fSSatish Balay { 109549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1096f4df32b1SMatthew Knepley PetscScalar oalpha = alpha; 1097efee365bSSatish Balay PetscErrorCode ierr; 10980805154bSBarry Smith PetscBLASInt one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz); 109949b5e25fSSatish Balay 110049b5e25fSSatish Balay PetscFunctionBegin; 1101f4df32b1SMatthew Knepley BLASscal_(&totalnz,&oalpha,a->a,&one); 1102efee365bSSatish Balay ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 110349b5e25fSSatish Balay PetscFunctionReturn(0); 110449b5e25fSSatish Balay } 110549b5e25fSSatish Balay 11064a2ae208SSatish Balay #undef __FUNCT__ 11074a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 1108dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 110949b5e25fSSatish Balay { 111049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111149b5e25fSSatish Balay MatScalar *v = a->a; 111249b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1113d0f46423SBarry Smith PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 11146849ba73SBarry Smith PetscErrorCode ierr; 111513f74950SBarry Smith PetscInt *jl,*il,jmin,jmax,nexti,ik,*col; 111649b5e25fSSatish Balay 111749b5e25fSSatish Balay PetscFunctionBegin; 111849b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 111949b5e25fSSatish Balay for (k=0; k<mbs; k++){ 112049b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1121831a3094SHong Zhang col = aj + jmin; 1122831a3094SHong Zhang if (*col == k){ /* diagonal block */ 112349b5e25fSSatish Balay for (i=0; i<bs2; i++){ 112449b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 112549b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 112649b5e25fSSatish Balay #else 112749b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 112849b5e25fSSatish Balay #endif 112949b5e25fSSatish Balay } 1130831a3094SHong Zhang jmin++; 1131831a3094SHong Zhang } 1132831a3094SHong Zhang for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 113349b5e25fSSatish Balay for (i=0; i<bs2; i++){ 113449b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 113549b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 113649b5e25fSSatish Balay #else 113749b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 113849b5e25fSSatish Balay #endif 113949b5e25fSSatish Balay } 114049b5e25fSSatish Balay } 114149b5e25fSSatish Balay } 114249b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 11430b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */ 114474ed9c26SBarry Smith ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 11450b8dc8d2SHong Zhang for (i=0; i<mbs; i++) jl[i] = mbs; 11460b8dc8d2SHong Zhang il[0] = 0; 114749b5e25fSSatish Balay 114849b5e25fSSatish Balay *norm = 0.0; 114949b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 115049b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 115149b5e25fSSatish Balay /*-- col sum --*/ 115249b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1153831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1154831a3094SHong Zhang at step k */ 115549b5e25fSSatish Balay while (i<mbs){ 115649b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 115749b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 115849b5e25fSSatish Balay for (j=0; j<bs; j++){ 115949b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 116049b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 116149b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 116249b5e25fSSatish Balay } 116349b5e25fSSatish Balay } 116449b5e25fSSatish Balay /* update il, jl */ 1165831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1166831a3094SHong Zhang jmax = a->i[i+1]; 116749b5e25fSSatish Balay if (jmin < jmax){ 116849b5e25fSSatish Balay il[i] = jmin; 116949b5e25fSSatish Balay j = a->j[jmin]; 117049b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 117149b5e25fSSatish Balay } 117249b5e25fSSatish Balay i = nexti; 117349b5e25fSSatish Balay } 117449b5e25fSSatish Balay /*-- row sum --*/ 117549b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 117649b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 117749b5e25fSSatish Balay for (j=0; j<bs; j++){ 117849b5e25fSSatish Balay v = a->a + i*bs2 + j; 117949b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 11800b8dc8d2SHong Zhang sum[j] += PetscAbsScalar(*v); v += bs; 118149b5e25fSSatish Balay } 118249b5e25fSSatish Balay } 118349b5e25fSSatish Balay } 118449b5e25fSSatish Balay /* add k_th block row to il, jl */ 1185831a3094SHong Zhang col = aj+jmin; 1186831a3094SHong Zhang if (*col == k) jmin++; 118749b5e25fSSatish Balay if (jmin < jmax){ 118849b5e25fSSatish Balay il[k] = jmin; 11890b8dc8d2SHong Zhang j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k; 119049b5e25fSSatish Balay } 119149b5e25fSSatish Balay for (j=0; j<bs; j++){ 119249b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 119349b5e25fSSatish Balay } 119449b5e25fSSatish Balay } 119574ed9c26SBarry Smith ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr); 119649b5e25fSSatish Balay } else { 1197e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 119849b5e25fSSatish Balay } 119949b5e25fSSatish Balay PetscFunctionReturn(0); 120049b5e25fSSatish Balay } 120149b5e25fSSatish Balay 12024a2ae208SSatish Balay #undef __FUNCT__ 12034a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 1204ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg) 120549b5e25fSSatish Balay { 120649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1207dfbe8321SBarry Smith PetscErrorCode ierr; 120849b5e25fSSatish Balay 120949b5e25fSSatish Balay PetscFunctionBegin; 121049b5e25fSSatish Balay 121149b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1212d0f46423SBarry Smith if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { 1213ef511fbeSHong Zhang *flg = PETSC_FALSE; 1214ef511fbeSHong Zhang PetscFunctionReturn(0); 121549b5e25fSSatish Balay } 121649b5e25fSSatish Balay 121749b5e25fSSatish Balay /* if the a->i are the same */ 121813f74950SBarry Smith ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1219abc0a331SBarry Smith if (!*flg) { 122049b5e25fSSatish Balay PetscFunctionReturn(0); 122149b5e25fSSatish Balay } 122249b5e25fSSatish Balay 122349b5e25fSSatish Balay /* if a->j are the same */ 122413f74950SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1225abc0a331SBarry Smith if (!*flg) { 122649b5e25fSSatish Balay PetscFunctionReturn(0); 122749b5e25fSSatish Balay } 122849b5e25fSSatish Balay /* if a->a are the same */ 1229d0f46423SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1230935af2e7SHong Zhang PetscFunctionReturn(0); 123149b5e25fSSatish Balay } 123249b5e25fSSatish Balay 12334a2ae208SSatish Balay #undef __FUNCT__ 12344a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 1235dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 123649b5e25fSSatish Balay { 123749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1238dfbe8321SBarry Smith PetscErrorCode ierr; 123913f74950SBarry Smith PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 124087828ca2SBarry Smith PetscScalar *x,zero = 0.0; 124149b5e25fSSatish Balay MatScalar *aa,*aa_j; 124249b5e25fSSatish Balay 124349b5e25fSSatish Balay PetscFunctionBegin; 1244d0f46423SBarry Smith bs = A->rmap->bs; 1245e32f2f54SBarry Smith if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 124682799104SHong Zhang 124749b5e25fSSatish Balay aa = a->a; 124849b5e25fSSatish Balay ai = a->i; 124949b5e25fSSatish Balay aj = a->j; 125049b5e25fSSatish Balay ambs = a->mbs; 125149b5e25fSSatish Balay bs2 = a->bs2; 125249b5e25fSSatish Balay 12532dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 12541ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 125549b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1256e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 125749b5e25fSSatish Balay for (i=0; i<ambs; i++) { 125849b5e25fSSatish Balay j=ai[i]; 125949b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 126049b5e25fSSatish Balay row = i*bs; 126149b5e25fSSatish Balay aa_j = aa + j*bs2; 1262d5f3da31SBarry Smith if (A->factortype && bs==1){ 126382799104SHong Zhang for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k]; 126482799104SHong Zhang } else { 126549b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 126649b5e25fSSatish Balay } 126749b5e25fSSatish Balay } 126882799104SHong Zhang } 126982799104SHong Zhang 12701ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 127149b5e25fSSatish Balay PetscFunctionReturn(0); 127249b5e25fSSatish Balay } 127349b5e25fSSatish Balay 12744a2ae208SSatish Balay #undef __FUNCT__ 12754a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 1276dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 127749b5e25fSSatish Balay { 127849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 12795e90f9d9SHong Zhang PetscScalar *l,x,*li,*ri; 128049b5e25fSSatish Balay MatScalar *aa,*v; 1281dfbe8321SBarry Smith PetscErrorCode ierr; 12825e90f9d9SHong Zhang PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1283ace3abfcSBarry Smith PetscBool flg; 128449b5e25fSSatish Balay 128549b5e25fSSatish Balay PetscFunctionBegin; 1286b3bf805bSHong Zhang if (ll != rr){ 1287b3bf805bSHong Zhang ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1288e7e72b3dSBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1289b3bf805bSHong Zhang } 1290b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 129149b5e25fSSatish Balay ai = a->i; 129249b5e25fSSatish Balay aj = a->j; 129349b5e25fSSatish Balay aa = a->a; 1294d0f46423SBarry Smith m = A->rmap->N; 1295d0f46423SBarry Smith bs = A->rmap->bs; 129649b5e25fSSatish Balay mbs = a->mbs; 129749b5e25fSSatish Balay bs2 = a->bs2; 129849b5e25fSSatish Balay 12991ebc52fbSHong Zhang ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 130049b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1301e32f2f54SBarry Smith if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 130249b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 130349b5e25fSSatish Balay M = ai[i+1] - ai[i]; 130449b5e25fSSatish Balay li = l + i*bs; 130549b5e25fSSatish Balay v = aa + bs2*ai[i]; 130649b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 130749b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 13085e90f9d9SHong Zhang for (k=0; k<bs; k++) { 130949b5e25fSSatish Balay x = ri[k]; 131049b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 131149b5e25fSSatish Balay } 131249b5e25fSSatish Balay } 131349b5e25fSSatish Balay } 13141ebc52fbSHong Zhang ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1315dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 131649b5e25fSSatish Balay PetscFunctionReturn(0); 131749b5e25fSSatish Balay } 131849b5e25fSSatish Balay 13194a2ae208SSatish Balay #undef __FUNCT__ 13204a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 1321dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 132249b5e25fSSatish Balay { 132349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 132449b5e25fSSatish Balay 132549b5e25fSSatish Balay PetscFunctionBegin; 132649b5e25fSSatish Balay info->block_size = a->bs2; 1327ceed8ce5SJed Brown info->nz_allocated = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */ 13286c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 132949b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 133049b5e25fSSatish Balay info->assemblies = A->num_ass; 13318e58a170SBarry Smith info->mallocs = A->info.mallocs; 13327adad957SLisandro Dalcin info->memory = ((PetscObject)A)->mem; 1333d5f3da31SBarry Smith if (A->factortype) { 133449b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 133549b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 133649b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 133749b5e25fSSatish Balay } else { 133849b5e25fSSatish Balay info->fill_ratio_given = 0; 133949b5e25fSSatish Balay info->fill_ratio_needed = 0; 134049b5e25fSSatish Balay info->factor_mallocs = 0; 134149b5e25fSSatish Balay } 134249b5e25fSSatish Balay PetscFunctionReturn(0); 134349b5e25fSSatish Balay } 134449b5e25fSSatish Balay 134549b5e25fSSatish Balay 13464a2ae208SSatish Balay #undef __FUNCT__ 13474a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 1348dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A) 134949b5e25fSSatish Balay { 135049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1351dfbe8321SBarry Smith PetscErrorCode ierr; 135249b5e25fSSatish Balay 135349b5e25fSSatish Balay PetscFunctionBegin; 135449b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 135549b5e25fSSatish Balay PetscFunctionReturn(0); 135649b5e25fSSatish Balay } 1357dc354874SHong Zhang 13584a2ae208SSatish Balay #undef __FUNCT__ 1359985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ" 1360985db425SBarry Smith /* 1361985db425SBarry Smith This code does not work since it only checks the upper triangular part of 1362985db425SBarry Smith the matrix. Hence it is not listed in the function table. 1363985db425SBarry Smith */ 1364985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[]) 1365dc354874SHong Zhang { 1366dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1367dfbe8321SBarry Smith PetscErrorCode ierr; 136813f74950SBarry Smith PetscInt i,j,n,row,col,bs,*ai,*aj,mbs; 1369c3fca9a7SHong Zhang PetscReal atmp; 1370273d9f13SBarry Smith MatScalar *aa; 1371985db425SBarry Smith PetscScalar *x; 137213f74950SBarry Smith PetscInt ncols,brow,bcol,krow,kcol; 1373dc354874SHong Zhang 1374dc354874SHong Zhang PetscFunctionBegin; 1375e32f2f54SBarry Smith if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 1376e32f2f54SBarry Smith if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1377d0f46423SBarry Smith bs = A->rmap->bs; 1378dc354874SHong Zhang aa = a->a; 1379dc354874SHong Zhang ai = a->i; 1380dc354874SHong Zhang aj = a->j; 138144117c81SHong Zhang mbs = a->mbs; 1382dc354874SHong Zhang 1383985db425SBarry Smith ierr = VecSet(v,0.0);CHKERRQ(ierr); 13841ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1385dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1386e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 138744117c81SHong Zhang for (i=0; i<mbs; i++) { 1388d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1389d0f6400bSHong Zhang brow = bs*i; 139044117c81SHong Zhang for (j=0; j<ncols; j++){ 1391d0f6400bSHong Zhang bcol = bs*(*aj); 139244117c81SHong Zhang for (kcol=0; kcol<bs; kcol++){ 1393d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 139444117c81SHong Zhang for (krow=0; krow<bs; krow++){ 1395d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1396d0f6400bSHong Zhang row = brow + krow; /* row index */ 1397c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1398c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 139944117c81SHong Zhang } 140044117c81SHong Zhang } 1401d0f6400bSHong Zhang aj++; 1402dc354874SHong Zhang } 1403dc354874SHong Zhang } 14041ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1405dc354874SHong Zhang PetscFunctionReturn(0); 1406dc354874SHong Zhang } 1407