149b5e25fSSatish Balay 2c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 306873bf2SBarry Smith #include <petsc-private/kernels/blockinvert.h> 4c6db04a5SJed Brown #include <petscbt.h> 5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 6c6db04a5SJed Brown #include <petscblaslapack.h> 749b5e25fSSatish Balay 84a2ae208SSatish Balay #undef __FUNCT__ 94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 1149b5e25fSSatish Balay { 125eee224dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 136849ba73SBarry Smith PetscErrorCode ierr; 145d0c19d7SBarry Smith PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2; 155d0c19d7SBarry Smith const PetscInt *idx; 16db41cccfSHong Zhang PetscBT table_out,table_in; 17d94109b8SHong Zhang 18d94109b8SHong Zhang PetscFunctionBegin; 19e32f2f54SBarry Smith if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 20d94109b8SHong Zhang mbs = a->mbs; 21d94109b8SHong Zhang ai = a->i; 22d94109b8SHong Zhang aj = a->j; 23d0f46423SBarry Smith bs = A->rmap->bs; 2453b8de81SBarry Smith ierr = PetscBTCreate(mbs,&table_out);CHKERRQ(ierr); 2513f74950SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 26d0f46423SBarry Smith ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 2753b8de81SBarry Smith ierr = PetscBTCreate(mbs,&table_in);CHKERRQ(ierr); 28d94109b8SHong Zhang 29d94109b8SHong Zhang for (i=0; i<is_max; i++) { /* for each is */ 30d94109b8SHong Zhang isz = 0; 31db41cccfSHong Zhang ierr = PetscBTMemzero(mbs,table_out);CHKERRQ(ierr); 32d94109b8SHong Zhang 33d94109b8SHong Zhang /* Extract the indices, assume there can be duplicate entries */ 34d94109b8SHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 35d94109b8SHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 36d94109b8SHong Zhang 37db41cccfSHong Zhang /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */ 38dbe03f88SHong Zhang bcol_max = 0; 39d94109b8SHong Zhang for (j=0; j<n; ++j) { 40d94109b8SHong Zhang brow = idx[j]/bs; /* convert the indices into block indices */ 41e32f2f54SBarry Smith if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 42db41cccfSHong Zhang if (!PetscBTLookupSet(table_out,brow)) { 43dbe03f88SHong Zhang nidx[isz++] = brow; 44dbe03f88SHong Zhang if (bcol_max < brow) bcol_max = brow; 45dbe03f88SHong Zhang } 46d94109b8SHong Zhang } 47d94109b8SHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 486bf464f9SBarry Smith ierr = ISDestroy(&is[i]);CHKERRQ(ierr); 49d94109b8SHong Zhang 50d94109b8SHong Zhang k = 0; 51d94109b8SHong Zhang for (j=0; j<ov; j++) { /* for each overlap */ 52db41cccfSHong Zhang /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */ 53db41cccfSHong Zhang ierr = PetscBTMemzero(mbs,table_in);CHKERRQ(ierr); 54db41cccfSHong Zhang for (l=k; l<isz; l++) { ierr = PetscBTSet(table_in,nidx[l]);CHKERRQ(ierr); } 55d94109b8SHong Zhang 56d94109b8SHong Zhang n = isz; /* length of the updated is[i] */ 57d94109b8SHong Zhang for (brow=0; brow<mbs; brow++) { 58d94109b8SHong Zhang start = ai[brow]; end = ai[brow+1]; 59db41cccfSHong Zhang if (PetscBTLookup(table_in,brow)) { /* brow is on nidx - row search: collect all bcol in this brow */ 60d94109b8SHong Zhang for (l = start; l<end; l++) { 61d94109b8SHong Zhang bcol = aj[l]; 62d7b97159SHong Zhang if (!PetscBTLookupSet(table_out,bcol)) { 63d7b97159SHong Zhang nidx[isz++] = bcol; 64d7b97159SHong Zhang if (bcol_max < bcol) bcol_max = bcol; 65d7b97159SHong Zhang } 66d94109b8SHong Zhang } 67d94109b8SHong Zhang k++; 68d94109b8SHong Zhang if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */ 69d94109b8SHong Zhang } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */ 70d94109b8SHong Zhang for (l = start; l<end; l++) { 71d94109b8SHong Zhang bcol = aj[l]; 72dbe03f88SHong Zhang if (bcol > bcol_max) break; 73db41cccfSHong Zhang if (PetscBTLookup(table_in,bcol)) { 7426fbe8dcSKarl Rupp if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow; 75d94109b8SHong Zhang break; /* for l = start; l<end ; l++) */ 76d94109b8SHong Zhang } 77d94109b8SHong Zhang } 78d94109b8SHong Zhang } 79d94109b8SHong Zhang } 80d94109b8SHong Zhang } /* for each overlap */ 81d94109b8SHong Zhang 82d94109b8SHong Zhang /* expand the Index Set */ 83d94109b8SHong Zhang for (j=0; j<isz; j++) { 8426fbe8dcSKarl Rupp for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k; 85d94109b8SHong Zhang } 8670b3c8c7SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr); 87d94109b8SHong Zhang } 8894bacf5dSBarry Smith ierr = PetscBTDestroy(&table_out);CHKERRQ(ierr); 89d94109b8SHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 90d94109b8SHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 9194bacf5dSBarry Smith ierr = PetscBTDestroy(&table_in);CHKERRQ(ierr); 925eee224dSHong Zhang PetscFunctionReturn(0); 9349b5e25fSSatish Balay } 9449b5e25fSSatish Balay 954a2ae208SSatish Balay #undef __FUNCT__ 964a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 974aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 9849b5e25fSSatish Balay { 9949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 1006849ba73SBarry Smith PetscErrorCode ierr; 10113f74950SBarry Smith PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens; 10213f74950SBarry Smith PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 1035d0c19d7SBarry Smith PetscInt nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2; 1045d0c19d7SBarry Smith const PetscInt *irow,*aj = a->j,*ai = a->i; 10549b5e25fSSatish Balay MatScalar *mat_a; 10649b5e25fSSatish Balay Mat C; 107ace3abfcSBarry Smith PetscBool flag,sorted; 10849b5e25fSSatish Balay 10949b5e25fSSatish Balay PetscFunctionBegin; 110e32f2f54SBarry Smith if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 11114ca34e6SBarry Smith ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr); 112e32f2f54SBarry Smith if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 11349b5e25fSSatish Balay 11449b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 11549b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 11649b5e25fSSatish Balay 11774ed9c26SBarry Smith ierr = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr); 11874ed9c26SBarry Smith ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 11949b5e25fSSatish Balay ssmap = smap; 12013f74950SBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 12149b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 12249b5e25fSSatish Balay /* determine lens of each row */ 12349b5e25fSSatish Balay for (i=0; i<nrows; i++) { 12449b5e25fSSatish Balay kstart = ai[irow[i]]; 12549b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 12649b5e25fSSatish Balay lens[i] = 0; 12749b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 12826fbe8dcSKarl Rupp if (ssmap[aj[k]]) lens[i]++; 12949b5e25fSSatish Balay } 13049b5e25fSSatish Balay } 13149b5e25fSSatish Balay /* Create and fill new matrix */ 13249b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 13349b5e25fSSatish Balay c = (Mat_SeqSBAIJ*)((*B)->data); 13449b5e25fSSatish Balay 135e32f2f54SBarry Smith if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 13613f74950SBarry Smith ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 137e7e72b3dSBarry Smith if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 13813f74950SBarry Smith ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 13949b5e25fSSatish Balay C = *B; 14049b5e25fSSatish Balay } else { 141ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 142f69a0ea3SMatthew Knepley ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1437adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 144ab93d7beSBarry Smith ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr); 14549b5e25fSSatish Balay } 14649b5e25fSSatish Balay c = (Mat_SeqSBAIJ*)(C->data); 14749b5e25fSSatish Balay for (i=0; i<nrows; i++) { 14849b5e25fSSatish Balay row = irow[i]; 14949b5e25fSSatish Balay kstart = ai[row]; 15049b5e25fSSatish Balay kend = kstart + a->ilen[row]; 15149b5e25fSSatish Balay mat_i = c->i[i]; 15249b5e25fSSatish Balay mat_j = c->j + mat_i; 15349b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 15449b5e25fSSatish Balay mat_ilen = c->ilen + i; 15549b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 15649b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 15749b5e25fSSatish Balay *mat_j++ = tcol - 1; 15849b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 15949b5e25fSSatish Balay mat_a += bs2; 16049b5e25fSSatish Balay (*mat_ilen)++; 16149b5e25fSSatish Balay } 16249b5e25fSSatish Balay } 16349b5e25fSSatish Balay } 16449b5e25fSSatish Balay 16549b5e25fSSatish Balay /* Free work space */ 16649b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 16749b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 16849b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16949b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17049b5e25fSSatish Balay 17149b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 17249b5e25fSSatish Balay *B = C; 17349b5e25fSSatish Balay PetscFunctionReturn(0); 17449b5e25fSSatish Balay } 17549b5e25fSSatish Balay 1764a2ae208SSatish Balay #undef __FUNCT__ 1774a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 1784aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 17949b5e25fSSatish Balay { 18049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 18149b5e25fSSatish Balay IS is1; 1826849ba73SBarry Smith PetscErrorCode ierr; 1835d0c19d7SBarry Smith PetscInt *vary,*iary,nrows,i,bs=A->rmap->bs,count; 1845d0c19d7SBarry Smith const PetscInt *irow; 18549b5e25fSSatish Balay 18649b5e25fSSatish Balay PetscFunctionBegin; 187e32f2f54SBarry Smith if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 18849b5e25fSSatish Balay 18949b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 19049b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 19149b5e25fSSatish Balay 19249b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 19349b5e25fSSatish Balay and form the IS with compressed IS */ 19474ed9c26SBarry Smith ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr); 19574ed9c26SBarry Smith ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr); 19649b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 19749b5e25fSSatish Balay 19849b5e25fSSatish Balay count = 0; 19949b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 200e32f2f54SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks"); 20149b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 20249b5e25fSSatish Balay } 20349b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 20470b3c8c7SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr); 20574ed9c26SBarry Smith ierr = PetscFree2(vary,iary);CHKERRQ(ierr); 20649b5e25fSSatish Balay 2074aa3045dSJed Brown ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr); 2086bf464f9SBarry Smith ierr = ISDestroy(&is1);CHKERRQ(ierr); 20949b5e25fSSatish Balay PetscFunctionReturn(0); 21049b5e25fSSatish Balay } 21149b5e25fSSatish Balay 2124a2ae208SSatish Balay #undef __FUNCT__ 2134a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 21413f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 21549b5e25fSSatish Balay { 2166849ba73SBarry Smith PetscErrorCode ierr; 21713f74950SBarry Smith PetscInt i; 21849b5e25fSSatish Balay 21949b5e25fSSatish Balay PetscFunctionBegin; 22049b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 22182502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 22249b5e25fSSatish Balay } 22349b5e25fSSatish Balay 22449b5e25fSSatish Balay for (i=0; i<n; i++) { 2254aa3045dSJed Brown ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr); 22649b5e25fSSatish Balay } 22749b5e25fSSatish Balay PetscFunctionReturn(0); 22849b5e25fSSatish Balay } 22949b5e25fSSatish Balay 23049b5e25fSSatish Balay /* -------------------------------------------------------*/ 23149b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 23249b5e25fSSatish Balay /* -------------------------------------------------------*/ 23349b5e25fSSatish Balay 2344a2ae208SSatish Balay #undef __FUNCT__ 2354a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 236dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 23749b5e25fSSatish Balay { 23849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 23987828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 24049b5e25fSSatish Balay MatScalar *v; 2416849ba73SBarry Smith PetscErrorCode ierr; 24213f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 24398c9bda7SSatish Balay PetscInt nonzerorow=0; 24449b5e25fSSatish Balay 24549b5e25fSSatish Balay PetscFunctionBegin; 2462dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2471ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2481ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 24949b5e25fSSatish Balay 25049b5e25fSSatish Balay v = a->a; 25149b5e25fSSatish Balay xb = x; 25249b5e25fSSatish Balay 25349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 25449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 25549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 25649b5e25fSSatish Balay ib = aj + *ai; 257831a3094SHong Zhang jmin = 0; 25898c9bda7SSatish Balay nonzerorow += (n>0); 2597fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 26049b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 26149b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 262831a3094SHong Zhang v += 4; jmin++; 2637fbae186SHong Zhang } 264444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 265444d8c10SJed Brown PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 266831a3094SHong Zhang for (j=jmin; j<n; j++) { 26749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 26849b5e25fSSatish Balay cval = ib[j]*2; 26949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 27049b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 27149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 27249b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 27349b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 27449b5e25fSSatish Balay v += 4; 27549b5e25fSSatish Balay } 27649b5e25fSSatish Balay xb +=2; ai++; 27749b5e25fSSatish Balay } 27849b5e25fSSatish Balay 2791ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2801ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 281dc0b31edSSatish Balay ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 28249b5e25fSSatish Balay PetscFunctionReturn(0); 28349b5e25fSSatish Balay } 28449b5e25fSSatish Balay 2854a2ae208SSatish Balay #undef __FUNCT__ 2864a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 287dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 28849b5e25fSSatish Balay { 28949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 29087828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 29149b5e25fSSatish Balay MatScalar *v; 2926849ba73SBarry Smith PetscErrorCode ierr; 29313f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 29498c9bda7SSatish Balay PetscInt nonzerorow=0; 29549b5e25fSSatish Balay 29649b5e25fSSatish Balay PetscFunctionBegin; 2972dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2981ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2991ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 30049b5e25fSSatish Balay 30149b5e25fSSatish Balay v = a->a; 30249b5e25fSSatish Balay xb = x; 30349b5e25fSSatish Balay 30449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 30549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 30649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 30749b5e25fSSatish Balay ib = aj + *ai; 308831a3094SHong Zhang jmin = 0; 30998c9bda7SSatish Balay nonzerorow += (n>0); 3107fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 31149b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 31249b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 31349b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 314831a3094SHong Zhang v += 9; jmin++; 3157fbae186SHong Zhang } 316444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 317444d8c10SJed Brown PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 318831a3094SHong Zhang for (j=jmin; j<n; j++) { 31949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 32049b5e25fSSatish Balay cval = ib[j]*3; 32149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 32249b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 32349b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 32449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 32549b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 32649b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 32749b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 32849b5e25fSSatish Balay v += 9; 32949b5e25fSSatish Balay } 33049b5e25fSSatish Balay xb +=3; ai++; 33149b5e25fSSatish Balay } 33249b5e25fSSatish Balay 3331ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3341ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 335dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 33649b5e25fSSatish Balay PetscFunctionReturn(0); 33749b5e25fSSatish Balay } 33849b5e25fSSatish Balay 3394a2ae208SSatish Balay #undef __FUNCT__ 3404a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 341dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 34249b5e25fSSatish Balay { 34349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 34487828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 34549b5e25fSSatish Balay MatScalar *v; 3466849ba73SBarry Smith PetscErrorCode ierr; 34713f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 34898c9bda7SSatish Balay PetscInt nonzerorow=0; 34949b5e25fSSatish Balay 35049b5e25fSSatish Balay PetscFunctionBegin; 3512dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 3521ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3531ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 35449b5e25fSSatish Balay 35549b5e25fSSatish Balay v = a->a; 35649b5e25fSSatish Balay xb = x; 35749b5e25fSSatish Balay 35849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 35949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 36049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 36149b5e25fSSatish Balay ib = aj + *ai; 362831a3094SHong Zhang jmin = 0; 36398c9bda7SSatish Balay nonzerorow += (n>0); 3647fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 36549b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 36649b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 36749b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 36849b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 369831a3094SHong Zhang v += 16; jmin++; 3707fbae186SHong Zhang } 371444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 372444d8c10SJed Brown PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 373831a3094SHong Zhang for (j=jmin; j<n; j++) { 37449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 37549b5e25fSSatish Balay cval = ib[j]*4; 37649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 37749b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 37849b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 37949b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 38049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 38149b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 38249b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 38349b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 38449b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 38549b5e25fSSatish Balay v += 16; 38649b5e25fSSatish Balay } 38749b5e25fSSatish Balay xb +=4; ai++; 38849b5e25fSSatish Balay } 38949b5e25fSSatish Balay 3901ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3911ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 392dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 39349b5e25fSSatish Balay PetscFunctionReturn(0); 39449b5e25fSSatish Balay } 39549b5e25fSSatish Balay 3964a2ae208SSatish Balay #undef __FUNCT__ 3974a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 398dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 39949b5e25fSSatish Balay { 40049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 40187828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 40249b5e25fSSatish Balay MatScalar *v; 4036849ba73SBarry Smith PetscErrorCode ierr; 40413f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 40598c9bda7SSatish Balay PetscInt nonzerorow=0; 40649b5e25fSSatish Balay 40749b5e25fSSatish Balay PetscFunctionBegin; 4082dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4091ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4101ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 41149b5e25fSSatish Balay 41249b5e25fSSatish Balay v = a->a; 41349b5e25fSSatish Balay xb = x; 41449b5e25fSSatish Balay 41549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 41649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 41749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 41849b5e25fSSatish Balay ib = aj + *ai; 419831a3094SHong Zhang jmin = 0; 42098c9bda7SSatish Balay nonzerorow += (n>0); 4217fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 42249b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 42349b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 42449b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 42549b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 42649b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 427831a3094SHong Zhang v += 25; jmin++; 4287fbae186SHong Zhang } 429444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 430444d8c10SJed Brown PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 431831a3094SHong Zhang for (j=jmin; j<n; j++) { 43249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 43349b5e25fSSatish Balay cval = ib[j]*5; 43449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 43549b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 43649b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 43749b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 43849b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 43949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 44049b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 44149b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 44249b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 44349b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 44449b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 44549b5e25fSSatish Balay v += 25; 44649b5e25fSSatish Balay } 44749b5e25fSSatish Balay xb +=5; ai++; 44849b5e25fSSatish Balay } 44949b5e25fSSatish Balay 4501ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4511ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 452dc0b31edSSatish Balay ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 45349b5e25fSSatish Balay PetscFunctionReturn(0); 45449b5e25fSSatish Balay } 45549b5e25fSSatish Balay 45649b5e25fSSatish Balay 4574a2ae208SSatish Balay #undef __FUNCT__ 4584a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 459dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 46049b5e25fSSatish Balay { 46149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 46287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 46349b5e25fSSatish Balay MatScalar *v; 4646849ba73SBarry Smith PetscErrorCode ierr; 46513f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 46698c9bda7SSatish Balay PetscInt nonzerorow=0; 46749b5e25fSSatish Balay 46849b5e25fSSatish Balay PetscFunctionBegin; 4692dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4701ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4711ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 47249b5e25fSSatish Balay 47349b5e25fSSatish Balay v = a->a; 47449b5e25fSSatish Balay xb = x; 47549b5e25fSSatish Balay 47649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 47749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 47849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 47949b5e25fSSatish Balay ib = aj + *ai; 480831a3094SHong Zhang jmin = 0; 48198c9bda7SSatish Balay nonzerorow += (n>0); 4827fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 48349b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 48449b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 48549b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 48649b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 48749b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 48849b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 489831a3094SHong Zhang v += 36; jmin++; 4907fbae186SHong Zhang } 491444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 492444d8c10SJed Brown PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 493831a3094SHong Zhang for (j=jmin; j<n; j++) { 49449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 49549b5e25fSSatish Balay cval = ib[j]*6; 49649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 49749b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 49849b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 49949b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 50049b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 50149b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 50249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 50349b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 50449b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 50549b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 50649b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 50749b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 50849b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 50949b5e25fSSatish Balay v += 36; 51049b5e25fSSatish Balay } 51149b5e25fSSatish Balay xb +=6; ai++; 51249b5e25fSSatish Balay } 51349b5e25fSSatish Balay 5141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5151ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 516dc0b31edSSatish Balay ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 51749b5e25fSSatish Balay PetscFunctionReturn(0); 51849b5e25fSSatish Balay } 5194a2ae208SSatish Balay #undef __FUNCT__ 5204a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 521dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 52249b5e25fSSatish Balay { 52349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 52487828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 52549b5e25fSSatish Balay MatScalar *v; 5266849ba73SBarry Smith PetscErrorCode ierr; 52713f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 52898c9bda7SSatish Balay PetscInt nonzerorow=0; 52949b5e25fSSatish Balay 53049b5e25fSSatish Balay PetscFunctionBegin; 5312dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 5321ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5331ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 53449b5e25fSSatish Balay 53549b5e25fSSatish Balay v = a->a; 53649b5e25fSSatish Balay xb = x; 53749b5e25fSSatish Balay 53849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 53949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 54049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 54149b5e25fSSatish Balay ib = aj + *ai; 542831a3094SHong Zhang jmin = 0; 54398c9bda7SSatish Balay nonzerorow += (n>0); 5447fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 54549b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 54649b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 54749b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 54849b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 54949b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 55049b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 55149b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 552831a3094SHong Zhang v += 49; jmin++; 5537fbae186SHong Zhang } 554444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 555444d8c10SJed Brown PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 556831a3094SHong Zhang for (j=jmin; j<n; j++) { 55749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 55849b5e25fSSatish Balay cval = ib[j]*7; 55949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 56049b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 56149b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 56249b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 56349b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 56449b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 56549b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 56649b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 56749b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 56849b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 56949b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 57049b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 57149b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 57249b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 57349b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 57449b5e25fSSatish Balay v += 49; 57549b5e25fSSatish Balay } 57649b5e25fSSatish Balay xb +=7; ai++; 57749b5e25fSSatish Balay } 5781ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5791ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 580dc0b31edSSatish Balay ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); 58149b5e25fSSatish Balay PetscFunctionReturn(0); 58249b5e25fSSatish Balay } 58349b5e25fSSatish Balay 58449b5e25fSSatish Balay /* 58549b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 58649b5e25fSSatish Balay */ 5874a2ae208SSatish Balay #undef __FUNCT__ 5884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 589dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 59049b5e25fSSatish Balay { 59149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59287828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 5930b60a74dSHong Zhang MatScalar *v; 594dfbe8321SBarry Smith PetscErrorCode ierr; 595d0f46423SBarry Smith PetscInt mbs =a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; 59698c9bda7SSatish Balay PetscInt nonzerorow=0; 59749b5e25fSSatish Balay 59849b5e25fSSatish Balay PetscFunctionBegin; 5992dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 6001ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 6011ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 60249b5e25fSSatish Balay 60349b5e25fSSatish Balay aj = a->j; 60449b5e25fSSatish Balay v = a->a; 60549b5e25fSSatish Balay ii = a->i; 60649b5e25fSSatish Balay 60749b5e25fSSatish Balay if (!a->mult_work) { 608d0f46423SBarry Smith ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 60949b5e25fSSatish Balay } 61049b5e25fSSatish Balay work = a->mult_work; 61149b5e25fSSatish Balay 61249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 61349b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 61449b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 61598c9bda7SSatish Balay nonzerorow += (n>0); 61649b5e25fSSatish Balay 61749b5e25fSSatish Balay /* upper triangular part */ 61849b5e25fSSatish Balay for (j=0; j<n; j++) { 61949b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 62049b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 62149b5e25fSSatish Balay workt += bs; 62249b5e25fSSatish Balay } 62349b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 62496b95a6bSBarry Smith PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 62549b5e25fSSatish Balay 62649b5e25fSSatish Balay /* strict lower triangular part */ 627831a3094SHong Zhang idx = aj+ii[0]; 628831a3094SHong Zhang if (*idx == i) { 62996b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 630831a3094SHong Zhang } 63196b9376eSHong Zhang 63249b5e25fSSatish Balay if (ncols > 0) { 63349b5e25fSSatish Balay workt = work; 63487828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 63596b95a6bSBarry Smith PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 636831a3094SHong Zhang for (j=0; j<n; j++) { 637831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 63849b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k]; 63949b5e25fSSatish Balay workt += bs; 64049b5e25fSSatish Balay } 64149b5e25fSSatish Balay } 64249b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 64349b5e25fSSatish Balay } 64449b5e25fSSatish Balay 6451ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6461ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 647dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr); 64849b5e25fSSatish Balay PetscFunctionReturn(0); 64949b5e25fSSatish Balay } 65049b5e25fSSatish Balay 6514a2ae208SSatish Balay #undef __FUNCT__ 6524a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 653dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 65449b5e25fSSatish Balay { 65549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 656bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1; 65749b5e25fSSatish Balay MatScalar *v; 6586849ba73SBarry Smith PetscErrorCode ierr; 65913f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 66098c9bda7SSatish Balay PetscInt nonzerorow=0; 66149b5e25fSSatish Balay 66249b5e25fSSatish Balay PetscFunctionBegin; 663343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 6641ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6651ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 66649b5e25fSSatish Balay v = a->a; 66749b5e25fSSatish Balay xb = x; 66849b5e25fSSatish Balay 66949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 67049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 67149b5e25fSSatish Balay x1 = xb[0]; 67249b5e25fSSatish Balay ib = aj + *ai; 673831a3094SHong Zhang jmin = 0; 67498c9bda7SSatish Balay nonzerorow += (n>0); 675831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 676831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 677831a3094SHong Zhang } 678831a3094SHong Zhang for (j=jmin; j<n; j++) { 67949b5e25fSSatish Balay cval = *ib; 68049b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 68149b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 68249b5e25fSSatish Balay } 68349b5e25fSSatish Balay xb++; ai++; 68449b5e25fSSatish Balay } 68549b5e25fSSatish Balay 6861ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 68849b5e25fSSatish Balay 689dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 69049b5e25fSSatish Balay PetscFunctionReturn(0); 69149b5e25fSSatish Balay } 69249b5e25fSSatish Balay 6934a2ae208SSatish Balay #undef __FUNCT__ 6944a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 695dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 69649b5e25fSSatish Balay { 69749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 698bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2; 69949b5e25fSSatish Balay MatScalar *v; 7006849ba73SBarry Smith PetscErrorCode ierr; 70113f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 70298c9bda7SSatish Balay PetscInt nonzerorow=0; 70349b5e25fSSatish Balay 70449b5e25fSSatish Balay PetscFunctionBegin; 705343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 7061ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7071ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 70849b5e25fSSatish Balay 70949b5e25fSSatish Balay v = a->a; 71049b5e25fSSatish Balay xb = x; 71149b5e25fSSatish Balay 71249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 71349b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 71449b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 71549b5e25fSSatish Balay ib = aj + *ai; 716831a3094SHong Zhang jmin = 0; 71798c9bda7SSatish Balay nonzerorow += (n>0); 7187fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 71949b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 72049b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 721831a3094SHong Zhang v += 4; jmin++; 7227fbae186SHong Zhang } 723444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 724444d8c10SJed Brown PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 725831a3094SHong Zhang for (j=jmin; j<n; j++) { 72649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 72749b5e25fSSatish Balay cval = ib[j]*2; 72849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 72949b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 73049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 73149b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 73249b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 73349b5e25fSSatish Balay v += 4; 73449b5e25fSSatish Balay } 73549b5e25fSSatish Balay xb +=2; ai++; 73649b5e25fSSatish Balay } 7371ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 738bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 73949b5e25fSSatish Balay 740dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 74149b5e25fSSatish Balay PetscFunctionReturn(0); 74249b5e25fSSatish Balay } 74349b5e25fSSatish Balay 7444a2ae208SSatish Balay #undef __FUNCT__ 7454a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 746dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 74749b5e25fSSatish Balay { 74849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 749bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3; 75049b5e25fSSatish Balay MatScalar *v; 7516849ba73SBarry Smith PetscErrorCode ierr; 75213f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 75398c9bda7SSatish Balay PetscInt nonzerorow=0; 75449b5e25fSSatish Balay 75549b5e25fSSatish Balay PetscFunctionBegin; 756343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 7571ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7581ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 75949b5e25fSSatish Balay 76049b5e25fSSatish Balay v = a->a; 76149b5e25fSSatish Balay xb = x; 76249b5e25fSSatish Balay 76349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 76449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 76549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 76649b5e25fSSatish Balay ib = aj + *ai; 767831a3094SHong Zhang jmin = 0; 76898c9bda7SSatish Balay nonzerorow += (n>0); 7697fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 77049b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 77149b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 77249b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 773831a3094SHong Zhang v += 9; jmin++; 7747fbae186SHong Zhang } 775444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 776444d8c10SJed Brown PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 777831a3094SHong Zhang for (j=jmin; j<n; j++) { 77849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 77949b5e25fSSatish Balay cval = ib[j]*3; 78049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 78149b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 78249b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 78349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 78449b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 78549b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 78649b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 78749b5e25fSSatish Balay v += 9; 78849b5e25fSSatish Balay } 78949b5e25fSSatish Balay xb +=3; ai++; 79049b5e25fSSatish Balay } 79149b5e25fSSatish Balay 7921ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 793bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 79449b5e25fSSatish Balay 795dc0b31edSSatish Balay ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 79649b5e25fSSatish Balay PetscFunctionReturn(0); 79749b5e25fSSatish Balay } 79849b5e25fSSatish Balay 7994a2ae208SSatish Balay #undef __FUNCT__ 8004a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 801dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 80249b5e25fSSatish Balay { 80349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 804bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4; 80549b5e25fSSatish Balay MatScalar *v; 8066849ba73SBarry Smith PetscErrorCode ierr; 80713f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 80898c9bda7SSatish Balay PetscInt nonzerorow=0; 80949b5e25fSSatish Balay 81049b5e25fSSatish Balay PetscFunctionBegin; 811343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 8121ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8131ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 81449b5e25fSSatish Balay 81549b5e25fSSatish Balay v = a->a; 81649b5e25fSSatish Balay xb = x; 81749b5e25fSSatish Balay 81849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 81949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 82049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 82149b5e25fSSatish Balay ib = aj + *ai; 822831a3094SHong Zhang jmin = 0; 82398c9bda7SSatish Balay nonzerorow += (n>0); 8247fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 82549b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 82649b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 82749b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 82849b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 829831a3094SHong Zhang v += 16; jmin++; 8307fbae186SHong Zhang } 831444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 832444d8c10SJed Brown PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 833831a3094SHong Zhang for (j=jmin; j<n; j++) { 83449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 83549b5e25fSSatish Balay cval = ib[j]*4; 83649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 83749b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 83849b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 83949b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 84049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 84149b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 84249b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 84349b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 84449b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 84549b5e25fSSatish Balay v += 16; 84649b5e25fSSatish Balay } 84749b5e25fSSatish Balay xb +=4; ai++; 84849b5e25fSSatish Balay } 84949b5e25fSSatish Balay 8501ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 851bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 85249b5e25fSSatish Balay 853dc0b31edSSatish Balay ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 85449b5e25fSSatish Balay PetscFunctionReturn(0); 85549b5e25fSSatish Balay } 85649b5e25fSSatish Balay 8574a2ae208SSatish Balay #undef __FUNCT__ 8584a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 859dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 86049b5e25fSSatish Balay { 86149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 862bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5; 86349b5e25fSSatish Balay MatScalar *v; 8646849ba73SBarry Smith PetscErrorCode ierr; 86513f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 86698c9bda7SSatish Balay PetscInt nonzerorow=0; 86749b5e25fSSatish Balay 86849b5e25fSSatish Balay PetscFunctionBegin; 869343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 8701ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8711ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 87249b5e25fSSatish Balay 87349b5e25fSSatish Balay v = a->a; 87449b5e25fSSatish Balay xb = x; 87549b5e25fSSatish Balay 87649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 87749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 87849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 87949b5e25fSSatish Balay ib = aj + *ai; 880831a3094SHong Zhang jmin = 0; 88198c9bda7SSatish Balay nonzerorow += (n>0); 8827fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 88349b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 88449b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 88549b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 88649b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 88749b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 888831a3094SHong Zhang v += 25; jmin++; 8897fbae186SHong Zhang } 890444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 891444d8c10SJed Brown PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 892831a3094SHong Zhang for (j=jmin; j<n; j++) { 89349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 89449b5e25fSSatish Balay cval = ib[j]*5; 89549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 89649b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 89749b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 89849b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 89949b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 90049b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 90149b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 90249b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 90349b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 90449b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 90549b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 90649b5e25fSSatish Balay v += 25; 90749b5e25fSSatish Balay } 90849b5e25fSSatish Balay xb +=5; ai++; 90949b5e25fSSatish Balay } 91049b5e25fSSatish Balay 9111ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 912bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 91349b5e25fSSatish Balay 914dc0b31edSSatish Balay ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 91549b5e25fSSatish Balay PetscFunctionReturn(0); 91649b5e25fSSatish Balay } 9174a2ae208SSatish Balay #undef __FUNCT__ 9184a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 919dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 92049b5e25fSSatish Balay { 92149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 922bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6; 92349b5e25fSSatish Balay MatScalar *v; 9246849ba73SBarry Smith PetscErrorCode ierr; 92513f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 92698c9bda7SSatish Balay PetscInt nonzerorow=0; 92749b5e25fSSatish Balay 92849b5e25fSSatish Balay PetscFunctionBegin; 929343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 9301ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9311ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 93249b5e25fSSatish Balay 93349b5e25fSSatish Balay v = a->a; 93449b5e25fSSatish Balay xb = x; 93549b5e25fSSatish Balay 93649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 93749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 93849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 93949b5e25fSSatish Balay ib = aj + *ai; 940831a3094SHong Zhang jmin = 0; 94198c9bda7SSatish Balay nonzerorow += (n>0); 9427fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 94349b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 94449b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 94549b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 94649b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 94749b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 94849b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 949831a3094SHong Zhang v += 36; jmin++; 9507fbae186SHong Zhang } 951444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 952444d8c10SJed Brown PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 953831a3094SHong Zhang for (j=jmin; j<n; j++) { 95449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 95549b5e25fSSatish Balay cval = ib[j]*6; 95649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 95749b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 95849b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 95949b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 96049b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 96149b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 96249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 96349b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 96449b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 96549b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 96649b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 96749b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 96849b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 96949b5e25fSSatish Balay v += 36; 97049b5e25fSSatish Balay } 97149b5e25fSSatish Balay xb +=6; ai++; 97249b5e25fSSatish Balay } 97349b5e25fSSatish Balay 9741ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 975bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 97649b5e25fSSatish Balay 977dc0b31edSSatish Balay ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 97849b5e25fSSatish Balay PetscFunctionReturn(0); 97949b5e25fSSatish Balay } 98049b5e25fSSatish Balay 9814a2ae208SSatish Balay #undef __FUNCT__ 9824a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 983dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 98449b5e25fSSatish Balay { 98549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 986bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 98749b5e25fSSatish Balay MatScalar *v; 9886849ba73SBarry Smith PetscErrorCode ierr; 98913f74950SBarry Smith PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 99098c9bda7SSatish Balay PetscInt nonzerorow=0; 99149b5e25fSSatish Balay 99249b5e25fSSatish Balay PetscFunctionBegin; 993343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 9941ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9951ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 99649b5e25fSSatish Balay 99749b5e25fSSatish Balay v = a->a; 99849b5e25fSSatish Balay xb = x; 99949b5e25fSSatish Balay 100049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 100149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 100249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 100349b5e25fSSatish Balay ib = aj + *ai; 1004831a3094SHong Zhang jmin = 0; 100598c9bda7SSatish Balay nonzerorow += (n>0); 10067fbae186SHong Zhang if (*ib == i) { /* (diag of A)*x */ 100749b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 100849b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 100949b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 101049b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 101149b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 101249b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 101349b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1014831a3094SHong Zhang v += 49; jmin++; 10157fbae186SHong Zhang } 1016444d8c10SJed Brown PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ 1017444d8c10SJed Brown PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ 1018831a3094SHong Zhang for (j=jmin; j<n; j++) { 101949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 102049b5e25fSSatish Balay cval = ib[j]*7; 102149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 102249b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 102349b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 102449b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 102549b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 102649b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 102749b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 102849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 102949b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 103049b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 103149b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 103249b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 103349b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 103449b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 103549b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 103649b5e25fSSatish Balay v += 49; 103749b5e25fSSatish Balay } 103849b5e25fSSatish Balay xb +=7; ai++; 103949b5e25fSSatish Balay } 104049b5e25fSSatish Balay 10411ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1042bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 104349b5e25fSSatish Balay 1044dc0b31edSSatish Balay ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 104549b5e25fSSatish Balay PetscFunctionReturn(0); 104649b5e25fSSatish Balay } 104749b5e25fSSatish Balay 10484a2ae208SSatish Balay #undef __FUNCT__ 10494a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 1050dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 105149b5e25fSSatish Balay { 105249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1053bba805e6SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1054066653e3SSatish Balay MatScalar *v; 1055dfbe8321SBarry Smith PetscErrorCode ierr; 1056d0f46423SBarry Smith PetscInt mbs =a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; 105798c9bda7SSatish Balay PetscInt nonzerorow=0; 105849b5e25fSSatish Balay 105949b5e25fSSatish Balay PetscFunctionBegin; 1060343551c4SBarry Smith ierr = VecCopy(yy,zz);CHKERRQ(ierr); 10611ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 10621ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 106349b5e25fSSatish Balay 106449b5e25fSSatish Balay aj = a->j; 106549b5e25fSSatish Balay v = a->a; 106649b5e25fSSatish Balay ii = a->i; 106749b5e25fSSatish Balay 106849b5e25fSSatish Balay if (!a->mult_work) { 1069d0f46423SBarry Smith ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 107049b5e25fSSatish Balay } 107149b5e25fSSatish Balay work = a->mult_work; 107249b5e25fSSatish Balay 107349b5e25fSSatish Balay 107449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 107549b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 107649b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 107798c9bda7SSatish Balay nonzerorow += (n>0); 107849b5e25fSSatish Balay 107949b5e25fSSatish Balay /* upper triangular part */ 108049b5e25fSSatish Balay for (j=0; j<n; j++) { 108149b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 108249b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 108349b5e25fSSatish Balay workt += bs; 108449b5e25fSSatish Balay } 108549b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 108696b95a6bSBarry Smith PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 108749b5e25fSSatish Balay 108849b5e25fSSatish Balay /* strict lower triangular part */ 1089831a3094SHong Zhang idx = aj+ii[0]; 1090831a3094SHong Zhang if (*idx == i) { 109196b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1092831a3094SHong Zhang } 109349b5e25fSSatish Balay if (ncols > 0) { 109449b5e25fSSatish Balay workt = work; 109587828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 109696b95a6bSBarry Smith PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1097831a3094SHong Zhang for (j=0; j<n; j++) { 1098831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 109949b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k]; 110049b5e25fSSatish Balay workt += bs; 110149b5e25fSSatish Balay } 110249b5e25fSSatish Balay } 110349b5e25fSSatish Balay 110449b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 110549b5e25fSSatish Balay } 110649b5e25fSSatish Balay 11071ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1108bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 110949b5e25fSSatish Balay 1110dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr); 111149b5e25fSSatish Balay PetscFunctionReturn(0); 111249b5e25fSSatish Balay } 111349b5e25fSSatish Balay 11144a2ae208SSatish Balay #undef __FUNCT__ 11154a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1116f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha) 111749b5e25fSSatish Balay { 111849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1119f4df32b1SMatthew Knepley PetscScalar oalpha = alpha; 1120efee365bSSatish Balay PetscErrorCode ierr; 1121c5df96a5SBarry Smith PetscBLASInt one = 1,totalnz; 112249b5e25fSSatish Balay 112349b5e25fSSatish Balay PetscFunctionBegin; 1124c5df96a5SBarry Smith ierr = PetscBLASIntCast(a->bs2*a->nz,&totalnz);CHKERRQ(ierr); 1125*8b83055fSJed Brown PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one)); 1126efee365bSSatish Balay ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 112749b5e25fSSatish Balay PetscFunctionReturn(0); 112849b5e25fSSatish Balay } 112949b5e25fSSatish Balay 11304a2ae208SSatish Balay #undef __FUNCT__ 11314a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 1132dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 113349b5e25fSSatish Balay { 113449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 113549b5e25fSSatish Balay MatScalar *v = a->a; 113649b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1137d0f46423SBarry Smith PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 11386849ba73SBarry Smith PetscErrorCode ierr; 113913f74950SBarry Smith PetscInt *jl,*il,jmin,jmax,nexti,ik,*col; 114049b5e25fSSatish Balay 114149b5e25fSSatish Balay PetscFunctionBegin; 114249b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 114349b5e25fSSatish Balay for (k=0; k<mbs; k++) { 114449b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1145831a3094SHong Zhang col = aj + jmin; 1146831a3094SHong Zhang if (*col == k) { /* diagonal block */ 114749b5e25fSSatish Balay for (i=0; i<bs2; i++) { 114849b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 114949b5e25fSSatish Balay } 1150831a3094SHong Zhang jmin++; 1151831a3094SHong Zhang } 1152831a3094SHong Zhang for (j=jmin; j<jmax; j++) { /* off-diagonal blocks */ 115349b5e25fSSatish Balay for (i=0; i<bs2; i++) { 115449b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 115549b5e25fSSatish Balay } 115649b5e25fSSatish Balay } 115749b5e25fSSatish Balay } 11588f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum_diag + 2*sum_off); 11590b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */ 116074ed9c26SBarry Smith ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 11610b8dc8d2SHong Zhang for (i=0; i<mbs; i++) jl[i] = mbs; 11620b8dc8d2SHong Zhang il[0] = 0; 116349b5e25fSSatish Balay 116449b5e25fSSatish Balay *norm = 0.0; 116549b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 116649b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 116749b5e25fSSatish Balay /*-- col sum --*/ 116849b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1169831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1170831a3094SHong Zhang at step k */ 117149b5e25fSSatish Balay while (i<mbs) { 117249b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 117349b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 117449b5e25fSSatish Balay for (j=0; j<bs; j++) { 117549b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 117649b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 117749b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 117849b5e25fSSatish Balay } 117949b5e25fSSatish Balay } 118049b5e25fSSatish Balay /* update il, jl */ 1181831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1182831a3094SHong Zhang jmax = a->i[i+1]; 118349b5e25fSSatish Balay if (jmin < jmax) { 118449b5e25fSSatish Balay il[i] = jmin; 118549b5e25fSSatish Balay j = a->j[jmin]; 118649b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 118749b5e25fSSatish Balay } 118849b5e25fSSatish Balay i = nexti; 118949b5e25fSSatish Balay } 119049b5e25fSSatish Balay /*-- row sum --*/ 119149b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 119249b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 119349b5e25fSSatish Balay for (j=0; j<bs; j++) { 119449b5e25fSSatish Balay v = a->a + i*bs2 + j; 119549b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 11960b8dc8d2SHong Zhang sum[j] += PetscAbsScalar(*v); v += bs; 119749b5e25fSSatish Balay } 119849b5e25fSSatish Balay } 119949b5e25fSSatish Balay } 120049b5e25fSSatish Balay /* add k_th block row to il, jl */ 1201831a3094SHong Zhang col = aj+jmin; 1202831a3094SHong Zhang if (*col == k) jmin++; 120349b5e25fSSatish Balay if (jmin < jmax) { 120449b5e25fSSatish Balay il[k] = jmin; 12050b8dc8d2SHong Zhang j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k; 120649b5e25fSSatish Balay } 120749b5e25fSSatish Balay for (j=0; j<bs; j++) { 120849b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 120949b5e25fSSatish Balay } 121049b5e25fSSatish Balay } 121174ed9c26SBarry Smith ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr); 1212f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 121349b5e25fSSatish Balay PetscFunctionReturn(0); 121449b5e25fSSatish Balay } 121549b5e25fSSatish Balay 12164a2ae208SSatish Balay #undef __FUNCT__ 12174a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 1218ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg) 121949b5e25fSSatish Balay { 122049b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data; 1221dfbe8321SBarry Smith PetscErrorCode ierr; 122249b5e25fSSatish Balay 122349b5e25fSSatish Balay PetscFunctionBegin; 122449b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1225d0f46423SBarry Smith if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { 1226ef511fbeSHong Zhang *flg = PETSC_FALSE; 1227ef511fbeSHong Zhang PetscFunctionReturn(0); 122849b5e25fSSatish Balay } 122949b5e25fSSatish Balay 123049b5e25fSSatish Balay /* if the a->i are the same */ 123113f74950SBarry Smith ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 123226fbe8dcSKarl Rupp if (!*flg) PetscFunctionReturn(0); 123349b5e25fSSatish Balay 123449b5e25fSSatish Balay /* if a->j are the same */ 123513f74950SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 123626fbe8dcSKarl Rupp if (!*flg) PetscFunctionReturn(0); 123726fbe8dcSKarl Rupp 123849b5e25fSSatish Balay /* if a->a are the same */ 1239d0f46423SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1240935af2e7SHong Zhang PetscFunctionReturn(0); 124149b5e25fSSatish Balay } 124249b5e25fSSatish Balay 12434a2ae208SSatish Balay #undef __FUNCT__ 12444a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 1245dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 124649b5e25fSSatish Balay { 124749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1248dfbe8321SBarry Smith PetscErrorCode ierr; 12498a0c6efdSHong Zhang PetscInt i,j,k,row,bs,*ai,*aj,ambs,bs2; 125087828ca2SBarry Smith PetscScalar *x,zero = 0.0; 125149b5e25fSSatish Balay MatScalar *aa,*aa_j; 125249b5e25fSSatish Balay 125349b5e25fSSatish Balay PetscFunctionBegin; 1254d0f46423SBarry Smith bs = A->rmap->bs; 1255e32f2f54SBarry Smith if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 125682799104SHong Zhang 125749b5e25fSSatish Balay aa = a->a; 12588a0c6efdSHong Zhang ambs = a->mbs; 12598a0c6efdSHong Zhang 12608a0c6efdSHong Zhang if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) { 12618a0c6efdSHong Zhang PetscInt *diag=a->diag; 12628a0c6efdSHong Zhang aa = a->a; 12638a0c6efdSHong Zhang ambs = a->mbs; 12648a0c6efdSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 12658a0c6efdSHong Zhang for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]]; 12668a0c6efdSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 12678a0c6efdSHong Zhang PetscFunctionReturn(0); 12688a0c6efdSHong Zhang } 12698a0c6efdSHong Zhang 127049b5e25fSSatish Balay ai = a->i; 127149b5e25fSSatish Balay aj = a->j; 127249b5e25fSSatish Balay bs2 = a->bs2; 12732dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 12741ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 127549b5e25fSSatish Balay for (i=0; i<ambs; i++) { 127649b5e25fSSatish Balay j=ai[i]; 127749b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 127849b5e25fSSatish Balay row = i*bs; 127949b5e25fSSatish Balay aa_j = aa + j*bs2; 128049b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 128149b5e25fSSatish Balay } 128249b5e25fSSatish Balay } 12831ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 128449b5e25fSSatish Balay PetscFunctionReturn(0); 128549b5e25fSSatish Balay } 128649b5e25fSSatish Balay 12874a2ae208SSatish Balay #undef __FUNCT__ 12884a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 1289dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 129049b5e25fSSatish Balay { 129149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 12925e90f9d9SHong Zhang PetscScalar *l,x,*li,*ri; 129349b5e25fSSatish Balay MatScalar *aa,*v; 1294dfbe8321SBarry Smith PetscErrorCode ierr; 12955e90f9d9SHong Zhang PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1296ace3abfcSBarry Smith PetscBool flg; 129749b5e25fSSatish Balay 129849b5e25fSSatish Balay PetscFunctionBegin; 1299b3bf805bSHong Zhang if (ll != rr) { 1300b3bf805bSHong Zhang ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1301e7e72b3dSBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1302b3bf805bSHong Zhang } 1303b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 130449b5e25fSSatish Balay ai = a->i; 130549b5e25fSSatish Balay aj = a->j; 130649b5e25fSSatish Balay aa = a->a; 1307d0f46423SBarry Smith m = A->rmap->N; 1308d0f46423SBarry Smith bs = A->rmap->bs; 130949b5e25fSSatish Balay mbs = a->mbs; 131049b5e25fSSatish Balay bs2 = a->bs2; 131149b5e25fSSatish Balay 13121ebc52fbSHong Zhang ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 131349b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1314e32f2f54SBarry Smith if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 131549b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 131649b5e25fSSatish Balay M = ai[i+1] - ai[i]; 131749b5e25fSSatish Balay li = l + i*bs; 131849b5e25fSSatish Balay v = aa + bs2*ai[i]; 131949b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 132049b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 13215e90f9d9SHong Zhang for (k=0; k<bs; k++) { 132249b5e25fSSatish Balay x = ri[k]; 132349b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 132449b5e25fSSatish Balay } 132549b5e25fSSatish Balay } 132649b5e25fSSatish Balay } 13271ebc52fbSHong Zhang ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1328dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 132949b5e25fSSatish Balay PetscFunctionReturn(0); 133049b5e25fSSatish Balay } 133149b5e25fSSatish Balay 13324a2ae208SSatish Balay #undef __FUNCT__ 13334a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 1334dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 133549b5e25fSSatish Balay { 133649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133749b5e25fSSatish Balay 133849b5e25fSSatish Balay PetscFunctionBegin; 133949b5e25fSSatish Balay info->block_size = a->bs2; 1340ceed8ce5SJed Brown info->nz_allocated = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */ 13416c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 134249b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 134349b5e25fSSatish Balay info->assemblies = A->num_ass; 13448e58a170SBarry Smith info->mallocs = A->info.mallocs; 13457adad957SLisandro Dalcin info->memory = ((PetscObject)A)->mem; 1346d5f3da31SBarry Smith if (A->factortype) { 134749b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 134849b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 134949b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 135049b5e25fSSatish Balay } else { 135149b5e25fSSatish Balay info->fill_ratio_given = 0; 135249b5e25fSSatish Balay info->fill_ratio_needed = 0; 135349b5e25fSSatish Balay info->factor_mallocs = 0; 135449b5e25fSSatish Balay } 135549b5e25fSSatish Balay PetscFunctionReturn(0); 135649b5e25fSSatish Balay } 135749b5e25fSSatish Balay 135849b5e25fSSatish Balay 13594a2ae208SSatish Balay #undef __FUNCT__ 13604a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 1361dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A) 136249b5e25fSSatish Balay { 136349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1364dfbe8321SBarry Smith PetscErrorCode ierr; 136549b5e25fSSatish Balay 136649b5e25fSSatish Balay PetscFunctionBegin; 136749b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 136849b5e25fSSatish Balay PetscFunctionReturn(0); 136949b5e25fSSatish Balay } 1370dc354874SHong Zhang 13714a2ae208SSatish Balay #undef __FUNCT__ 1372985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ" 1373985db425SBarry Smith /* 1374985db425SBarry Smith This code does not work since it only checks the upper triangular part of 1375985db425SBarry Smith the matrix. Hence it is not listed in the function table. 1376985db425SBarry Smith */ 1377985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[]) 1378dc354874SHong Zhang { 1379dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1380dfbe8321SBarry Smith PetscErrorCode ierr; 138113f74950SBarry Smith PetscInt i,j,n,row,col,bs,*ai,*aj,mbs; 1382c3fca9a7SHong Zhang PetscReal atmp; 1383273d9f13SBarry Smith MatScalar *aa; 1384985db425SBarry Smith PetscScalar *x; 138513f74950SBarry Smith PetscInt ncols,brow,bcol,krow,kcol; 1386dc354874SHong Zhang 1387dc354874SHong Zhang PetscFunctionBegin; 1388e32f2f54SBarry Smith if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 1389e32f2f54SBarry Smith if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1390d0f46423SBarry Smith bs = A->rmap->bs; 1391dc354874SHong Zhang aa = a->a; 1392dc354874SHong Zhang ai = a->i; 1393dc354874SHong Zhang aj = a->j; 139444117c81SHong Zhang mbs = a->mbs; 1395dc354874SHong Zhang 1396985db425SBarry Smith ierr = VecSet(v,0.0);CHKERRQ(ierr); 13971ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1398dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1399e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 140044117c81SHong Zhang for (i=0; i<mbs; i++) { 1401d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1402d0f6400bSHong Zhang brow = bs*i; 140344117c81SHong Zhang for (j=0; j<ncols; j++) { 1404d0f6400bSHong Zhang bcol = bs*(*aj); 140544117c81SHong Zhang for (kcol=0; kcol<bs; kcol++) { 1406d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 140744117c81SHong Zhang for (krow=0; krow<bs; krow++) { 1408d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1409d0f6400bSHong Zhang row = brow + krow; /* row index */ 1410c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1411c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 141244117c81SHong Zhang } 141344117c81SHong Zhang } 1414d0f6400bSHong Zhang aj++; 1415dc354874SHong Zhang } 1416dc354874SHong Zhang } 14171ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1418dc354874SHong Zhang PetscFunctionReturn(0); 1419dc354874SHong Zhang } 1420