1be1d678aSKris Buschelman #define PETSCMAT_DLL 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 449b5e25fSSatish Balay #include "src/inline/spops.h" 549b5e25fSSatish Balay #include "src/inline/ilu.h" 649b5e25fSSatish Balay #include "petscbt.h" 73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 849b5e25fSSatish Balay 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 1249b5e25fSSatish Balay { 135eee224dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 146849ba73SBarry Smith PetscErrorCode ierr; 15521d7252SBarry Smith PetscInt brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2; 16521d7252SBarry Smith PetscBT table,table0; 17d94109b8SHong Zhang 18d94109b8SHong Zhang PetscFunctionBegin; 19b3bf805bSHong Zhang if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 20d94109b8SHong Zhang mbs = a->mbs; 21d94109b8SHong Zhang ai = a->i; 22d94109b8SHong Zhang aj = a->j; 23899cda47SBarry Smith bs = A->rmap.bs; 24d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr); 2513f74950SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 26899cda47SBarry Smith ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 27d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr); 28d94109b8SHong Zhang 29d94109b8SHong Zhang for (i=0; i<is_max; i++) { /* for each is */ 30d94109b8SHong Zhang isz = 0; 31d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr); 32d94109b8SHong Zhang 33d94109b8SHong Zhang /* Extract the indices, assume there can be duplicate entries */ 34d94109b8SHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 35d94109b8SHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 36d94109b8SHong Zhang 37d94109b8SHong Zhang /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */ 38dbe03f88SHong Zhang bcol_max = 0; 39d94109b8SHong Zhang for (j=0; j<n ; ++j){ 40d94109b8SHong Zhang brow = idx[j]/bs; /* convert the indices into block indices */ 41d94109b8SHong Zhang if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 42dbe03f88SHong Zhang if(!PetscBTLookupSet(table,brow)) { 43dbe03f88SHong Zhang nidx[isz++] = brow; 44dbe03f88SHong Zhang if (bcol_max < brow) bcol_max = brow; 45dbe03f88SHong Zhang } 46d94109b8SHong Zhang } 47d94109b8SHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 48d94109b8SHong Zhang ierr = ISDestroy(is[i]);CHKERRQ(ierr); 49d94109b8SHong Zhang 50d94109b8SHong Zhang k = 0; 51d94109b8SHong Zhang for (j=0; j<ov; j++){ /* for each overlap */ 52d94109b8SHong Zhang /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */ 53d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr); 54efee365bSSatish Balay for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); } 55d94109b8SHong Zhang 56d94109b8SHong Zhang n = isz; /* length of the updated is[i] */ 57d94109b8SHong Zhang for (brow=0; brow<mbs; brow++){ 58d94109b8SHong Zhang start = ai[brow]; end = ai[brow+1]; 59d94109b8SHong Zhang if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */ 60d94109b8SHong Zhang for (l = start; l<end ; l++){ 61d94109b8SHong Zhang bcol = aj[l]; 62d94109b8SHong Zhang if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;} 63d94109b8SHong Zhang } 64d94109b8SHong Zhang k++; 65d94109b8SHong Zhang if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */ 66d94109b8SHong Zhang } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */ 67d94109b8SHong Zhang for (l = start; l<end ; l++){ 68d94109b8SHong Zhang bcol = aj[l]; 69dbe03f88SHong Zhang if (bcol > bcol_max) break; 70d94109b8SHong Zhang if (PetscBTLookup(table0,bcol)){ 71d94109b8SHong Zhang if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;} 72d94109b8SHong Zhang break; /* for l = start; l<end ; l++) */ 73d94109b8SHong Zhang } 74d94109b8SHong Zhang } 75d94109b8SHong Zhang } 76d94109b8SHong Zhang } 77d94109b8SHong Zhang } /* for each overlap */ 78d94109b8SHong Zhang 79d94109b8SHong Zhang /* expand the Index Set */ 80d94109b8SHong Zhang for (j=0; j<isz; j++) { 81d94109b8SHong Zhang for (k=0; k<bs; k++) 82d94109b8SHong Zhang nidx2[j*bs+k] = nidx[j]*bs+k; 83d94109b8SHong Zhang } 84d94109b8SHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 85d94109b8SHong Zhang } 86d94109b8SHong Zhang ierr = PetscBTDestroy(table);CHKERRQ(ierr); 87d94109b8SHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 88d94109b8SHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 89d94109b8SHong Zhang ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 905eee224dSHong Zhang PetscFunctionReturn(0); 9149b5e25fSSatish Balay } 9249b5e25fSSatish Balay 934a2ae208SSatish Balay #undef __FUNCT__ 944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 9513f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 9649b5e25fSSatish Balay { 9749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 986849ba73SBarry Smith PetscErrorCode ierr; 9913f74950SBarry Smith PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens; 10013f74950SBarry Smith PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 101899cda47SBarry Smith PetscInt *irow,nrows,*ssmap,bs=A->rmap.bs,bs2=a->bs2; 10213f74950SBarry Smith PetscInt *aj = a->j,*ai = a->i; 10349b5e25fSSatish Balay MatScalar *mat_a; 10449b5e25fSSatish Balay Mat C; 10549b5e25fSSatish Balay PetscTruth flag; 10649b5e25fSSatish Balay 10749b5e25fSSatish Balay PetscFunctionBegin; 108e005ede5SBarry Smith if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 10949b5e25fSSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 110347d480fSBarry Smith if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 11149b5e25fSSatish Balay 11249b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 11349b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 11449b5e25fSSatish Balay 11513f74950SBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 11649b5e25fSSatish Balay ssmap = smap; 11713f74950SBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 11813f74950SBarry Smith ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 11949b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 12049b5e25fSSatish Balay /* determine lens of each row */ 12149b5e25fSSatish Balay for (i=0; i<nrows; i++) { 12249b5e25fSSatish Balay kstart = ai[irow[i]]; 12349b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 12449b5e25fSSatish Balay lens[i] = 0; 12549b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 12649b5e25fSSatish Balay if (ssmap[aj[k]]) { 12749b5e25fSSatish Balay lens[i]++; 12849b5e25fSSatish Balay } 12949b5e25fSSatish Balay } 13049b5e25fSSatish Balay } 13149b5e25fSSatish Balay /* Create and fill new matrix */ 13249b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 13349b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 13449b5e25fSSatish Balay 135899cda47SBarry Smith if (c->mbs!=nrows || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 13613f74950SBarry Smith ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 137abc0a331SBarry Smith if (!flag) { 138347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 13949b5e25fSSatish Balay } 14013f74950SBarry Smith ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 14149b5e25fSSatish Balay C = *B; 14249b5e25fSSatish Balay } else { 143*7adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 144f69a0ea3SMatthew Knepley ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 145*7adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 146ab93d7beSBarry Smith ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr); 14749b5e25fSSatish Balay } 14849b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 14949b5e25fSSatish Balay for (i=0; i<nrows; i++) { 15049b5e25fSSatish Balay row = irow[i]; 15149b5e25fSSatish Balay kstart = ai[row]; 15249b5e25fSSatish Balay kend = kstart + a->ilen[row]; 15349b5e25fSSatish Balay mat_i = c->i[i]; 15449b5e25fSSatish Balay mat_j = c->j + mat_i; 15549b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 15649b5e25fSSatish Balay mat_ilen = c->ilen + i; 15749b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 15849b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 15949b5e25fSSatish Balay *mat_j++ = tcol - 1; 16049b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 16149b5e25fSSatish Balay mat_a += bs2; 16249b5e25fSSatish Balay (*mat_ilen)++; 16349b5e25fSSatish Balay } 16449b5e25fSSatish Balay } 16549b5e25fSSatish Balay } 16649b5e25fSSatish Balay 16749b5e25fSSatish Balay /* Free work space */ 16849b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 16949b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 17049b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17149b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17249b5e25fSSatish Balay 17349b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 17449b5e25fSSatish Balay *B = C; 17549b5e25fSSatish Balay PetscFunctionReturn(0); 17649b5e25fSSatish Balay } 17749b5e25fSSatish Balay 1784a2ae208SSatish Balay #undef __FUNCT__ 1794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 18013f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 18149b5e25fSSatish Balay { 18249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 18349b5e25fSSatish Balay IS is1; 1846849ba73SBarry Smith PetscErrorCode ierr; 185899cda47SBarry Smith PetscInt *vary,*iary,*irow,nrows,i,bs=A->rmap.bs,count; 18649b5e25fSSatish Balay 18749b5e25fSSatish Balay PetscFunctionBegin; 188e005ede5SBarry Smith if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 18949b5e25fSSatish Balay 19049b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 19149b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 19249b5e25fSSatish Balay 19349b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 19449b5e25fSSatish Balay and form the IS with compressed IS */ 19513f74950SBarry Smith ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 19649b5e25fSSatish Balay iary = vary + a->mbs; 19713f74950SBarry Smith ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 19849b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 19949b5e25fSSatish Balay 20049b5e25fSSatish Balay count = 0; 20149b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 202e005ede5SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks"); 20349b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 20449b5e25fSSatish Balay } 20549b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 20649b5e25fSSatish Balay 20749b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 20849b5e25fSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 20949b5e25fSSatish Balay 21049b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 21149b5e25fSSatish Balay ISDestroy(is1); 21249b5e25fSSatish Balay PetscFunctionReturn(0); 21349b5e25fSSatish Balay } 21449b5e25fSSatish Balay 2154a2ae208SSatish Balay #undef __FUNCT__ 2164a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 21713f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 21849b5e25fSSatish Balay { 2196849ba73SBarry Smith PetscErrorCode ierr; 22013f74950SBarry Smith PetscInt i; 22149b5e25fSSatish Balay 22249b5e25fSSatish Balay PetscFunctionBegin; 22349b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 22482502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 22549b5e25fSSatish Balay } 22649b5e25fSSatish Balay 22749b5e25fSSatish Balay for (i=0; i<n; i++) { 22849b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 22949b5e25fSSatish Balay } 23049b5e25fSSatish Balay PetscFunctionReturn(0); 23149b5e25fSSatish Balay } 23249b5e25fSSatish Balay 23349b5e25fSSatish Balay /* -------------------------------------------------------*/ 23449b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 23549b5e25fSSatish Balay /* -------------------------------------------------------*/ 236d9eff348SSatish Balay #include "petscblaslapack.h" 23749b5e25fSSatish Balay 2384a2ae208SSatish Balay #undef __FUNCT__ 2394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1" 240dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 24149b5e25fSSatish Balay { 24249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 24387828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,zero=0.0; 24449b5e25fSSatish Balay MatScalar *v; 2456849ba73SBarry Smith PetscErrorCode ierr; 24613f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 24749b5e25fSSatish Balay 24849b5e25fSSatish Balay PetscFunctionBegin; 2492dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2501ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2511ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 25249b5e25fSSatish Balay 25349b5e25fSSatish Balay v = a->a; 25449b5e25fSSatish Balay xb = x; 25549b5e25fSSatish Balay 25649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 25749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 2583cb60589SBarry Smith x1 = *xb++; 2593cb60589SBarry Smith ib = aj + *ai++; 260831a3094SHong Zhang jmin = 0; 2613cb60589SBarry Smith /* if we ALWAYS required a diagonal entry then could remove this if test */ 2623cb60589SBarry Smith /* should we use a tmp to hold the accumulated z[i] */ 263831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 264831a3094SHong Zhang z[i] += *v++ * x[*ib++]; 265831a3094SHong Zhang jmin++; 266831a3094SHong Zhang } 267831a3094SHong Zhang for (j=jmin; j<n; j++) { 26849b5e25fSSatish Balay cval = *ib; 26949b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 27049b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 27149b5e25fSSatish Balay } 27249b5e25fSSatish Balay } 27349b5e25fSSatish Balay 2741ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2751ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 276899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); /* nz = (nz+m)/2 */ 27749b5e25fSSatish Balay PetscFunctionReturn(0); 27849b5e25fSSatish Balay } 27949b5e25fSSatish Balay 2804a2ae208SSatish Balay #undef __FUNCT__ 2814a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 282dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 28349b5e25fSSatish Balay { 28449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 28587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 28649b5e25fSSatish Balay MatScalar *v; 2876849ba73SBarry Smith PetscErrorCode ierr; 28813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 28949b5e25fSSatish Balay 29049b5e25fSSatish Balay PetscFunctionBegin; 2912dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2931ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 29449b5e25fSSatish Balay 29549b5e25fSSatish Balay v = a->a; 29649b5e25fSSatish Balay xb = x; 29749b5e25fSSatish Balay 29849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 29949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 30049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 30149b5e25fSSatish Balay ib = aj + *ai; 302831a3094SHong Zhang jmin = 0; 3037fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 30449b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 30549b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 306831a3094SHong Zhang v += 4; jmin++; 3077fbae186SHong Zhang } 308831a3094SHong Zhang for (j=jmin; j<n; j++) { 30949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 31049b5e25fSSatish Balay cval = ib[j]*2; 31149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 31249b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 31349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 31449b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 31549b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 31649b5e25fSSatish Balay v += 4; 31749b5e25fSSatish Balay } 31849b5e25fSSatish Balay xb +=2; ai++; 31949b5e25fSSatish Balay } 32049b5e25fSSatish Balay 3211ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3221ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 323899cda47SBarry Smith ierr = PetscLogFlops(8*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 32449b5e25fSSatish Balay PetscFunctionReturn(0); 32549b5e25fSSatish Balay } 32649b5e25fSSatish Balay 3274a2ae208SSatish Balay #undef __FUNCT__ 3284a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 329dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 33049b5e25fSSatish Balay { 33149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 33287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 33349b5e25fSSatish Balay MatScalar *v; 3346849ba73SBarry Smith PetscErrorCode ierr; 33513f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 33649b5e25fSSatish Balay 33749b5e25fSSatish Balay PetscFunctionBegin; 3382dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 3391ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3401ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 34149b5e25fSSatish Balay 34249b5e25fSSatish Balay v = a->a; 34349b5e25fSSatish Balay xb = x; 34449b5e25fSSatish Balay 34549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 34649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 34749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 34849b5e25fSSatish Balay ib = aj + *ai; 349831a3094SHong Zhang jmin = 0; 3507fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 35149b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 35249b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 35349b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 354831a3094SHong Zhang v += 9; jmin++; 3557fbae186SHong Zhang } 356831a3094SHong Zhang for (j=jmin; j<n; j++) { 35749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 35849b5e25fSSatish Balay cval = ib[j]*3; 35949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 36049b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 36149b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 36249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 36349b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 36449b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 36549b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 36649b5e25fSSatish Balay v += 9; 36749b5e25fSSatish Balay } 36849b5e25fSSatish Balay xb +=3; ai++; 36949b5e25fSSatish Balay } 37049b5e25fSSatish Balay 3711ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3721ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 373899cda47SBarry Smith ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 37449b5e25fSSatish Balay PetscFunctionReturn(0); 37549b5e25fSSatish Balay } 37649b5e25fSSatish Balay 3774a2ae208SSatish Balay #undef __FUNCT__ 3784a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 379dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 38049b5e25fSSatish Balay { 38149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 38349b5e25fSSatish Balay MatScalar *v; 3846849ba73SBarry Smith PetscErrorCode ierr; 38513f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 38649b5e25fSSatish Balay 38749b5e25fSSatish Balay PetscFunctionBegin; 3882dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 3891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3901ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 39149b5e25fSSatish Balay 39249b5e25fSSatish Balay v = a->a; 39349b5e25fSSatish Balay xb = x; 39449b5e25fSSatish Balay 39549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 39649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 39749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 39849b5e25fSSatish Balay ib = aj + *ai; 399831a3094SHong Zhang jmin = 0; 4007fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 40149b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 40249b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 40349b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 40449b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 405831a3094SHong Zhang v += 16; jmin++; 4067fbae186SHong Zhang } 407831a3094SHong Zhang for (j=jmin; j<n; j++) { 40849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 40949b5e25fSSatish Balay cval = ib[j]*4; 41049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 41149b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 41249b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 41349b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 41449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 41549b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 41649b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 41749b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 41849b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 41949b5e25fSSatish Balay v += 16; 42049b5e25fSSatish Balay } 42149b5e25fSSatish Balay xb +=4; ai++; 42249b5e25fSSatish Balay } 42349b5e25fSSatish Balay 4241ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4251ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 426899cda47SBarry Smith ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 42749b5e25fSSatish Balay PetscFunctionReturn(0); 42849b5e25fSSatish Balay } 42949b5e25fSSatish Balay 4304a2ae208SSatish Balay #undef __FUNCT__ 4314a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 432dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 43349b5e25fSSatish Balay { 43449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 43649b5e25fSSatish Balay MatScalar *v; 4376849ba73SBarry Smith PetscErrorCode ierr; 43813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 43949b5e25fSSatish Balay 44049b5e25fSSatish Balay PetscFunctionBegin; 4412dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4421ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4431ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 44449b5e25fSSatish Balay 44549b5e25fSSatish Balay v = a->a; 44649b5e25fSSatish Balay xb = x; 44749b5e25fSSatish Balay 44849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 44949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 45049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 45149b5e25fSSatish Balay ib = aj + *ai; 452831a3094SHong Zhang jmin = 0; 4537fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 45449b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 45549b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 45649b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 45749b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 45849b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 459831a3094SHong Zhang v += 25; jmin++; 4607fbae186SHong Zhang } 461831a3094SHong Zhang for (j=jmin; j<n; j++) { 46249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 46349b5e25fSSatish Balay cval = ib[j]*5; 46449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 46549b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 46649b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 46749b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 46849b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 46949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 47049b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 47149b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 47249b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 47349b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 47449b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 47549b5e25fSSatish Balay v += 25; 47649b5e25fSSatish Balay } 47749b5e25fSSatish Balay xb +=5; ai++; 47849b5e25fSSatish Balay } 47949b5e25fSSatish Balay 4801ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4811ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 482899cda47SBarry Smith ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 48349b5e25fSSatish Balay PetscFunctionReturn(0); 48449b5e25fSSatish Balay } 48549b5e25fSSatish Balay 48649b5e25fSSatish Balay 4874a2ae208SSatish Balay #undef __FUNCT__ 4884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 489dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 49049b5e25fSSatish Balay { 49149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 49287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 49349b5e25fSSatish Balay MatScalar *v; 4946849ba73SBarry Smith PetscErrorCode ierr; 49513f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 49649b5e25fSSatish Balay 49749b5e25fSSatish Balay PetscFunctionBegin; 4982dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4991ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5001ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 50149b5e25fSSatish Balay 50249b5e25fSSatish Balay v = a->a; 50349b5e25fSSatish Balay xb = x; 50449b5e25fSSatish Balay 50549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 50649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 50749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 50849b5e25fSSatish Balay ib = aj + *ai; 509831a3094SHong Zhang jmin = 0; 5107fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 51149b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 51249b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 51349b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 51449b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 51549b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 51649b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 517831a3094SHong Zhang v += 36; jmin++; 5187fbae186SHong Zhang } 519831a3094SHong Zhang for (j=jmin; j<n; j++) { 52049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 52149b5e25fSSatish Balay cval = ib[j]*6; 52249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 52349b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 52449b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 52549b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 52649b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 52749b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 52849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 52949b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 53049b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 53149b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 53249b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 53349b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 53449b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 53549b5e25fSSatish Balay v += 36; 53649b5e25fSSatish Balay } 53749b5e25fSSatish Balay xb +=6; ai++; 53849b5e25fSSatish Balay } 53949b5e25fSSatish Balay 5401ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 542899cda47SBarry Smith ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 54349b5e25fSSatish Balay PetscFunctionReturn(0); 54449b5e25fSSatish Balay } 5454a2ae208SSatish Balay #undef __FUNCT__ 5464a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 547dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 54849b5e25fSSatish Balay { 54949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55087828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 55149b5e25fSSatish Balay MatScalar *v; 5526849ba73SBarry Smith PetscErrorCode ierr; 55313f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 55449b5e25fSSatish Balay 55549b5e25fSSatish Balay PetscFunctionBegin; 5562dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 5571ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5581ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 55949b5e25fSSatish Balay 56049b5e25fSSatish Balay v = a->a; 56149b5e25fSSatish Balay xb = x; 56249b5e25fSSatish Balay 56349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 56449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 56549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 56649b5e25fSSatish Balay ib = aj + *ai; 567831a3094SHong Zhang jmin = 0; 5687fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 56949b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 57049b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 57149b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 57249b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 57349b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 57449b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 57549b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 576831a3094SHong Zhang v += 49; jmin++; 5777fbae186SHong Zhang } 578831a3094SHong Zhang for (j=jmin; j<n; j++) { 57949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 58049b5e25fSSatish Balay cval = ib[j]*7; 58149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 58249b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 58349b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 58449b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 58549b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 58649b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 58749b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 58849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 58949b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 59049b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 59149b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 59249b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 59349b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 59449b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 59549b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 59649b5e25fSSatish Balay v += 49; 59749b5e25fSSatish Balay } 59849b5e25fSSatish Balay xb +=7; ai++; 59949b5e25fSSatish Balay } 6001ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6011ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 602899cda47SBarry Smith ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 60349b5e25fSSatish Balay PetscFunctionReturn(0); 60449b5e25fSSatish Balay } 60549b5e25fSSatish Balay 60649b5e25fSSatish Balay /* 60749b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 60849b5e25fSSatish Balay */ 6094a2ae208SSatish Balay #undef __FUNCT__ 6104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 611dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 61249b5e25fSSatish Balay { 61349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 61487828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 6150b60a74dSHong Zhang MatScalar *v; 616dfbe8321SBarry Smith PetscErrorCode ierr; 617899cda47SBarry Smith PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k; 61849b5e25fSSatish Balay 61949b5e25fSSatish Balay PetscFunctionBegin; 6202dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 6211ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 6221ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 62349b5e25fSSatish Balay 62449b5e25fSSatish Balay aj = a->j; 62549b5e25fSSatish Balay v = a->a; 62649b5e25fSSatish Balay ii = a->i; 62749b5e25fSSatish Balay 62849b5e25fSSatish Balay if (!a->mult_work) { 629899cda47SBarry Smith ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 63049b5e25fSSatish Balay } 63149b5e25fSSatish Balay work = a->mult_work; 63249b5e25fSSatish Balay 63349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 63449b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 63549b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 63649b5e25fSSatish Balay 63749b5e25fSSatish Balay /* upper triangular part */ 63849b5e25fSSatish Balay for (j=0; j<n; j++) { 63949b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 64049b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 64149b5e25fSSatish Balay workt += bs; 64249b5e25fSSatish Balay } 64349b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 64449b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 64549b5e25fSSatish Balay 64649b5e25fSSatish Balay /* strict lower triangular part */ 647831a3094SHong Zhang idx = aj+ii[0]; 648831a3094SHong Zhang if (*idx == i){ 64996b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 650831a3094SHong Zhang } 65196b9376eSHong Zhang 65249b5e25fSSatish Balay if (ncols > 0){ 65349b5e25fSSatish Balay workt = work; 65487828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 655831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 656831a3094SHong Zhang for (j=0; j<n; j++) { 657831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 65849b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 65949b5e25fSSatish Balay workt += bs; 66049b5e25fSSatish Balay } 66149b5e25fSSatish Balay } 66249b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 66349b5e25fSSatish Balay } 66449b5e25fSSatish Balay 6651ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6661ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 667899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N)*bs2 - A->rmap.N);CHKERRQ(ierr); 66849b5e25fSSatish Balay PetscFunctionReturn(0); 66949b5e25fSSatish Balay } 67049b5e25fSSatish Balay 6716a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec); 6724a2ae208SSatish Balay #undef __FUNCT__ 6734a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 674dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 67549b5e25fSSatish Balay { 67649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 677bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1; 67849b5e25fSSatish Balay MatScalar *v; 6796849ba73SBarry Smith PetscErrorCode ierr; 68013f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 68149b5e25fSSatish Balay 68249b5e25fSSatish Balay PetscFunctionBegin; 6836a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 6841ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6851ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 68649b5e25fSSatish Balay v = a->a; 68749b5e25fSSatish Balay xb = x; 68849b5e25fSSatish Balay 68949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 69049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 69149b5e25fSSatish Balay x1 = xb[0]; 69249b5e25fSSatish Balay ib = aj + *ai; 693831a3094SHong Zhang jmin = 0; 694831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 695831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 696831a3094SHong Zhang } 697831a3094SHong Zhang for (j=jmin; j<n; j++) { 69849b5e25fSSatish Balay cval = *ib; 69949b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 70049b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 70149b5e25fSSatish Balay } 70249b5e25fSSatish Balay xb++; ai++; 70349b5e25fSSatish Balay } 70449b5e25fSSatish Balay 7051ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 706bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 70749b5e25fSSatish Balay 708899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 70949b5e25fSSatish Balay PetscFunctionReturn(0); 71049b5e25fSSatish Balay } 71149b5e25fSSatish Balay 7124a2ae208SSatish Balay #undef __FUNCT__ 7134a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 714dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 71549b5e25fSSatish Balay { 71649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 717bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2; 71849b5e25fSSatish Balay MatScalar *v; 7196849ba73SBarry Smith PetscErrorCode ierr; 72013f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 72149b5e25fSSatish Balay 72249b5e25fSSatish Balay PetscFunctionBegin; 7236a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 7241ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7251ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 72649b5e25fSSatish Balay 72749b5e25fSSatish Balay v = a->a; 72849b5e25fSSatish Balay xb = x; 72949b5e25fSSatish Balay 73049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 73149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 73249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 73349b5e25fSSatish Balay ib = aj + *ai; 734831a3094SHong Zhang jmin = 0; 7357fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 73649b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 73749b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 738831a3094SHong Zhang v += 4; jmin++; 7397fbae186SHong Zhang } 740831a3094SHong Zhang for (j=jmin; j<n; j++) { 74149b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 74249b5e25fSSatish Balay cval = ib[j]*2; 74349b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 74449b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 74549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 74649b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 74749b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 74849b5e25fSSatish Balay v += 4; 74949b5e25fSSatish Balay } 75049b5e25fSSatish Balay xb +=2; ai++; 75149b5e25fSSatish Balay } 7521ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 753bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 75449b5e25fSSatish Balay 755899cda47SBarry Smith ierr = PetscLogFlops(4*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 75649b5e25fSSatish Balay PetscFunctionReturn(0); 75749b5e25fSSatish Balay } 75849b5e25fSSatish Balay 7594a2ae208SSatish Balay #undef __FUNCT__ 7604a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 761dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 76249b5e25fSSatish Balay { 76349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 764bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3; 76549b5e25fSSatish Balay MatScalar *v; 7666849ba73SBarry Smith PetscErrorCode ierr; 76713f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 76849b5e25fSSatish Balay 76949b5e25fSSatish Balay PetscFunctionBegin; 7706a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 7711ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7721ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 77349b5e25fSSatish Balay 77449b5e25fSSatish Balay v = a->a; 77549b5e25fSSatish Balay xb = x; 77649b5e25fSSatish Balay 77749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 77849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 77949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 78049b5e25fSSatish Balay ib = aj + *ai; 781831a3094SHong Zhang jmin = 0; 7827fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 78349b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 78449b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 78549b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 786831a3094SHong Zhang v += 9; jmin++; 7877fbae186SHong Zhang } 788831a3094SHong Zhang for (j=jmin; j<n; j++) { 78949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 79049b5e25fSSatish Balay cval = ib[j]*3; 79149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 79249b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 79349b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 79449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 79549b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 79649b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 79749b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 79849b5e25fSSatish Balay v += 9; 79949b5e25fSSatish Balay } 80049b5e25fSSatish Balay xb +=3; ai++; 80149b5e25fSSatish Balay } 80249b5e25fSSatish Balay 8031ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 80549b5e25fSSatish Balay 806899cda47SBarry Smith ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 80749b5e25fSSatish Balay PetscFunctionReturn(0); 80849b5e25fSSatish Balay } 80949b5e25fSSatish Balay 8104a2ae208SSatish Balay #undef __FUNCT__ 8114a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 812dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 81349b5e25fSSatish Balay { 81449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 815bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4; 81649b5e25fSSatish Balay MatScalar *v; 8176849ba73SBarry Smith PetscErrorCode ierr; 81813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 81949b5e25fSSatish Balay 82049b5e25fSSatish Balay PetscFunctionBegin; 8216a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 8221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8231ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 82449b5e25fSSatish Balay 82549b5e25fSSatish Balay v = a->a; 82649b5e25fSSatish Balay xb = x; 82749b5e25fSSatish Balay 82849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 82949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 83049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 83149b5e25fSSatish Balay ib = aj + *ai; 832831a3094SHong Zhang jmin = 0; 8337fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 83449b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 83549b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 83649b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 83749b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 838831a3094SHong Zhang v += 16; jmin++; 8397fbae186SHong Zhang } 840831a3094SHong Zhang for (j=jmin; j<n; j++) { 84149b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 84249b5e25fSSatish Balay cval = ib[j]*4; 84349b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 84449b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 84549b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 84649b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 84749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 84849b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 84949b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 85049b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 85149b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 85249b5e25fSSatish Balay v += 16; 85349b5e25fSSatish Balay } 85449b5e25fSSatish Balay xb +=4; ai++; 85549b5e25fSSatish Balay } 85649b5e25fSSatish Balay 8571ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 85949b5e25fSSatish Balay 860899cda47SBarry Smith ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 86149b5e25fSSatish Balay PetscFunctionReturn(0); 86249b5e25fSSatish Balay } 86349b5e25fSSatish Balay 8644a2ae208SSatish Balay #undef __FUNCT__ 8654a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 866dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 86749b5e25fSSatish Balay { 86849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 869bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5; 87049b5e25fSSatish Balay MatScalar *v; 8716849ba73SBarry Smith PetscErrorCode ierr; 87213f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 87349b5e25fSSatish Balay 87449b5e25fSSatish Balay PetscFunctionBegin; 8756a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 8761ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8771ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 87849b5e25fSSatish Balay 87949b5e25fSSatish Balay v = a->a; 88049b5e25fSSatish Balay xb = x; 88149b5e25fSSatish Balay 88249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 88349b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 88449b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 88549b5e25fSSatish Balay ib = aj + *ai; 886831a3094SHong Zhang jmin = 0; 8877fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 88849b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 88949b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 89049b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 89149b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 89249b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 893831a3094SHong Zhang v += 25; jmin++; 8947fbae186SHong Zhang } 895831a3094SHong Zhang for (j=jmin; j<n; j++) { 89649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 89749b5e25fSSatish Balay cval = ib[j]*5; 89849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 89949b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 90049b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 90149b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 90249b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 90349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 90449b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 90549b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 90649b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 90749b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 90849b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 90949b5e25fSSatish Balay v += 25; 91049b5e25fSSatish Balay } 91149b5e25fSSatish Balay xb +=5; ai++; 91249b5e25fSSatish Balay } 91349b5e25fSSatish Balay 9141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 915bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 91649b5e25fSSatish Balay 917899cda47SBarry Smith ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 91849b5e25fSSatish Balay PetscFunctionReturn(0); 91949b5e25fSSatish Balay } 9204a2ae208SSatish Balay #undef __FUNCT__ 9214a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 922dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 92349b5e25fSSatish Balay { 92449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 925bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6; 92649b5e25fSSatish Balay MatScalar *v; 9276849ba73SBarry Smith PetscErrorCode ierr; 92813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 92949b5e25fSSatish Balay 93049b5e25fSSatish Balay PetscFunctionBegin; 9316a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 9321ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9331ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 93449b5e25fSSatish Balay 93549b5e25fSSatish Balay v = a->a; 93649b5e25fSSatish Balay xb = x; 93749b5e25fSSatish Balay 93849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 93949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 94049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 94149b5e25fSSatish Balay ib = aj + *ai; 942831a3094SHong Zhang jmin = 0; 9437fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 94449b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 94549b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 94649b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 94749b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 94849b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 94949b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 950831a3094SHong Zhang v += 36; jmin++; 9517fbae186SHong Zhang } 952831a3094SHong Zhang for (j=jmin; j<n; j++) { 95349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 95449b5e25fSSatish Balay cval = ib[j]*6; 95549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 95649b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 95749b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 95849b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 95949b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 96049b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 96149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 96249b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 96349b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 96449b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 96549b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 96649b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 96749b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 96849b5e25fSSatish Balay v += 36; 96949b5e25fSSatish Balay } 97049b5e25fSSatish Balay xb +=6; ai++; 97149b5e25fSSatish Balay } 97249b5e25fSSatish Balay 9731ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 974bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 97549b5e25fSSatish Balay 976899cda47SBarry Smith ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 97749b5e25fSSatish Balay PetscFunctionReturn(0); 97849b5e25fSSatish Balay } 97949b5e25fSSatish Balay 9804a2ae208SSatish Balay #undef __FUNCT__ 9814a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 982dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 98349b5e25fSSatish Balay { 98449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 985bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 98649b5e25fSSatish Balay MatScalar *v; 9876849ba73SBarry Smith PetscErrorCode ierr; 98813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 98949b5e25fSSatish Balay 99049b5e25fSSatish Balay PetscFunctionBegin; 9916a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 9921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9931ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 99449b5e25fSSatish Balay 99549b5e25fSSatish Balay v = a->a; 99649b5e25fSSatish Balay xb = x; 99749b5e25fSSatish Balay 99849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 99949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 100049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 100149b5e25fSSatish Balay ib = aj + *ai; 1002831a3094SHong Zhang jmin = 0; 10037fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 100449b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 100549b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 100649b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 100749b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 100849b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 100949b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 101049b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1011831a3094SHong Zhang v += 49; jmin++; 10127fbae186SHong Zhang } 1013831a3094SHong Zhang for (j=jmin; j<n; j++) { 101449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 101549b5e25fSSatish Balay cval = ib[j]*7; 101649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 101749b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 101849b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 101949b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 102049b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 102149b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 102249b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 102349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 102449b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 102549b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 102649b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 102749b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 102849b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 102949b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 103049b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 103149b5e25fSSatish Balay v += 49; 103249b5e25fSSatish Balay } 103349b5e25fSSatish Balay xb +=7; ai++; 103449b5e25fSSatish Balay } 103549b5e25fSSatish Balay 10361ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1037bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 103849b5e25fSSatish Balay 1039899cda47SBarry Smith ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 104049b5e25fSSatish Balay PetscFunctionReturn(0); 104149b5e25fSSatish Balay } 104249b5e25fSSatish Balay 10434a2ae208SSatish Balay #undef __FUNCT__ 10444a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 1045dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 104649b5e25fSSatish Balay { 104749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1048bba805e6SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1049066653e3SSatish Balay MatScalar *v; 1050dfbe8321SBarry Smith PetscErrorCode ierr; 1051899cda47SBarry Smith PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k; 105249b5e25fSSatish Balay 105349b5e25fSSatish Balay PetscFunctionBegin; 10546a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 10551ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 10561ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 105749b5e25fSSatish Balay 105849b5e25fSSatish Balay aj = a->j; 105949b5e25fSSatish Balay v = a->a; 106049b5e25fSSatish Balay ii = a->i; 106149b5e25fSSatish Balay 106249b5e25fSSatish Balay if (!a->mult_work) { 1063899cda47SBarry Smith ierr = PetscMalloc((A->rmap.n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 106449b5e25fSSatish Balay } 106549b5e25fSSatish Balay work = a->mult_work; 106649b5e25fSSatish Balay 106749b5e25fSSatish Balay 106849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 106949b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 107049b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 107149b5e25fSSatish Balay 107249b5e25fSSatish Balay /* upper triangular part */ 107349b5e25fSSatish Balay for (j=0; j<n; j++) { 107449b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 107549b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 107649b5e25fSSatish Balay workt += bs; 107749b5e25fSSatish Balay } 107849b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 107949b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 108049b5e25fSSatish Balay 108149b5e25fSSatish Balay /* strict lower triangular part */ 1082831a3094SHong Zhang idx = aj+ii[0]; 1083831a3094SHong Zhang if (*idx == i){ 108496b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1085831a3094SHong Zhang } 108649b5e25fSSatish Balay if (ncols > 0){ 108749b5e25fSSatish Balay workt = work; 108887828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1089831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1090831a3094SHong Zhang for (j=0; j<n; j++) { 1091831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 109249b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 109349b5e25fSSatish Balay workt += bs; 109449b5e25fSSatish Balay } 109549b5e25fSSatish Balay } 109649b5e25fSSatish Balay 109749b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 109849b5e25fSSatish Balay } 109949b5e25fSSatish Balay 11001ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1101bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 110249b5e25fSSatish Balay 1103899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 110449b5e25fSSatish Balay PetscFunctionReturn(0); 110549b5e25fSSatish Balay } 110649b5e25fSSatish Balay 11074a2ae208SSatish Balay #undef __FUNCT__ 11084a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1109f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha) 111049b5e25fSSatish Balay { 111149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1112f4df32b1SMatthew Knepley PetscScalar oalpha = alpha; 11134ce68768SBarry Smith PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz; 1114efee365bSSatish Balay PetscErrorCode ierr; 111549b5e25fSSatish Balay 111649b5e25fSSatish Balay PetscFunctionBegin; 1117f4df32b1SMatthew Knepley BLASscal_(&totalnz,&oalpha,a->a,&one); 1118efee365bSSatish Balay ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 111949b5e25fSSatish Balay PetscFunctionReturn(0); 112049b5e25fSSatish Balay } 112149b5e25fSSatish Balay 11224a2ae208SSatish Balay #undef __FUNCT__ 11234a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 1124dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 112549b5e25fSSatish Balay { 112649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 112749b5e25fSSatish Balay MatScalar *v = a->a; 112849b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1129899cda47SBarry Smith PetscInt i,j,k,bs = A->rmap.bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 11306849ba73SBarry Smith PetscErrorCode ierr; 113113f74950SBarry Smith PetscInt *jl,*il,jmin,jmax,nexti,ik,*col; 113249b5e25fSSatish Balay 113349b5e25fSSatish Balay PetscFunctionBegin; 113449b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 113549b5e25fSSatish Balay for (k=0; k<mbs; k++){ 113649b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1137831a3094SHong Zhang col = aj + jmin; 1138831a3094SHong Zhang if (*col == k){ /* diagonal block */ 113949b5e25fSSatish Balay for (i=0; i<bs2; i++){ 114049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 114149b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 114249b5e25fSSatish Balay #else 114349b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 114449b5e25fSSatish Balay #endif 114549b5e25fSSatish Balay } 1146831a3094SHong Zhang jmin++; 1147831a3094SHong Zhang } 1148831a3094SHong Zhang for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 114949b5e25fSSatish Balay for (i=0; i<bs2; i++){ 115049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 115149b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 115249b5e25fSSatish Balay #else 115349b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 115449b5e25fSSatish Balay #endif 115549b5e25fSSatish Balay } 115649b5e25fSSatish Balay } 115749b5e25fSSatish Balay } 115849b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 11590b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */ 11600b8dc8d2SHong Zhang ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr); 11610b8dc8d2SHong Zhang jl = il + mbs; 11620b8dc8d2SHong Zhang sum = (PetscReal*)(jl + mbs); 11630b8dc8d2SHong Zhang for (i=0; i<mbs; i++) jl[i] = mbs; 11640b8dc8d2SHong Zhang il[0] = 0; 116549b5e25fSSatish Balay 116649b5e25fSSatish Balay *norm = 0.0; 116749b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 116849b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 116949b5e25fSSatish Balay /*-- col sum --*/ 117049b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1171831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1172831a3094SHong Zhang at step k */ 117349b5e25fSSatish Balay while (i<mbs){ 117449b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 117549b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 117649b5e25fSSatish Balay for (j=0; j<bs; j++){ 117749b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 117849b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 117949b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 118049b5e25fSSatish Balay } 118149b5e25fSSatish Balay } 118249b5e25fSSatish Balay /* update il, jl */ 1183831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1184831a3094SHong Zhang jmax = a->i[i+1]; 118549b5e25fSSatish Balay if (jmin < jmax){ 118649b5e25fSSatish Balay il[i] = jmin; 118749b5e25fSSatish Balay j = a->j[jmin]; 118849b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 118949b5e25fSSatish Balay } 119049b5e25fSSatish Balay i = nexti; 119149b5e25fSSatish Balay } 119249b5e25fSSatish Balay /*-- row sum --*/ 119349b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 119449b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 119549b5e25fSSatish Balay for (j=0; j<bs; j++){ 119649b5e25fSSatish Balay v = a->a + i*bs2 + j; 119749b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 11980b8dc8d2SHong Zhang sum[j] += PetscAbsScalar(*v); v += bs; 119949b5e25fSSatish Balay } 120049b5e25fSSatish Balay } 120149b5e25fSSatish Balay } 120249b5e25fSSatish Balay /* add k_th block row to il, jl */ 1203831a3094SHong Zhang col = aj+jmin; 1204831a3094SHong Zhang if (*col == k) jmin++; 120549b5e25fSSatish Balay if (jmin < jmax){ 120649b5e25fSSatish Balay il[k] = jmin; 12070b8dc8d2SHong Zhang j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k; 120849b5e25fSSatish Balay } 120949b5e25fSSatish Balay for (j=0; j<bs; j++){ 121049b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 121149b5e25fSSatish Balay } 121249b5e25fSSatish Balay } 121349b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 121449b5e25fSSatish Balay } else { 1215347d480fSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 121649b5e25fSSatish Balay } 121749b5e25fSSatish Balay PetscFunctionReturn(0); 121849b5e25fSSatish Balay } 121949b5e25fSSatish Balay 12204a2ae208SSatish Balay #undef __FUNCT__ 12214a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 1222dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 122349b5e25fSSatish Balay { 122449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1225dfbe8321SBarry Smith PetscErrorCode ierr; 122649b5e25fSSatish Balay 122749b5e25fSSatish Balay PetscFunctionBegin; 122849b5e25fSSatish Balay 122949b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1230899cda47SBarry Smith if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) { 1231ef511fbeSHong Zhang *flg = PETSC_FALSE; 1232ef511fbeSHong Zhang PetscFunctionReturn(0); 123349b5e25fSSatish Balay } 123449b5e25fSSatish Balay 123549b5e25fSSatish Balay /* if the a->i are the same */ 123613f74950SBarry Smith ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1237abc0a331SBarry Smith if (!*flg) { 123849b5e25fSSatish Balay PetscFunctionReturn(0); 123949b5e25fSSatish Balay } 124049b5e25fSSatish Balay 124149b5e25fSSatish Balay /* if a->j are the same */ 124213f74950SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1243abc0a331SBarry Smith if (!*flg) { 124449b5e25fSSatish Balay PetscFunctionReturn(0); 124549b5e25fSSatish Balay } 124649b5e25fSSatish Balay /* if a->a are the same */ 1247899cda47SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(A->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1248935af2e7SHong Zhang PetscFunctionReturn(0); 124949b5e25fSSatish Balay } 125049b5e25fSSatish Balay 12514a2ae208SSatish Balay #undef __FUNCT__ 12524a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 1253dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 125449b5e25fSSatish Balay { 125549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1256dfbe8321SBarry Smith PetscErrorCode ierr; 125713f74950SBarry Smith PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 125887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 125949b5e25fSSatish Balay MatScalar *aa,*aa_j; 126049b5e25fSSatish Balay 126149b5e25fSSatish Balay PetscFunctionBegin; 1262899cda47SBarry Smith bs = A->rmap.bs; 126382799104SHong Zhang if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 126482799104SHong Zhang 126549b5e25fSSatish Balay aa = a->a; 126649b5e25fSSatish Balay ai = a->i; 126749b5e25fSSatish Balay aj = a->j; 126849b5e25fSSatish Balay ambs = a->mbs; 126949b5e25fSSatish Balay bs2 = a->bs2; 127049b5e25fSSatish Balay 12712dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 12721ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 127349b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1274899cda47SBarry Smith if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 127549b5e25fSSatish Balay for (i=0; i<ambs; i++) { 127649b5e25fSSatish Balay j=ai[i]; 127749b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 127849b5e25fSSatish Balay row = i*bs; 127949b5e25fSSatish Balay aa_j = aa + j*bs2; 128082799104SHong Zhang if (A->factor && bs==1){ 128182799104SHong Zhang for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k]; 128282799104SHong Zhang } else { 128349b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 128449b5e25fSSatish Balay } 128549b5e25fSSatish Balay } 128682799104SHong Zhang } 128782799104SHong Zhang 12881ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 128949b5e25fSSatish Balay PetscFunctionReturn(0); 129049b5e25fSSatish Balay } 129149b5e25fSSatish Balay 12924a2ae208SSatish Balay #undef __FUNCT__ 12934a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 1294dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 129549b5e25fSSatish Balay { 129649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 12975e90f9d9SHong Zhang PetscScalar *l,x,*li,*ri; 129849b5e25fSSatish Balay MatScalar *aa,*v; 1299dfbe8321SBarry Smith PetscErrorCode ierr; 13005e90f9d9SHong Zhang PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1301b3bf805bSHong Zhang PetscTruth flg; 130249b5e25fSSatish Balay 130349b5e25fSSatish Balay PetscFunctionBegin; 1304b3bf805bSHong Zhang if (ll != rr){ 1305b3bf805bSHong Zhang ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1306b3bf805bSHong Zhang if (!flg) 1307b3bf805bSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1308b3bf805bSHong Zhang } 1309b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 131049b5e25fSSatish Balay ai = a->i; 131149b5e25fSSatish Balay aj = a->j; 131249b5e25fSSatish Balay aa = a->a; 1313899cda47SBarry Smith m = A->rmap.N; 1314899cda47SBarry Smith bs = A->rmap.bs; 131549b5e25fSSatish Balay mbs = a->mbs; 131649b5e25fSSatish Balay bs2 = a->bs2; 131749b5e25fSSatish Balay 13181ebc52fbSHong Zhang ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 131949b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1320347d480fSBarry Smith if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 132149b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 132249b5e25fSSatish Balay M = ai[i+1] - ai[i]; 132349b5e25fSSatish Balay li = l + i*bs; 132449b5e25fSSatish Balay v = aa + bs2*ai[i]; 132549b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 132649b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 13275e90f9d9SHong Zhang for (k=0; k<bs; k++) { 132849b5e25fSSatish Balay x = ri[k]; 132949b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 133049b5e25fSSatish Balay } 133149b5e25fSSatish Balay } 133249b5e25fSSatish Balay } 13331ebc52fbSHong Zhang ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1334efee365bSSatish Balay ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 133549b5e25fSSatish Balay PetscFunctionReturn(0); 133649b5e25fSSatish Balay } 133749b5e25fSSatish Balay 13384a2ae208SSatish Balay #undef __FUNCT__ 13394a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 1340dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 134149b5e25fSSatish Balay { 134249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 134349b5e25fSSatish Balay 134449b5e25fSSatish Balay PetscFunctionBegin; 1345899cda47SBarry Smith info->rows_global = (double)A->rmap.N; 1346899cda47SBarry Smith info->columns_global = (double)A->rmap.N; 1347899cda47SBarry Smith info->rows_local = (double)A->rmap.N; 1348899cda47SBarry Smith info->columns_local = (double)A->rmap.N; 134949b5e25fSSatish Balay info->block_size = a->bs2; 13506c6c5352SBarry Smith info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */ 13516c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 135249b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 135349b5e25fSSatish Balay info->assemblies = A->num_ass; 135449b5e25fSSatish Balay info->mallocs = a->reallocs; 1355*7adad957SLisandro Dalcin info->memory = ((PetscObject)A)->mem; 135649b5e25fSSatish Balay if (A->factor) { 135749b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 135849b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 135949b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 136049b5e25fSSatish Balay } else { 136149b5e25fSSatish Balay info->fill_ratio_given = 0; 136249b5e25fSSatish Balay info->fill_ratio_needed = 0; 136349b5e25fSSatish Balay info->factor_mallocs = 0; 136449b5e25fSSatish Balay } 136549b5e25fSSatish Balay PetscFunctionReturn(0); 136649b5e25fSSatish Balay } 136749b5e25fSSatish Balay 136849b5e25fSSatish Balay 13694a2ae208SSatish Balay #undef __FUNCT__ 13704a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 1371dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A) 137249b5e25fSSatish Balay { 137349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1374dfbe8321SBarry Smith PetscErrorCode ierr; 137549b5e25fSSatish Balay 137649b5e25fSSatish Balay PetscFunctionBegin; 137749b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 137849b5e25fSSatish Balay PetscFunctionReturn(0); 137949b5e25fSSatish Balay } 1380dc354874SHong Zhang 13814a2ae208SSatish Balay #undef __FUNCT__ 1382985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ" 1383985db425SBarry Smith /* 1384985db425SBarry Smith This code does not work since it only checks the upper triangular part of 1385985db425SBarry Smith the matrix. Hence it is not listed in the function table. 1386985db425SBarry Smith */ 1387985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[]) 1388dc354874SHong Zhang { 1389dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1390dfbe8321SBarry Smith PetscErrorCode ierr; 139113f74950SBarry Smith PetscInt i,j,n,row,col,bs,*ai,*aj,mbs; 1392c3fca9a7SHong Zhang PetscReal atmp; 1393273d9f13SBarry Smith MatScalar *aa; 1394985db425SBarry Smith PetscScalar *x; 139513f74950SBarry Smith PetscInt ncols,brow,bcol,krow,kcol; 1396dc354874SHong Zhang 1397dc354874SHong Zhang PetscFunctionBegin; 1398985db425SBarry Smith if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 1399dc354874SHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1400899cda47SBarry Smith bs = A->rmap.bs; 1401dc354874SHong Zhang aa = a->a; 1402dc354874SHong Zhang ai = a->i; 1403dc354874SHong Zhang aj = a->j; 140444117c81SHong Zhang mbs = a->mbs; 1405dc354874SHong Zhang 1406985db425SBarry Smith ierr = VecSet(v,0.0);CHKERRQ(ierr); 14071ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1408dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1409899cda47SBarry Smith if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 141044117c81SHong Zhang for (i=0; i<mbs; i++) { 1411d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1412d0f6400bSHong Zhang brow = bs*i; 141344117c81SHong Zhang for (j=0; j<ncols; j++){ 1414d0f6400bSHong Zhang bcol = bs*(*aj); 141544117c81SHong Zhang for (kcol=0; kcol<bs; kcol++){ 1416d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 141744117c81SHong Zhang for (krow=0; krow<bs; krow++){ 1418d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1419d0f6400bSHong Zhang row = brow + krow; /* row index */ 1420c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1421c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 142244117c81SHong Zhang } 142344117c81SHong Zhang } 1424d0f6400bSHong Zhang aj++; 1425dc354874SHong Zhang } 1426dc354874SHong Zhang } 14271ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1428dc354874SHong Zhang PetscFunctionReturn(0); 1429dc354874SHong Zhang } 1430