1be1d678aSKris Buschelman #define PETSCMAT_DLL 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 449b5e25fSSatish Balay #include "src/inline/spops.h" 549b5e25fSSatish Balay #include "src/inline/ilu.h" 649b5e25fSSatish Balay #include "petscbt.h" 73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 849b5e25fSSatish Balay 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 1249b5e25fSSatish Balay { 135eee224dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 146849ba73SBarry Smith PetscErrorCode ierr; 15521d7252SBarry Smith PetscInt brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2; 16521d7252SBarry Smith PetscBT table,table0; 17d94109b8SHong Zhang 18d94109b8SHong Zhang PetscFunctionBegin; 19b3bf805bSHong Zhang if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 20d94109b8SHong Zhang mbs = a->mbs; 21d94109b8SHong Zhang ai = a->i; 22d94109b8SHong Zhang aj = a->j; 23899cda47SBarry Smith bs = A->rmap.bs; 24d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr); 2513f74950SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 26899cda47SBarry Smith ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 27d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr); 28d94109b8SHong Zhang 29d94109b8SHong Zhang for (i=0; i<is_max; i++) { /* for each is */ 30d94109b8SHong Zhang isz = 0; 31d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr); 32d94109b8SHong Zhang 33d94109b8SHong Zhang /* Extract the indices, assume there can be duplicate entries */ 34d94109b8SHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 35d94109b8SHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 36d94109b8SHong Zhang 37d94109b8SHong Zhang /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */ 38dbe03f88SHong Zhang bcol_max = 0; 39d94109b8SHong Zhang for (j=0; j<n ; ++j){ 40d94109b8SHong Zhang brow = idx[j]/bs; /* convert the indices into block indices */ 41d94109b8SHong Zhang if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 42dbe03f88SHong Zhang if(!PetscBTLookupSet(table,brow)) { 43dbe03f88SHong Zhang nidx[isz++] = brow; 44dbe03f88SHong Zhang if (bcol_max < brow) bcol_max = brow; 45dbe03f88SHong Zhang } 46d94109b8SHong Zhang } 47d94109b8SHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 48d94109b8SHong Zhang ierr = ISDestroy(is[i]);CHKERRQ(ierr); 49d94109b8SHong Zhang 50d94109b8SHong Zhang k = 0; 51d94109b8SHong Zhang for (j=0; j<ov; j++){ /* for each overlap */ 52d94109b8SHong Zhang /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */ 53d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr); 54efee365bSSatish Balay for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); } 55d94109b8SHong Zhang 56d94109b8SHong Zhang n = isz; /* length of the updated is[i] */ 57d94109b8SHong Zhang for (brow=0; brow<mbs; brow++){ 58d94109b8SHong Zhang start = ai[brow]; end = ai[brow+1]; 59d94109b8SHong Zhang if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */ 60d94109b8SHong Zhang for (l = start; l<end ; l++){ 61d94109b8SHong Zhang bcol = aj[l]; 62d94109b8SHong Zhang if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;} 63d94109b8SHong Zhang } 64d94109b8SHong Zhang k++; 65d94109b8SHong Zhang if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */ 66d94109b8SHong Zhang } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */ 67d94109b8SHong Zhang for (l = start; l<end ; l++){ 68d94109b8SHong Zhang bcol = aj[l]; 69dbe03f88SHong Zhang if (bcol > bcol_max) break; 70d94109b8SHong Zhang if (PetscBTLookup(table0,bcol)){ 71d94109b8SHong Zhang if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;} 72d94109b8SHong Zhang break; /* for l = start; l<end ; l++) */ 73d94109b8SHong Zhang } 74d94109b8SHong Zhang } 75d94109b8SHong Zhang } 76d94109b8SHong Zhang } 77d94109b8SHong Zhang } /* for each overlap */ 78d94109b8SHong Zhang 79d94109b8SHong Zhang /* expand the Index Set */ 80d94109b8SHong Zhang for (j=0; j<isz; j++) { 81d94109b8SHong Zhang for (k=0; k<bs; k++) 82d94109b8SHong Zhang nidx2[j*bs+k] = nidx[j]*bs+k; 83d94109b8SHong Zhang } 84d94109b8SHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 85d94109b8SHong Zhang } 86d94109b8SHong Zhang ierr = PetscBTDestroy(table);CHKERRQ(ierr); 87d94109b8SHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 88d94109b8SHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 89d94109b8SHong Zhang ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 905eee224dSHong Zhang PetscFunctionReturn(0); 9149b5e25fSSatish Balay } 9249b5e25fSSatish Balay 934a2ae208SSatish Balay #undef __FUNCT__ 944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 9513f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 9649b5e25fSSatish Balay { 9749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 986849ba73SBarry Smith PetscErrorCode ierr; 9913f74950SBarry Smith PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens; 10013f74950SBarry Smith PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 101899cda47SBarry Smith PetscInt *irow,nrows,*ssmap,bs=A->rmap.bs,bs2=a->bs2; 10213f74950SBarry Smith PetscInt *aj = a->j,*ai = a->i; 10349b5e25fSSatish Balay MatScalar *mat_a; 10449b5e25fSSatish Balay Mat C; 10549b5e25fSSatish Balay PetscTruth flag; 10649b5e25fSSatish Balay 10749b5e25fSSatish Balay PetscFunctionBegin; 108e005ede5SBarry Smith if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 10949b5e25fSSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 110347d480fSBarry Smith if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 11149b5e25fSSatish Balay 11249b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 11349b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 11449b5e25fSSatish Balay 11513f74950SBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 11649b5e25fSSatish Balay ssmap = smap; 11713f74950SBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 11813f74950SBarry Smith ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 11949b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 12049b5e25fSSatish Balay /* determine lens of each row */ 12149b5e25fSSatish Balay for (i=0; i<nrows; i++) { 12249b5e25fSSatish Balay kstart = ai[irow[i]]; 12349b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 12449b5e25fSSatish Balay lens[i] = 0; 12549b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 12649b5e25fSSatish Balay if (ssmap[aj[k]]) { 12749b5e25fSSatish Balay lens[i]++; 12849b5e25fSSatish Balay } 12949b5e25fSSatish Balay } 13049b5e25fSSatish Balay } 13149b5e25fSSatish Balay /* Create and fill new matrix */ 13249b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 13349b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 13449b5e25fSSatish Balay 135899cda47SBarry Smith if (c->mbs!=nrows || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 13613f74950SBarry Smith ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 137abc0a331SBarry Smith if (!flag) { 138347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 13949b5e25fSSatish Balay } 14013f74950SBarry Smith ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 14149b5e25fSSatish Balay C = *B; 14249b5e25fSSatish Balay } else { 143f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 144f69a0ea3SMatthew Knepley ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 145e2d9671bSKris Buschelman ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 146ab93d7beSBarry Smith ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr); 14749b5e25fSSatish Balay } 14849b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 14949b5e25fSSatish Balay for (i=0; i<nrows; i++) { 15049b5e25fSSatish Balay row = irow[i]; 15149b5e25fSSatish Balay kstart = ai[row]; 15249b5e25fSSatish Balay kend = kstart + a->ilen[row]; 15349b5e25fSSatish Balay mat_i = c->i[i]; 15449b5e25fSSatish Balay mat_j = c->j + mat_i; 15549b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 15649b5e25fSSatish Balay mat_ilen = c->ilen + i; 15749b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 15849b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 15949b5e25fSSatish Balay *mat_j++ = tcol - 1; 16049b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 16149b5e25fSSatish Balay mat_a += bs2; 16249b5e25fSSatish Balay (*mat_ilen)++; 16349b5e25fSSatish Balay } 16449b5e25fSSatish Balay } 16549b5e25fSSatish Balay } 16649b5e25fSSatish Balay 16749b5e25fSSatish Balay /* Free work space */ 16849b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 16949b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 17049b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17149b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17249b5e25fSSatish Balay 17349b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 17449b5e25fSSatish Balay *B = C; 17549b5e25fSSatish Balay PetscFunctionReturn(0); 17649b5e25fSSatish Balay } 17749b5e25fSSatish Balay 1784a2ae208SSatish Balay #undef __FUNCT__ 1794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 18013f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 18149b5e25fSSatish Balay { 18249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 18349b5e25fSSatish Balay IS is1; 1846849ba73SBarry Smith PetscErrorCode ierr; 185899cda47SBarry Smith PetscInt *vary,*iary,*irow,nrows,i,bs=A->rmap.bs,count; 18649b5e25fSSatish Balay 18749b5e25fSSatish Balay PetscFunctionBegin; 188e005ede5SBarry Smith if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro"); 18949b5e25fSSatish Balay 19049b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 19149b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 19249b5e25fSSatish Balay 19349b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 19449b5e25fSSatish Balay and form the IS with compressed IS */ 19513f74950SBarry Smith ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 19649b5e25fSSatish Balay iary = vary + a->mbs; 19713f74950SBarry Smith ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 19849b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 19949b5e25fSSatish Balay 20049b5e25fSSatish Balay count = 0; 20149b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 202e005ede5SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks"); 20349b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 20449b5e25fSSatish Balay } 20549b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 20649b5e25fSSatish Balay 20749b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 20849b5e25fSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 20949b5e25fSSatish Balay 21049b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 21149b5e25fSSatish Balay ISDestroy(is1); 21249b5e25fSSatish Balay PetscFunctionReturn(0); 21349b5e25fSSatish Balay } 21449b5e25fSSatish Balay 2154a2ae208SSatish Balay #undef __FUNCT__ 2164a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 21713f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 21849b5e25fSSatish Balay { 2196849ba73SBarry Smith PetscErrorCode ierr; 22013f74950SBarry Smith PetscInt i; 22149b5e25fSSatish Balay 22249b5e25fSSatish Balay PetscFunctionBegin; 22349b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 22482502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 22549b5e25fSSatish Balay } 22649b5e25fSSatish Balay 22749b5e25fSSatish Balay for (i=0; i<n; i++) { 22849b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 22949b5e25fSSatish Balay } 23049b5e25fSSatish Balay PetscFunctionReturn(0); 23149b5e25fSSatish Balay } 23249b5e25fSSatish Balay 23349b5e25fSSatish Balay /* -------------------------------------------------------*/ 23449b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 23549b5e25fSSatish Balay /* -------------------------------------------------------*/ 236d9eff348SSatish Balay #include "petscblaslapack.h" 23749b5e25fSSatish Balay 2384a2ae208SSatish Balay #undef __FUNCT__ 2394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1" 240dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 24149b5e25fSSatish Balay { 24249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 24387828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,zero=0.0; 24449b5e25fSSatish Balay MatScalar *v; 2456849ba73SBarry Smith PetscErrorCode ierr; 24613f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 24749b5e25fSSatish Balay 24849b5e25fSSatish Balay PetscFunctionBegin; 2492dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2501ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2511ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 25249b5e25fSSatish Balay 25349b5e25fSSatish Balay v = a->a; 25449b5e25fSSatish Balay xb = x; 25549b5e25fSSatish Balay 25649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 25749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 258*3cb60589SBarry Smith x1 = *xb++; 259*3cb60589SBarry Smith ib = aj + *ai++; 260831a3094SHong Zhang jmin = 0; 261*3cb60589SBarry Smith /* if we ALWAYS required a diagonal entry then could remove this if test */ 262*3cb60589SBarry Smith /* should we use a tmp to hold the accumulated z[i] */ 263831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 264831a3094SHong Zhang z[i] += *v++ * x[*ib++]; 265831a3094SHong Zhang jmin++; 266831a3094SHong Zhang } 267831a3094SHong Zhang for (j=jmin; j<n; j++) { 26849b5e25fSSatish Balay cval = *ib; 26949b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 27049b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 27149b5e25fSSatish Balay } 27249b5e25fSSatish Balay } 27349b5e25fSSatish Balay 2741ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2751ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 276899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); /* nz = (nz+m)/2 */ 27749b5e25fSSatish Balay PetscFunctionReturn(0); 27849b5e25fSSatish Balay } 27949b5e25fSSatish Balay 2804a2ae208SSatish Balay #undef __FUNCT__ 2814a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 282dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 28349b5e25fSSatish Balay { 28449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 28587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 28649b5e25fSSatish Balay MatScalar *v; 2876849ba73SBarry Smith PetscErrorCode ierr; 28813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 28949b5e25fSSatish Balay 29049b5e25fSSatish Balay PetscFunctionBegin; 2912dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 2921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2931ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 29449b5e25fSSatish Balay 29549b5e25fSSatish Balay v = a->a; 29649b5e25fSSatish Balay xb = x; 29749b5e25fSSatish Balay 29849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 29949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 30049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 30149b5e25fSSatish Balay ib = aj + *ai; 302831a3094SHong Zhang jmin = 0; 3037fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 30449b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 30549b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 306831a3094SHong Zhang v += 4; jmin++; 3077fbae186SHong Zhang } 308831a3094SHong Zhang for (j=jmin; j<n; j++) { 30949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 31049b5e25fSSatish Balay cval = ib[j]*2; 31149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 31249b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 31349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 31449b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 31549b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 31649b5e25fSSatish Balay v += 4; 31749b5e25fSSatish Balay } 31849b5e25fSSatish Balay xb +=2; ai++; 31949b5e25fSSatish Balay } 32049b5e25fSSatish Balay 3211ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3221ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 323899cda47SBarry Smith ierr = PetscLogFlops(8*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 32449b5e25fSSatish Balay PetscFunctionReturn(0); 32549b5e25fSSatish Balay } 32649b5e25fSSatish Balay 3274a2ae208SSatish Balay #undef __FUNCT__ 3284a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 329dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 33049b5e25fSSatish Balay { 33149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 33287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 33349b5e25fSSatish Balay MatScalar *v; 3346849ba73SBarry Smith PetscErrorCode ierr; 33513f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 33649b5e25fSSatish Balay 33749b5e25fSSatish Balay PetscFunctionBegin; 3382dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 3391ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3401ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 34149b5e25fSSatish Balay 34249b5e25fSSatish Balay v = a->a; 34349b5e25fSSatish Balay xb = x; 34449b5e25fSSatish Balay 34549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 34649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 34749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 34849b5e25fSSatish Balay ib = aj + *ai; 349831a3094SHong Zhang jmin = 0; 3507fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 35149b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 35249b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 35349b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 354831a3094SHong Zhang v += 9; jmin++; 3557fbae186SHong Zhang } 356831a3094SHong Zhang for (j=jmin; j<n; j++) { 35749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 35849b5e25fSSatish Balay cval = ib[j]*3; 35949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 36049b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 36149b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 36249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 36349b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 36449b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 36549b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 36649b5e25fSSatish Balay v += 9; 36749b5e25fSSatish Balay } 36849b5e25fSSatish Balay xb +=3; ai++; 36949b5e25fSSatish Balay } 37049b5e25fSSatish Balay 3711ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3721ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 373899cda47SBarry Smith ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 37449b5e25fSSatish Balay PetscFunctionReturn(0); 37549b5e25fSSatish Balay } 37649b5e25fSSatish Balay 3774a2ae208SSatish Balay #undef __FUNCT__ 3784a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 379dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 38049b5e25fSSatish Balay { 38149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 38349b5e25fSSatish Balay MatScalar *v; 3846849ba73SBarry Smith PetscErrorCode ierr; 38513f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 38649b5e25fSSatish Balay 38749b5e25fSSatish Balay PetscFunctionBegin; 3882dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 3891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3901ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 39149b5e25fSSatish Balay 39249b5e25fSSatish Balay v = a->a; 39349b5e25fSSatish Balay xb = x; 39449b5e25fSSatish Balay 39549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 39649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 39749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 39849b5e25fSSatish Balay ib = aj + *ai; 399831a3094SHong Zhang jmin = 0; 4007fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 40149b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 40249b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 40349b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 40449b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 405831a3094SHong Zhang v += 16; jmin++; 4067fbae186SHong Zhang } 407831a3094SHong Zhang for (j=jmin; j<n; j++) { 40849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 40949b5e25fSSatish Balay cval = ib[j]*4; 41049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 41149b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 41249b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 41349b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 41449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 41549b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 41649b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 41749b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 41849b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 41949b5e25fSSatish Balay v += 16; 42049b5e25fSSatish Balay } 42149b5e25fSSatish Balay xb +=4; ai++; 42249b5e25fSSatish Balay } 42349b5e25fSSatish Balay 4241ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4251ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 426899cda47SBarry Smith ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 42749b5e25fSSatish Balay PetscFunctionReturn(0); 42849b5e25fSSatish Balay } 42949b5e25fSSatish Balay 4304a2ae208SSatish Balay #undef __FUNCT__ 4314a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 432dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 43349b5e25fSSatish Balay { 43449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43587828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 43649b5e25fSSatish Balay MatScalar *v; 4376849ba73SBarry Smith PetscErrorCode ierr; 43813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 43949b5e25fSSatish Balay 44049b5e25fSSatish Balay PetscFunctionBegin; 4412dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4421ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4431ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 44449b5e25fSSatish Balay 44549b5e25fSSatish Balay v = a->a; 44649b5e25fSSatish Balay xb = x; 44749b5e25fSSatish Balay 44849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 44949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 45049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 45149b5e25fSSatish Balay ib = aj + *ai; 452831a3094SHong Zhang jmin = 0; 4537fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 45449b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 45549b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 45649b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 45749b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 45849b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 459831a3094SHong Zhang v += 25; jmin++; 4607fbae186SHong Zhang } 461831a3094SHong Zhang for (j=jmin; j<n; j++) { 46249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 46349b5e25fSSatish Balay cval = ib[j]*5; 46449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 46549b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 46649b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 46749b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 46849b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 46949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 47049b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 47149b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 47249b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 47349b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 47449b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 47549b5e25fSSatish Balay v += 25; 47649b5e25fSSatish Balay } 47749b5e25fSSatish Balay xb +=5; ai++; 47849b5e25fSSatish Balay } 47949b5e25fSSatish Balay 4801ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4811ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 482899cda47SBarry Smith ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 48349b5e25fSSatish Balay PetscFunctionReturn(0); 48449b5e25fSSatish Balay } 48549b5e25fSSatish Balay 48649b5e25fSSatish Balay 4874a2ae208SSatish Balay #undef __FUNCT__ 4884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 489dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 49049b5e25fSSatish Balay { 49149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 49287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 49349b5e25fSSatish Balay MatScalar *v; 4946849ba73SBarry Smith PetscErrorCode ierr; 49513f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 49649b5e25fSSatish Balay 49749b5e25fSSatish Balay PetscFunctionBegin; 4982dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 4991ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5001ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 50149b5e25fSSatish Balay 50249b5e25fSSatish Balay v = a->a; 50349b5e25fSSatish Balay xb = x; 50449b5e25fSSatish Balay 50549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 50649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 50749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 50849b5e25fSSatish Balay ib = aj + *ai; 509831a3094SHong Zhang jmin = 0; 5107fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 51149b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 51249b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 51349b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 51449b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 51549b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 51649b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 517831a3094SHong Zhang v += 36; jmin++; 5187fbae186SHong Zhang } 519831a3094SHong Zhang for (j=jmin; j<n; j++) { 52049b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 52149b5e25fSSatish Balay cval = ib[j]*6; 52249b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 52349b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 52449b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 52549b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 52649b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 52749b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 52849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 52949b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 53049b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 53149b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 53249b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 53349b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 53449b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 53549b5e25fSSatish Balay v += 36; 53649b5e25fSSatish Balay } 53749b5e25fSSatish Balay xb +=6; ai++; 53849b5e25fSSatish Balay } 53949b5e25fSSatish Balay 5401ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 542899cda47SBarry Smith ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 54349b5e25fSSatish Balay PetscFunctionReturn(0); 54449b5e25fSSatish Balay } 5454a2ae208SSatish Balay #undef __FUNCT__ 5464a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 547dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 54849b5e25fSSatish Balay { 54949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55087828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 55149b5e25fSSatish Balay MatScalar *v; 5526849ba73SBarry Smith PetscErrorCode ierr; 55313f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 55449b5e25fSSatish Balay 55549b5e25fSSatish Balay PetscFunctionBegin; 5562dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 5571ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5581ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 55949b5e25fSSatish Balay 56049b5e25fSSatish Balay v = a->a; 56149b5e25fSSatish Balay xb = x; 56249b5e25fSSatish Balay 56349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 56449b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 56549b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 56649b5e25fSSatish Balay ib = aj + *ai; 567831a3094SHong Zhang jmin = 0; 5687fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 56949b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 57049b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 57149b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 57249b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 57349b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 57449b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 57549b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 576831a3094SHong Zhang v += 49; jmin++; 5777fbae186SHong Zhang } 578831a3094SHong Zhang for (j=jmin; j<n; j++) { 57949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 58049b5e25fSSatish Balay cval = ib[j]*7; 58149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 58249b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 58349b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 58449b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 58549b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 58649b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 58749b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 58849b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 58949b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 59049b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 59149b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 59249b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 59349b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 59449b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 59549b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 59649b5e25fSSatish Balay v += 49; 59749b5e25fSSatish Balay } 59849b5e25fSSatish Balay xb +=7; ai++; 59949b5e25fSSatish Balay } 6001ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6011ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 602899cda47SBarry Smith ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr); 60349b5e25fSSatish Balay PetscFunctionReturn(0); 60449b5e25fSSatish Balay } 60549b5e25fSSatish Balay 60649b5e25fSSatish Balay /* 60749b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 60849b5e25fSSatish Balay */ 6094a2ae208SSatish Balay #undef __FUNCT__ 6104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 611dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 61249b5e25fSSatish Balay { 61349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 61487828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 6150b60a74dSHong Zhang MatScalar *v; 616dfbe8321SBarry Smith PetscErrorCode ierr; 617899cda47SBarry Smith PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k; 61849b5e25fSSatish Balay 61949b5e25fSSatish Balay PetscFunctionBegin; 6202dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 6211ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 6221ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 62349b5e25fSSatish Balay 62449b5e25fSSatish Balay aj = a->j; 62549b5e25fSSatish Balay v = a->a; 62649b5e25fSSatish Balay ii = a->i; 62749b5e25fSSatish Balay 62849b5e25fSSatish Balay if (!a->mult_work) { 629899cda47SBarry Smith ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 63049b5e25fSSatish Balay } 63149b5e25fSSatish Balay work = a->mult_work; 63249b5e25fSSatish Balay 63349b5e25fSSatish Balay for (i=0; i<mbs; i++) { 63449b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 63549b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 63649b5e25fSSatish Balay 63749b5e25fSSatish Balay /* upper triangular part */ 63849b5e25fSSatish Balay for (j=0; j<n; j++) { 63949b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 64049b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 64149b5e25fSSatish Balay workt += bs; 64249b5e25fSSatish Balay } 64349b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 64449b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 64549b5e25fSSatish Balay 64649b5e25fSSatish Balay /* strict lower triangular part */ 647831a3094SHong Zhang idx = aj+ii[0]; 648831a3094SHong Zhang if (*idx == i){ 64996b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 650831a3094SHong Zhang } 65196b9376eSHong Zhang 65249b5e25fSSatish Balay if (ncols > 0){ 65349b5e25fSSatish Balay workt = work; 65487828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 655831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 656831a3094SHong Zhang for (j=0; j<n; j++) { 657831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 65849b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 65949b5e25fSSatish Balay workt += bs; 66049b5e25fSSatish Balay } 66149b5e25fSSatish Balay } 66249b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 66349b5e25fSSatish Balay } 66449b5e25fSSatish Balay 6651ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6661ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 667899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N)*bs2 - A->rmap.N);CHKERRQ(ierr); 66849b5e25fSSatish Balay PetscFunctionReturn(0); 66949b5e25fSSatish Balay } 67049b5e25fSSatish Balay 6716a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec); 6724a2ae208SSatish Balay #undef __FUNCT__ 6734a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 674dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 67549b5e25fSSatish Balay { 67649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 677bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1; 67849b5e25fSSatish Balay MatScalar *v; 6796849ba73SBarry Smith PetscErrorCode ierr; 68013f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 68149b5e25fSSatish Balay 68249b5e25fSSatish Balay PetscFunctionBegin; 6836a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 6841ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6851ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 68649b5e25fSSatish Balay v = a->a; 68749b5e25fSSatish Balay xb = x; 68849b5e25fSSatish Balay 68949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 69049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 69149b5e25fSSatish Balay x1 = xb[0]; 69249b5e25fSSatish Balay ib = aj + *ai; 693831a3094SHong Zhang jmin = 0; 694831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 695831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 696831a3094SHong Zhang } 697831a3094SHong Zhang for (j=jmin; j<n; j++) { 69849b5e25fSSatish Balay cval = *ib; 69949b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 70049b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 70149b5e25fSSatish Balay } 70249b5e25fSSatish Balay xb++; ai++; 70349b5e25fSSatish Balay } 70449b5e25fSSatish Balay 7051ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 706bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 70749b5e25fSSatish Balay 708899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 70949b5e25fSSatish Balay PetscFunctionReturn(0); 71049b5e25fSSatish Balay } 71149b5e25fSSatish Balay 7124a2ae208SSatish Balay #undef __FUNCT__ 7134a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 714dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 71549b5e25fSSatish Balay { 71649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 717bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2; 71849b5e25fSSatish Balay MatScalar *v; 7196849ba73SBarry Smith PetscErrorCode ierr; 72013f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 72149b5e25fSSatish Balay 72249b5e25fSSatish Balay PetscFunctionBegin; 7236a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 7241ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7251ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 72649b5e25fSSatish Balay 72749b5e25fSSatish Balay v = a->a; 72849b5e25fSSatish Balay xb = x; 72949b5e25fSSatish Balay 73049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 73149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 73249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 73349b5e25fSSatish Balay ib = aj + *ai; 734831a3094SHong Zhang jmin = 0; 7357fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 73649b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 73749b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 738831a3094SHong Zhang v += 4; jmin++; 7397fbae186SHong Zhang } 740831a3094SHong Zhang for (j=jmin; j<n; j++) { 74149b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 74249b5e25fSSatish Balay cval = ib[j]*2; 74349b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 74449b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 74549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 74649b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 74749b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 74849b5e25fSSatish Balay v += 4; 74949b5e25fSSatish Balay } 75049b5e25fSSatish Balay xb +=2; ai++; 75149b5e25fSSatish Balay } 7521ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 753bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 75449b5e25fSSatish Balay 755899cda47SBarry Smith ierr = PetscLogFlops(4*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 75649b5e25fSSatish Balay PetscFunctionReturn(0); 75749b5e25fSSatish Balay } 75849b5e25fSSatish Balay 7594a2ae208SSatish Balay #undef __FUNCT__ 7604a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 761dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 76249b5e25fSSatish Balay { 76349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 764bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3; 76549b5e25fSSatish Balay MatScalar *v; 7666849ba73SBarry Smith PetscErrorCode ierr; 76713f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 76849b5e25fSSatish Balay 76949b5e25fSSatish Balay PetscFunctionBegin; 7706a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 7711ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7721ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 77349b5e25fSSatish Balay 77449b5e25fSSatish Balay v = a->a; 77549b5e25fSSatish Balay xb = x; 77649b5e25fSSatish Balay 77749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 77849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 77949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 78049b5e25fSSatish Balay ib = aj + *ai; 781831a3094SHong Zhang jmin = 0; 7827fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 78349b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 78449b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 78549b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 786831a3094SHong Zhang v += 9; jmin++; 7877fbae186SHong Zhang } 788831a3094SHong Zhang for (j=jmin; j<n; j++) { 78949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 79049b5e25fSSatish Balay cval = ib[j]*3; 79149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 79249b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 79349b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 79449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 79549b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 79649b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 79749b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 79849b5e25fSSatish Balay v += 9; 79949b5e25fSSatish Balay } 80049b5e25fSSatish Balay xb +=3; ai++; 80149b5e25fSSatish Balay } 80249b5e25fSSatish Balay 8031ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 80549b5e25fSSatish Balay 806899cda47SBarry Smith ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 80749b5e25fSSatish Balay PetscFunctionReturn(0); 80849b5e25fSSatish Balay } 80949b5e25fSSatish Balay 8104a2ae208SSatish Balay #undef __FUNCT__ 8114a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 812dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 81349b5e25fSSatish Balay { 81449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 815bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4; 81649b5e25fSSatish Balay MatScalar *v; 8176849ba73SBarry Smith PetscErrorCode ierr; 81813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 81949b5e25fSSatish Balay 82049b5e25fSSatish Balay PetscFunctionBegin; 8216a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 8221ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8231ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 82449b5e25fSSatish Balay 82549b5e25fSSatish Balay v = a->a; 82649b5e25fSSatish Balay xb = x; 82749b5e25fSSatish Balay 82849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 82949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 83049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 83149b5e25fSSatish Balay ib = aj + *ai; 832831a3094SHong Zhang jmin = 0; 8337fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 83449b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 83549b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 83649b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 83749b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 838831a3094SHong Zhang v += 16; jmin++; 8397fbae186SHong Zhang } 840831a3094SHong Zhang for (j=jmin; j<n; j++) { 84149b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 84249b5e25fSSatish Balay cval = ib[j]*4; 84349b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 84449b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 84549b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 84649b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 84749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 84849b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 84949b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 85049b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 85149b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 85249b5e25fSSatish Balay v += 16; 85349b5e25fSSatish Balay } 85449b5e25fSSatish Balay xb +=4; ai++; 85549b5e25fSSatish Balay } 85649b5e25fSSatish Balay 8571ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 85949b5e25fSSatish Balay 860899cda47SBarry Smith ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 86149b5e25fSSatish Balay PetscFunctionReturn(0); 86249b5e25fSSatish Balay } 86349b5e25fSSatish Balay 8644a2ae208SSatish Balay #undef __FUNCT__ 8654a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 866dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 86749b5e25fSSatish Balay { 86849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 869bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5; 87049b5e25fSSatish Balay MatScalar *v; 8716849ba73SBarry Smith PetscErrorCode ierr; 87213f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 87349b5e25fSSatish Balay 87449b5e25fSSatish Balay PetscFunctionBegin; 8756a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 8761ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8771ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 87849b5e25fSSatish Balay 87949b5e25fSSatish Balay v = a->a; 88049b5e25fSSatish Balay xb = x; 88149b5e25fSSatish Balay 88249b5e25fSSatish Balay for (i=0; i<mbs; i++) { 88349b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 88449b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 88549b5e25fSSatish Balay ib = aj + *ai; 886831a3094SHong Zhang jmin = 0; 8877fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 88849b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 88949b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 89049b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 89149b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 89249b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 893831a3094SHong Zhang v += 25; jmin++; 8947fbae186SHong Zhang } 895831a3094SHong Zhang for (j=jmin; j<n; j++) { 89649b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 89749b5e25fSSatish Balay cval = ib[j]*5; 89849b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 89949b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 90049b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 90149b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 90249b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 90349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 90449b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 90549b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 90649b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 90749b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 90849b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 90949b5e25fSSatish Balay v += 25; 91049b5e25fSSatish Balay } 91149b5e25fSSatish Balay xb +=5; ai++; 91249b5e25fSSatish Balay } 91349b5e25fSSatish Balay 9141ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 915bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 91649b5e25fSSatish Balay 917899cda47SBarry Smith ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 91849b5e25fSSatish Balay PetscFunctionReturn(0); 91949b5e25fSSatish Balay } 9204a2ae208SSatish Balay #undef __FUNCT__ 9214a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 922dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 92349b5e25fSSatish Balay { 92449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 925bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6; 92649b5e25fSSatish Balay MatScalar *v; 9276849ba73SBarry Smith PetscErrorCode ierr; 92813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 92949b5e25fSSatish Balay 93049b5e25fSSatish Balay PetscFunctionBegin; 9316a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 9321ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9331ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 93449b5e25fSSatish Balay 93549b5e25fSSatish Balay v = a->a; 93649b5e25fSSatish Balay xb = x; 93749b5e25fSSatish Balay 93849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 93949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 94049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 94149b5e25fSSatish Balay ib = aj + *ai; 942831a3094SHong Zhang jmin = 0; 9437fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 94449b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 94549b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 94649b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 94749b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 94849b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 94949b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 950831a3094SHong Zhang v += 36; jmin++; 9517fbae186SHong Zhang } 952831a3094SHong Zhang for (j=jmin; j<n; j++) { 95349b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 95449b5e25fSSatish Balay cval = ib[j]*6; 95549b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 95649b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 95749b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 95849b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 95949b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 96049b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 96149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 96249b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 96349b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 96449b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 96549b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 96649b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 96749b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 96849b5e25fSSatish Balay v += 36; 96949b5e25fSSatish Balay } 97049b5e25fSSatish Balay xb +=6; ai++; 97149b5e25fSSatish Balay } 97249b5e25fSSatish Balay 9731ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 974bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 97549b5e25fSSatish Balay 976899cda47SBarry Smith ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 97749b5e25fSSatish Balay PetscFunctionReturn(0); 97849b5e25fSSatish Balay } 97949b5e25fSSatish Balay 9804a2ae208SSatish Balay #undef __FUNCT__ 9814a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 982dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 98349b5e25fSSatish Balay { 98449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 985bba805e6SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 98649b5e25fSSatish Balay MatScalar *v; 9876849ba73SBarry Smith PetscErrorCode ierr; 98813f74950SBarry Smith PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin; 98949b5e25fSSatish Balay 99049b5e25fSSatish Balay PetscFunctionBegin; 9916a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 9921ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9931ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 99449b5e25fSSatish Balay 99549b5e25fSSatish Balay v = a->a; 99649b5e25fSSatish Balay xb = x; 99749b5e25fSSatish Balay 99849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 99949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 100049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 100149b5e25fSSatish Balay ib = aj + *ai; 1002831a3094SHong Zhang jmin = 0; 10037fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 100449b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 100549b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 100649b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 100749b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 100849b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 100949b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 101049b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1011831a3094SHong Zhang v += 49; jmin++; 10127fbae186SHong Zhang } 1013831a3094SHong Zhang for (j=jmin; j<n; j++) { 101449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 101549b5e25fSSatish Balay cval = ib[j]*7; 101649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 101749b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 101849b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 101949b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 102049b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 102149b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 102249b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 102349b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 102449b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 102549b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 102649b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 102749b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 102849b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 102949b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 103049b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 103149b5e25fSSatish Balay v += 49; 103249b5e25fSSatish Balay } 103349b5e25fSSatish Balay xb +=7; ai++; 103449b5e25fSSatish Balay } 103549b5e25fSSatish Balay 10361ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1037bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 103849b5e25fSSatish Balay 1039899cda47SBarry Smith ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 104049b5e25fSSatish Balay PetscFunctionReturn(0); 104149b5e25fSSatish Balay } 104249b5e25fSSatish Balay 10434a2ae208SSatish Balay #undef __FUNCT__ 10444a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 1045dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 104649b5e25fSSatish Balay { 104749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1048bba805e6SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1049066653e3SSatish Balay MatScalar *v; 1050dfbe8321SBarry Smith PetscErrorCode ierr; 1051899cda47SBarry Smith PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k; 105249b5e25fSSatish Balay 105349b5e25fSSatish Balay PetscFunctionBegin; 10546a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 10551ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 10561ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 105749b5e25fSSatish Balay 105849b5e25fSSatish Balay aj = a->j; 105949b5e25fSSatish Balay v = a->a; 106049b5e25fSSatish Balay ii = a->i; 106149b5e25fSSatish Balay 106249b5e25fSSatish Balay if (!a->mult_work) { 1063899cda47SBarry Smith ierr = PetscMalloc((A->rmap.n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 106449b5e25fSSatish Balay } 106549b5e25fSSatish Balay work = a->mult_work; 106649b5e25fSSatish Balay 106749b5e25fSSatish Balay 106849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 106949b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 107049b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 107149b5e25fSSatish Balay 107249b5e25fSSatish Balay /* upper triangular part */ 107349b5e25fSSatish Balay for (j=0; j<n; j++) { 107449b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 107549b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 107649b5e25fSSatish Balay workt += bs; 107749b5e25fSSatish Balay } 107849b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 107949b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 108049b5e25fSSatish Balay 108149b5e25fSSatish Balay /* strict lower triangular part */ 1082831a3094SHong Zhang idx = aj+ii[0]; 1083831a3094SHong Zhang if (*idx == i){ 108496b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1085831a3094SHong Zhang } 108649b5e25fSSatish Balay if (ncols > 0){ 108749b5e25fSSatish Balay workt = work; 108887828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1089831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1090831a3094SHong Zhang for (j=0; j<n; j++) { 1091831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 109249b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 109349b5e25fSSatish Balay workt += bs; 109449b5e25fSSatish Balay } 109549b5e25fSSatish Balay } 109649b5e25fSSatish Balay 109749b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 109849b5e25fSSatish Balay } 109949b5e25fSSatish Balay 11001ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1101bba805e6SBarry Smith ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 110249b5e25fSSatish Balay 1103899cda47SBarry Smith ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr); 110449b5e25fSSatish Balay PetscFunctionReturn(0); 110549b5e25fSSatish Balay } 110649b5e25fSSatish Balay 11074a2ae208SSatish Balay #undef __FUNCT__ 11084a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1109f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha) 111049b5e25fSSatish Balay { 111149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1112f4df32b1SMatthew Knepley PetscScalar oalpha = alpha; 11134ce68768SBarry Smith PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz; 1114efee365bSSatish Balay PetscErrorCode ierr; 111549b5e25fSSatish Balay 111649b5e25fSSatish Balay PetscFunctionBegin; 1117f4df32b1SMatthew Knepley BLASscal_(&totalnz,&oalpha,a->a,&one); 1118efee365bSSatish Balay ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 111949b5e25fSSatish Balay PetscFunctionReturn(0); 112049b5e25fSSatish Balay } 112149b5e25fSSatish Balay 11224a2ae208SSatish Balay #undef __FUNCT__ 11234a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 1124dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 112549b5e25fSSatish Balay { 112649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 112749b5e25fSSatish Balay MatScalar *v = a->a; 112849b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1129899cda47SBarry Smith PetscInt i,j,k,bs = A->rmap.bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 11306849ba73SBarry Smith PetscErrorCode ierr; 113113f74950SBarry Smith PetscInt *jl,*il,jmin,jmax,nexti,ik,*col; 113249b5e25fSSatish Balay 113349b5e25fSSatish Balay PetscFunctionBegin; 113449b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 113549b5e25fSSatish Balay for (k=0; k<mbs; k++){ 113649b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1137831a3094SHong Zhang col = aj + jmin; 1138831a3094SHong Zhang if (*col == k){ /* diagonal block */ 113949b5e25fSSatish Balay for (i=0; i<bs2; i++){ 114049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 114149b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 114249b5e25fSSatish Balay #else 114349b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 114449b5e25fSSatish Balay #endif 114549b5e25fSSatish Balay } 1146831a3094SHong Zhang jmin++; 1147831a3094SHong Zhang } 1148831a3094SHong Zhang for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 114949b5e25fSSatish Balay for (i=0; i<bs2; i++){ 115049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 115149b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 115249b5e25fSSatish Balay #else 115349b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 115449b5e25fSSatish Balay #endif 115549b5e25fSSatish Balay } 115649b5e25fSSatish Balay } 115749b5e25fSSatish Balay } 115849b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 11590b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */ 11600b8dc8d2SHong Zhang ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr); 11610b8dc8d2SHong Zhang jl = il + mbs; 11620b8dc8d2SHong Zhang sum = (PetscReal*)(jl + mbs); 11630b8dc8d2SHong Zhang for (i=0; i<mbs; i++) jl[i] = mbs; 11640b8dc8d2SHong Zhang il[0] = 0; 116549b5e25fSSatish Balay 116649b5e25fSSatish Balay *norm = 0.0; 116749b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 116849b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 116949b5e25fSSatish Balay /*-- col sum --*/ 117049b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1171831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1172831a3094SHong Zhang at step k */ 117349b5e25fSSatish Balay while (i<mbs){ 117449b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 117549b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 117649b5e25fSSatish Balay for (j=0; j<bs; j++){ 117749b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 117849b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 117949b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 118049b5e25fSSatish Balay } 118149b5e25fSSatish Balay } 118249b5e25fSSatish Balay /* update il, jl */ 1183831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1184831a3094SHong Zhang jmax = a->i[i+1]; 118549b5e25fSSatish Balay if (jmin < jmax){ 118649b5e25fSSatish Balay il[i] = jmin; 118749b5e25fSSatish Balay j = a->j[jmin]; 118849b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 118949b5e25fSSatish Balay } 119049b5e25fSSatish Balay i = nexti; 119149b5e25fSSatish Balay } 119249b5e25fSSatish Balay /*-- row sum --*/ 119349b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 119449b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 119549b5e25fSSatish Balay for (j=0; j<bs; j++){ 119649b5e25fSSatish Balay v = a->a + i*bs2 + j; 119749b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 11980b8dc8d2SHong Zhang sum[j] += PetscAbsScalar(*v); v += bs; 119949b5e25fSSatish Balay } 120049b5e25fSSatish Balay } 120149b5e25fSSatish Balay } 120249b5e25fSSatish Balay /* add k_th block row to il, jl */ 1203831a3094SHong Zhang col = aj+jmin; 1204831a3094SHong Zhang if (*col == k) jmin++; 120549b5e25fSSatish Balay if (jmin < jmax){ 120649b5e25fSSatish Balay il[k] = jmin; 12070b8dc8d2SHong Zhang j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k; 120849b5e25fSSatish Balay } 120949b5e25fSSatish Balay for (j=0; j<bs; j++){ 121049b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 121149b5e25fSSatish Balay } 121249b5e25fSSatish Balay } 121349b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 121449b5e25fSSatish Balay } else { 1215347d480fSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 121649b5e25fSSatish Balay } 121749b5e25fSSatish Balay PetscFunctionReturn(0); 121849b5e25fSSatish Balay } 121949b5e25fSSatish Balay 12204a2ae208SSatish Balay #undef __FUNCT__ 12214a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 1222dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 122349b5e25fSSatish Balay { 122449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1225dfbe8321SBarry Smith PetscErrorCode ierr; 122649b5e25fSSatish Balay 122749b5e25fSSatish Balay PetscFunctionBegin; 122849b5e25fSSatish Balay 122949b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1230899cda47SBarry Smith if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) { 1231ef511fbeSHong Zhang *flg = PETSC_FALSE; 1232ef511fbeSHong Zhang PetscFunctionReturn(0); 123349b5e25fSSatish Balay } 123449b5e25fSSatish Balay 123549b5e25fSSatish Balay /* if the a->i are the same */ 123613f74950SBarry Smith ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1237abc0a331SBarry Smith if (!*flg) { 123849b5e25fSSatish Balay PetscFunctionReturn(0); 123949b5e25fSSatish Balay } 124049b5e25fSSatish Balay 124149b5e25fSSatish Balay /* if a->j are the same */ 124213f74950SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1243abc0a331SBarry Smith if (!*flg) { 124449b5e25fSSatish Balay PetscFunctionReturn(0); 124549b5e25fSSatish Balay } 124649b5e25fSSatish Balay /* if a->a are the same */ 1247899cda47SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(A->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1248935af2e7SHong Zhang PetscFunctionReturn(0); 124949b5e25fSSatish Balay } 125049b5e25fSSatish Balay 12514a2ae208SSatish Balay #undef __FUNCT__ 12524a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 1253dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 125449b5e25fSSatish Balay { 125549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1256dfbe8321SBarry Smith PetscErrorCode ierr; 125713f74950SBarry Smith PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 125887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 125949b5e25fSSatish Balay MatScalar *aa,*aa_j; 126049b5e25fSSatish Balay 126149b5e25fSSatish Balay PetscFunctionBegin; 1262899cda47SBarry Smith bs = A->rmap.bs; 126382799104SHong Zhang if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 126482799104SHong Zhang 126549b5e25fSSatish Balay aa = a->a; 126649b5e25fSSatish Balay ai = a->i; 126749b5e25fSSatish Balay aj = a->j; 126849b5e25fSSatish Balay ambs = a->mbs; 126949b5e25fSSatish Balay bs2 = a->bs2; 127049b5e25fSSatish Balay 12712dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 12721ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 127349b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1274899cda47SBarry Smith if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 127549b5e25fSSatish Balay for (i=0; i<ambs; i++) { 127649b5e25fSSatish Balay j=ai[i]; 127749b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 127849b5e25fSSatish Balay row = i*bs; 127949b5e25fSSatish Balay aa_j = aa + j*bs2; 128082799104SHong Zhang if (A->factor && bs==1){ 128182799104SHong Zhang for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k]; 128282799104SHong Zhang } else { 128349b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 128449b5e25fSSatish Balay } 128549b5e25fSSatish Balay } 128682799104SHong Zhang } 128782799104SHong Zhang 12881ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 128949b5e25fSSatish Balay PetscFunctionReturn(0); 129049b5e25fSSatish Balay } 129149b5e25fSSatish Balay 12924a2ae208SSatish Balay #undef __FUNCT__ 12934a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 1294dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 129549b5e25fSSatish Balay { 129649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 12975e90f9d9SHong Zhang PetscScalar *l,x,*li,*ri; 129849b5e25fSSatish Balay MatScalar *aa,*v; 1299dfbe8321SBarry Smith PetscErrorCode ierr; 13005e90f9d9SHong Zhang PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1301b3bf805bSHong Zhang PetscTruth flg; 130249b5e25fSSatish Balay 130349b5e25fSSatish Balay PetscFunctionBegin; 1304b3bf805bSHong Zhang if (ll != rr){ 1305b3bf805bSHong Zhang ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1306b3bf805bSHong Zhang if (!flg) 1307b3bf805bSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1308b3bf805bSHong Zhang } 1309b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 131049b5e25fSSatish Balay ai = a->i; 131149b5e25fSSatish Balay aj = a->j; 131249b5e25fSSatish Balay aa = a->a; 1313899cda47SBarry Smith m = A->rmap.N; 1314899cda47SBarry Smith bs = A->rmap.bs; 131549b5e25fSSatish Balay mbs = a->mbs; 131649b5e25fSSatish Balay bs2 = a->bs2; 131749b5e25fSSatish Balay 13181ebc52fbSHong Zhang ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 131949b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1320347d480fSBarry Smith if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 132149b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 132249b5e25fSSatish Balay M = ai[i+1] - ai[i]; 132349b5e25fSSatish Balay li = l + i*bs; 132449b5e25fSSatish Balay v = aa + bs2*ai[i]; 132549b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 132649b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 13275e90f9d9SHong Zhang for (k=0; k<bs; k++) { 132849b5e25fSSatish Balay x = ri[k]; 132949b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 133049b5e25fSSatish Balay } 133149b5e25fSSatish Balay } 133249b5e25fSSatish Balay } 13331ebc52fbSHong Zhang ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1334efee365bSSatish Balay ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 133549b5e25fSSatish Balay PetscFunctionReturn(0); 133649b5e25fSSatish Balay } 133749b5e25fSSatish Balay 13384a2ae208SSatish Balay #undef __FUNCT__ 13394a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 1340dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 134149b5e25fSSatish Balay { 134249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 134349b5e25fSSatish Balay 134449b5e25fSSatish Balay PetscFunctionBegin; 1345899cda47SBarry Smith info->rows_global = (double)A->rmap.N; 1346899cda47SBarry Smith info->columns_global = (double)A->rmap.N; 1347899cda47SBarry Smith info->rows_local = (double)A->rmap.N; 1348899cda47SBarry Smith info->columns_local = (double)A->rmap.N; 134949b5e25fSSatish Balay info->block_size = a->bs2; 13506c6c5352SBarry Smith info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */ 13516c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 135249b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 135349b5e25fSSatish Balay info->assemblies = A->num_ass; 135449b5e25fSSatish Balay info->mallocs = a->reallocs; 135549b5e25fSSatish Balay info->memory = A->mem; 135649b5e25fSSatish Balay if (A->factor) { 135749b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 135849b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 135949b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 136049b5e25fSSatish Balay } else { 136149b5e25fSSatish Balay info->fill_ratio_given = 0; 136249b5e25fSSatish Balay info->fill_ratio_needed = 0; 136349b5e25fSSatish Balay info->factor_mallocs = 0; 136449b5e25fSSatish Balay } 136549b5e25fSSatish Balay PetscFunctionReturn(0); 136649b5e25fSSatish Balay } 136749b5e25fSSatish Balay 136849b5e25fSSatish Balay 13694a2ae208SSatish Balay #undef __FUNCT__ 13704a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 1371dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A) 137249b5e25fSSatish Balay { 137349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1374dfbe8321SBarry Smith PetscErrorCode ierr; 137549b5e25fSSatish Balay 137649b5e25fSSatish Balay PetscFunctionBegin; 137749b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 137849b5e25fSSatish Balay PetscFunctionReturn(0); 137949b5e25fSSatish Balay } 1380dc354874SHong Zhang 13814a2ae208SSatish Balay #undef __FUNCT__ 13824a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ" 1383dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1384dc354874SHong Zhang { 1385dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1386dfbe8321SBarry Smith PetscErrorCode ierr; 138713f74950SBarry Smith PetscInt i,j,n,row,col,bs,*ai,*aj,mbs; 1388c3fca9a7SHong Zhang PetscReal atmp; 1389273d9f13SBarry Smith MatScalar *aa; 139087828ca2SBarry Smith PetscScalar zero = 0.0,*x; 139113f74950SBarry Smith PetscInt ncols,brow,bcol,krow,kcol; 1392dc354874SHong Zhang 1393dc354874SHong Zhang PetscFunctionBegin; 1394dc354874SHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1395899cda47SBarry Smith bs = A->rmap.bs; 1396dc354874SHong Zhang aa = a->a; 1397dc354874SHong Zhang ai = a->i; 1398dc354874SHong Zhang aj = a->j; 139944117c81SHong Zhang mbs = a->mbs; 1400dc354874SHong Zhang 14012dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 14021ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1403dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1404899cda47SBarry Smith if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 140544117c81SHong Zhang for (i=0; i<mbs; i++) { 1406d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1407d0f6400bSHong Zhang brow = bs*i; 140844117c81SHong Zhang for (j=0; j<ncols; j++){ 1409d0f6400bSHong Zhang bcol = bs*(*aj); 141044117c81SHong Zhang for (kcol=0; kcol<bs; kcol++){ 1411d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 141244117c81SHong Zhang for (krow=0; krow<bs; krow++){ 1413d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1414d0f6400bSHong Zhang row = brow + krow; /* row index */ 1415c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1416c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 141744117c81SHong Zhang } 141844117c81SHong Zhang } 1419d0f6400bSHong Zhang aj++; 1420dc354874SHong Zhang } 1421dc354874SHong Zhang } 14221ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1423dc354874SHong Zhang PetscFunctionReturn(0); 1424dc354874SHong Zhang } 1425