173f4d377SMatthew Knepley /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/ 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 449b5e25fSSatish Balay #include "src/inline/spops.h" 549b5e25fSSatish Balay #include "src/inline/ilu.h" 649b5e25fSSatish Balay #include "petscbt.h" 73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 849b5e25fSSatish Balay 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 11268466fbSBarry Smith int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov) 1249b5e25fSSatish Balay { 135eee224dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 145333f59eSHong Zhang int brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol, 15*d94109b8SHong Zhang start,end,*ai,*aj,bs,*nidx2; 16*d94109b8SHong Zhang PetscBT table; 17*d94109b8SHong Zhang PetscBT table0; 18*d94109b8SHong Zhang 19*d94109b8SHong Zhang PetscFunctionBegin; 20*d94109b8SHong Zhang mbs = a->mbs; 21*d94109b8SHong Zhang ai = a->i; 22*d94109b8SHong Zhang aj = a->j; 23*d94109b8SHong Zhang bs = a->bs; 24*d94109b8SHong Zhang 25*d94109b8SHong Zhang if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 26*d94109b8SHong Zhang 27*d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr); 28*d94109b8SHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 29*d94109b8SHong Zhang ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr); 30*d94109b8SHong Zhang ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr); 31*d94109b8SHong Zhang 32*d94109b8SHong Zhang for (i=0; i<is_max; i++) { /* for each is */ 33*d94109b8SHong Zhang isz = 0; 34*d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr); 35*d94109b8SHong Zhang 36*d94109b8SHong Zhang /* Extract the indices, assume there can be duplicate entries */ 37*d94109b8SHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 38*d94109b8SHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 39*d94109b8SHong Zhang 40*d94109b8SHong Zhang /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */ 41*d94109b8SHong Zhang for (j=0; j<n ; ++j){ 42*d94109b8SHong Zhang brow = idx[j]/bs; /* convert the indices into block indices */ 43*d94109b8SHong Zhang if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 44*d94109b8SHong Zhang if(!PetscBTLookupSet(table,brow)) { nidx[isz++] = brow;} 45*d94109b8SHong Zhang } 46*d94109b8SHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 47*d94109b8SHong Zhang ierr = ISDestroy(is[i]);CHKERRQ(ierr); 48*d94109b8SHong Zhang 49*d94109b8SHong Zhang k = 0; 50*d94109b8SHong Zhang for (j=0; j<ov; j++){ /* for each overlap */ 51*d94109b8SHong Zhang /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */ 52*d94109b8SHong Zhang ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr); 53*d94109b8SHong Zhang for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]); 54*d94109b8SHong Zhang 55*d94109b8SHong Zhang n = isz; /* length of the updated is[i] */ 56*d94109b8SHong Zhang for (brow=0; brow<mbs; brow++){ 57*d94109b8SHong Zhang start = ai[brow]; end = ai[brow+1]; 58*d94109b8SHong Zhang if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */ 59*d94109b8SHong Zhang for (l = start; l<end ; l++){ 60*d94109b8SHong Zhang bcol = aj[l]; 61*d94109b8SHong Zhang if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;} 62*d94109b8SHong Zhang } 63*d94109b8SHong Zhang k++; 64*d94109b8SHong Zhang if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */ 65*d94109b8SHong Zhang } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */ 66*d94109b8SHong Zhang for (l = start; l<end ; l++){ 67*d94109b8SHong Zhang bcol = aj[l]; 68*d94109b8SHong Zhang if (PetscBTLookup(table0,bcol)){ 69*d94109b8SHong Zhang if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;} 70*d94109b8SHong Zhang break; /* for l = start; l<end ; l++) */ 71*d94109b8SHong Zhang } 72*d94109b8SHong Zhang } 73*d94109b8SHong Zhang } 74*d94109b8SHong Zhang } 75*d94109b8SHong Zhang } /* for each overlap */ 76*d94109b8SHong Zhang 77*d94109b8SHong Zhang /* expand the Index Set */ 78*d94109b8SHong Zhang for (j=0; j<isz; j++) { 79*d94109b8SHong Zhang for (k=0; k<bs; k++) 80*d94109b8SHong Zhang nidx2[j*bs+k] = nidx[j]*bs+k; 81*d94109b8SHong Zhang } 82*d94109b8SHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 83*d94109b8SHong Zhang } 84*d94109b8SHong Zhang ierr = PetscBTDestroy(table);CHKERRQ(ierr); 85*d94109b8SHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 86*d94109b8SHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 87*d94109b8SHong Zhang ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 88*d94109b8SHong Zhang #ifdef VERSION_1 89*d94109b8SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90*d94109b8SHong Zhang int brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol, 915333f59eSHong Zhang start,end,*ai,*aj,bs,*nidx2,*colsearch; 925eee224dSHong Zhang PetscBT table; 935eee224dSHong Zhang 9449b5e25fSSatish Balay PetscFunctionBegin; 955eee224dSHong Zhang mbs = a->mbs; 965eee224dSHong Zhang ai = a->i; 975eee224dSHong Zhang aj = a->j; 985eee224dSHong Zhang bs = a->bs; 995eee224dSHong Zhang 1005eee224dSHong Zhang if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 1015eee224dSHong Zhang 1025eee224dSHong Zhang ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr); 1035333f59eSHong Zhang ierr = PetscMalloc((2*mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 1045333f59eSHong Zhang colsearch = nidx + mbs + 1; 1055eee224dSHong Zhang ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr); 1065eee224dSHong Zhang 1075eee224dSHong Zhang for (i=0; i<is_max; i++) { 1085333f59eSHong Zhang 1095eee224dSHong Zhang /* Initialise the two local arrays */ 1105eee224dSHong Zhang isz = 0; 1115eee224dSHong Zhang ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr); 1125eee224dSHong Zhang 1135eee224dSHong Zhang /* Extract the indices, assume there can be duplicate entries */ 1145eee224dSHong Zhang ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 1155eee224dSHong Zhang ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 1165eee224dSHong Zhang 1175eee224dSHong Zhang /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */ 1185eee224dSHong Zhang for (j=0; j<n ; ++j){ 1195eee224dSHong Zhang bcol = idx[j]/bs; /* convert the indices into block indices */ 1205eee224dSHong Zhang if (bcol >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 1215eee224dSHong Zhang if(!PetscBTLookupSet(table,bcol)) { nidx[isz++] = bcol;} 1225eee224dSHong Zhang } 1235eee224dSHong Zhang ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 1245eee224dSHong Zhang ierr = ISDestroy(is[i]);CHKERRQ(ierr); 1255eee224dSHong Zhang 1265eee224dSHong Zhang k = 0; 1275eee224dSHong Zhang for (j=0; j<ov; j++){ /* for each overlap */ 1285333f59eSHong Zhang /* initialize colsearch - 1295333f59eSHong Zhang colsearch[i] points to the possible non-zero (i,active_col)-th entry in the array aj */ 1305333f59eSHong Zhang for (brow=0; brow<mbs; brow++) colsearch[brow] = ai[brow]; 1315333f59eSHong Zhang 1325333f59eSHong Zhang /* sort nidx[k:isz-1] - needed by col search */ 1335333f59eSHong Zhang ierr = PetscSortInt(isz-k,nidx+k); 1345333f59eSHong Zhang 1355eee224dSHong Zhang n = isz; 1365eee224dSHong Zhang for (; k<n ; k++){ /* do only those brows in nidx[k], which are not done yet */ 1375eee224dSHong Zhang brow = nidx[k]; 1385333f59eSHong Zhang /* search block column */ 1395333f59eSHong Zhang for (l=0; l<brow; l++){ 1405333f59eSHong Zhang if (colsearch[l]<ai[l+1]){ 1415333f59eSHong Zhang bcol = aj[colsearch[l]]; 1425333f59eSHong Zhang while (bcol < brow ) { 1435333f59eSHong Zhang colsearch[l]++; 1445333f59eSHong Zhang if (colsearch[l]<ai[l+1]){ 1455333f59eSHong Zhang bcol = aj[colsearch[l]]; 1465333f59eSHong Zhang } else { 1475333f59eSHong Zhang bcol = mbs; break; 1485333f59eSHong Zhang } 1495333f59eSHong Zhang } 1505333f59eSHong Zhang if (bcol==brow){ 1515333f59eSHong Zhang if (!PetscBTLookupSet(table,l)) {nidx[isz++] = l;} 1525333f59eSHong Zhang colsearch[l]++; 1535333f59eSHong Zhang } 1545333f59eSHong Zhang } 1555333f59eSHong Zhang } 1565333f59eSHong Zhang /* search block row */ 1575eee224dSHong Zhang start = ai[brow]; 1585eee224dSHong Zhang end = ai[brow+1]; 1595eee224dSHong Zhang for (l = start; l<end ; l++){ 1605eee224dSHong Zhang bcol = aj[l]; 1615eee224dSHong Zhang if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;} 1625eee224dSHong Zhang } 1635eee224dSHong Zhang } 1645333f59eSHong Zhang } /* for each overlap */ 1655333f59eSHong Zhang 1665eee224dSHong Zhang /* expand the Index Set */ 1675eee224dSHong Zhang for (j=0; j<isz; j++) { 1685eee224dSHong Zhang for (k=0; k<bs; k++) 1695eee224dSHong Zhang nidx2[j*bs+k] = nidx[j]*bs+k; 1705eee224dSHong Zhang } 1715eee224dSHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 1725eee224dSHong Zhang } 1735eee224dSHong Zhang ierr = PetscBTDestroy(table);CHKERRQ(ierr); 1745eee224dSHong Zhang ierr = PetscFree(nidx);CHKERRQ(ierr); 1755eee224dSHong Zhang ierr = PetscFree(nidx2);CHKERRQ(ierr); 176*d94109b8SHong Zhang #endif /* VERSION_1 */ 1775eee224dSHong Zhang PetscFunctionReturn(0); 17849b5e25fSSatish Balay } 17949b5e25fSSatish Balay 1804a2ae208SSatish Balay #undef __FUNCT__ 1814a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 18249b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 18349b5e25fSSatish Balay { 18449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 18549b5e25fSSatish Balay int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 18649b5e25fSSatish Balay int row,mat_i,*mat_j,tcol,*mat_ilen; 18749b5e25fSSatish Balay int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 18849b5e25fSSatish Balay int *aj = a->j,*ai = a->i; 18949b5e25fSSatish Balay MatScalar *mat_a; 19049b5e25fSSatish Balay Mat C; 19149b5e25fSSatish Balay PetscTruth flag; 19249b5e25fSSatish Balay 19349b5e25fSSatish Balay PetscFunctionBegin; 19449b5e25fSSatish Balay 195ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 19649b5e25fSSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 197347d480fSBarry Smith if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 19849b5e25fSSatish Balay 19949b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 20049b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 20149b5e25fSSatish Balay 202b0a32e0cSBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr); 20349b5e25fSSatish Balay ssmap = smap; 204b0a32e0cSBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr); 20549b5e25fSSatish Balay ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 20649b5e25fSSatish Balay for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 20749b5e25fSSatish Balay /* determine lens of each row */ 20849b5e25fSSatish Balay for (i=0; i<nrows; i++) { 20949b5e25fSSatish Balay kstart = ai[irow[i]]; 21049b5e25fSSatish Balay kend = kstart + a->ilen[irow[i]]; 21149b5e25fSSatish Balay lens[i] = 0; 21249b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 21349b5e25fSSatish Balay if (ssmap[aj[k]]) { 21449b5e25fSSatish Balay lens[i]++; 21549b5e25fSSatish Balay } 21649b5e25fSSatish Balay } 21749b5e25fSSatish Balay } 21849b5e25fSSatish Balay /* Create and fill new matrix */ 21949b5e25fSSatish Balay if (scall == MAT_REUSE_MATRIX) { 22049b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)((*B)->data); 22149b5e25fSSatish Balay 222347d480fSBarry Smith if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 22349b5e25fSSatish Balay ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 22449b5e25fSSatish Balay if (flag == PETSC_FALSE) { 225347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 22649b5e25fSSatish Balay } 22749b5e25fSSatish Balay ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 22849b5e25fSSatish Balay C = *B; 22949b5e25fSSatish Balay } else { 230e2d9671bSKris Buschelman ierr = MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr); 231e2d9671bSKris Buschelman ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 232e2d9671bSKris Buschelman ierr = MatSeqSBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr); 23349b5e25fSSatish Balay } 23449b5e25fSSatish Balay c = (Mat_SeqSBAIJ *)(C->data); 23549b5e25fSSatish Balay for (i=0; i<nrows; i++) { 23649b5e25fSSatish Balay row = irow[i]; 23749b5e25fSSatish Balay kstart = ai[row]; 23849b5e25fSSatish Balay kend = kstart + a->ilen[row]; 23949b5e25fSSatish Balay mat_i = c->i[i]; 24049b5e25fSSatish Balay mat_j = c->j + mat_i; 24149b5e25fSSatish Balay mat_a = c->a + mat_i*bs2; 24249b5e25fSSatish Balay mat_ilen = c->ilen + i; 24349b5e25fSSatish Balay for (k=kstart; k<kend; k++) { 24449b5e25fSSatish Balay if ((tcol=ssmap[a->j[k]])) { 24549b5e25fSSatish Balay *mat_j++ = tcol - 1; 24649b5e25fSSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 24749b5e25fSSatish Balay mat_a += bs2; 24849b5e25fSSatish Balay (*mat_ilen)++; 24949b5e25fSSatish Balay } 25049b5e25fSSatish Balay } 25149b5e25fSSatish Balay } 25249b5e25fSSatish Balay 25349b5e25fSSatish Balay /* Free work space */ 25449b5e25fSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 25549b5e25fSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 25649b5e25fSSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25749b5e25fSSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25849b5e25fSSatish Balay 25949b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 26049b5e25fSSatish Balay *B = C; 26149b5e25fSSatish Balay PetscFunctionReturn(0); 26249b5e25fSSatish Balay } 26349b5e25fSSatish Balay 2644a2ae208SSatish Balay #undef __FUNCT__ 2654a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 26649b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 26749b5e25fSSatish Balay { 26849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 26949b5e25fSSatish Balay IS is1; 27049b5e25fSSatish Balay int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 27149b5e25fSSatish Balay 27249b5e25fSSatish Balay PetscFunctionBegin; 273ac355199SBarry Smith if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 27449b5e25fSSatish Balay 27549b5e25fSSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 27649b5e25fSSatish Balay ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 27749b5e25fSSatish Balay 27849b5e25fSSatish Balay /* Verify if the indices corespond to each element in a block 27949b5e25fSSatish Balay and form the IS with compressed IS */ 28082502324SSatish Balay ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr); 28149b5e25fSSatish Balay iary = vary + a->mbs; 28249b5e25fSSatish Balay ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 28349b5e25fSSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 28449b5e25fSSatish Balay 28549b5e25fSSatish Balay count = 0; 28649b5e25fSSatish Balay for (i=0; i<a->mbs; i++) { 287ac355199SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks"); 28849b5e25fSSatish Balay if (vary[i]==bs) iary[count++] = i; 28949b5e25fSSatish Balay } 29049b5e25fSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 29149b5e25fSSatish Balay 29249b5e25fSSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 29349b5e25fSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 29449b5e25fSSatish Balay 29549b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 29649b5e25fSSatish Balay ISDestroy(is1); 29749b5e25fSSatish Balay PetscFunctionReturn(0); 29849b5e25fSSatish Balay } 29949b5e25fSSatish Balay 3004a2ae208SSatish Balay #undef __FUNCT__ 3014a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 302268466fbSBarry Smith int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 30349b5e25fSSatish Balay { 30449b5e25fSSatish Balay int ierr,i; 30549b5e25fSSatish Balay 30649b5e25fSSatish Balay PetscFunctionBegin; 30749b5e25fSSatish Balay if (scall == MAT_INITIAL_MATRIX) { 30882502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 30949b5e25fSSatish Balay } 31049b5e25fSSatish Balay 31149b5e25fSSatish Balay for (i=0; i<n; i++) { 31249b5e25fSSatish Balay ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 31349b5e25fSSatish Balay } 31449b5e25fSSatish Balay PetscFunctionReturn(0); 31549b5e25fSSatish Balay } 31649b5e25fSSatish Balay 31749b5e25fSSatish Balay /* -------------------------------------------------------*/ 31849b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */ 31949b5e25fSSatish Balay /* -------------------------------------------------------*/ 320d9eff348SSatish Balay #include "petscblaslapack.h" 32149b5e25fSSatish Balay 3224a2ae208SSatish Balay #undef __FUNCT__ 3234a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1" 32449b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 32549b5e25fSSatish Balay { 32649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 32787828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,zero=0.0; 32849b5e25fSSatish Balay MatScalar *v; 329831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 33049b5e25fSSatish Balay 33149b5e25fSSatish Balay PetscFunctionBegin; 33249b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 333b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 334b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 33549b5e25fSSatish Balay 33649b5e25fSSatish Balay v = a->a; 33749b5e25fSSatish Balay xb = x; 33849b5e25fSSatish Balay 33949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 34049b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 34149b5e25fSSatish Balay x1 = xb[0]; 34249b5e25fSSatish Balay ib = aj + *ai; 343831a3094SHong Zhang jmin = 0; 344831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 345831a3094SHong Zhang z[i] += *v++ * x[*ib++]; 346831a3094SHong Zhang jmin++; 347831a3094SHong Zhang } 348831a3094SHong Zhang for (j=jmin; j<n; j++) { 34949b5e25fSSatish Balay cval = *ib; 35049b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 35149b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 35249b5e25fSSatish Balay } 35349b5e25fSSatish Balay xb++; ai++; 35449b5e25fSSatish Balay } 35549b5e25fSSatish Balay 356b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 357b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 3586c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m) - A->m); /* nz = (nz+m)/2 */ 35949b5e25fSSatish Balay PetscFunctionReturn(0); 36049b5e25fSSatish Balay } 36149b5e25fSSatish Balay 3624a2ae208SSatish Balay #undef __FUNCT__ 3634a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2" 36449b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 36549b5e25fSSatish Balay { 36649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 36787828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 36849b5e25fSSatish Balay MatScalar *v; 369831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 37049b5e25fSSatish Balay 37149b5e25fSSatish Balay 37249b5e25fSSatish Balay PetscFunctionBegin; 37349b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 374b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 375b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 37649b5e25fSSatish Balay 37749b5e25fSSatish Balay v = a->a; 37849b5e25fSSatish Balay xb = x; 37949b5e25fSSatish Balay 38049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 38149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 38249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 38349b5e25fSSatish Balay ib = aj + *ai; 384831a3094SHong Zhang jmin = 0; 3857fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 38649b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 38749b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 388831a3094SHong Zhang v += 4; jmin++; 3897fbae186SHong Zhang } 390831a3094SHong Zhang for (j=jmin; j<n; j++) { 39149b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 39249b5e25fSSatish Balay cval = ib[j]*2; 39349b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 39449b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 39549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 39649b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 39749b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 39849b5e25fSSatish Balay v += 4; 39949b5e25fSSatish Balay } 40049b5e25fSSatish Balay xb +=2; ai++; 40149b5e25fSSatish Balay } 40249b5e25fSSatish Balay 403b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 404b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 4056c6c5352SBarry Smith PetscLogFlops(8*(a->nz*2 - A->m) - A->m); 40649b5e25fSSatish Balay PetscFunctionReturn(0); 40749b5e25fSSatish Balay } 40849b5e25fSSatish Balay 4094a2ae208SSatish Balay #undef __FUNCT__ 4104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3" 41149b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 41249b5e25fSSatish Balay { 41349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41487828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 41549b5e25fSSatish Balay MatScalar *v; 416831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 41749b5e25fSSatish Balay 41849b5e25fSSatish Balay 41949b5e25fSSatish Balay PetscFunctionBegin; 42049b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 421b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 422b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 42349b5e25fSSatish Balay 42449b5e25fSSatish Balay v = a->a; 42549b5e25fSSatish Balay xb = x; 42649b5e25fSSatish Balay 42749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 42849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 42949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 43049b5e25fSSatish Balay ib = aj + *ai; 431831a3094SHong Zhang jmin = 0; 4327fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 43349b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 43449b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 43549b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 436831a3094SHong Zhang v += 9; jmin++; 4377fbae186SHong Zhang } 438831a3094SHong Zhang for (j=jmin; j<n; j++) { 43949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 44049b5e25fSSatish Balay cval = ib[j]*3; 44149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 44249b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 44349b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 44449b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 44549b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 44649b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 44749b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 44849b5e25fSSatish Balay v += 9; 44949b5e25fSSatish Balay } 45049b5e25fSSatish Balay xb +=3; ai++; 45149b5e25fSSatish Balay } 45249b5e25fSSatish Balay 453b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 454b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 4556c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m) - A->m); 45649b5e25fSSatish Balay PetscFunctionReturn(0); 45749b5e25fSSatish Balay } 45849b5e25fSSatish Balay 4594a2ae208SSatish Balay #undef __FUNCT__ 4604a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4" 46149b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 46249b5e25fSSatish Balay { 46349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 46487828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 46549b5e25fSSatish Balay MatScalar *v; 466831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 46749b5e25fSSatish Balay 46849b5e25fSSatish Balay PetscFunctionBegin; 46949b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 470b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 471b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 47249b5e25fSSatish Balay 47349b5e25fSSatish Balay v = a->a; 47449b5e25fSSatish Balay xb = x; 47549b5e25fSSatish Balay 47649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 47749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 47849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 47949b5e25fSSatish Balay ib = aj + *ai; 480831a3094SHong Zhang jmin = 0; 4817fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 48249b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 48349b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 48449b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 48549b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 486831a3094SHong Zhang v += 16; jmin++; 4877fbae186SHong Zhang } 488831a3094SHong Zhang for (j=jmin; j<n; j++) { 48949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 49049b5e25fSSatish Balay cval = ib[j]*4; 49149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 49249b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 49349b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 49449b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 49549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 49649b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 49749b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 49849b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 49949b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 50049b5e25fSSatish Balay v += 16; 50149b5e25fSSatish Balay } 50249b5e25fSSatish Balay xb +=4; ai++; 50349b5e25fSSatish Balay } 50449b5e25fSSatish Balay 505b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 506b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 5076c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m) - A->m); 50849b5e25fSSatish Balay PetscFunctionReturn(0); 50949b5e25fSSatish Balay } 51049b5e25fSSatish Balay 5114a2ae208SSatish Balay #undef __FUNCT__ 5124a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5" 51349b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 51449b5e25fSSatish Balay { 51549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 51687828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 51749b5e25fSSatish Balay MatScalar *v; 518831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 51949b5e25fSSatish Balay 52049b5e25fSSatish Balay PetscFunctionBegin; 52149b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 522b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 523b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 52449b5e25fSSatish Balay 52549b5e25fSSatish Balay v = a->a; 52649b5e25fSSatish Balay xb = x; 52749b5e25fSSatish Balay 52849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 52949b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 53049b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 53149b5e25fSSatish Balay ib = aj + *ai; 532831a3094SHong Zhang jmin = 0; 5337fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 53449b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 53549b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 53649b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 53749b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 53849b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 539831a3094SHong Zhang v += 25; jmin++; 5407fbae186SHong Zhang } 541831a3094SHong Zhang for (j=jmin; j<n; j++) { 54249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 54349b5e25fSSatish Balay cval = ib[j]*5; 54449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 54549b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 54649b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 54749b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 54849b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 54949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 55049b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 55149b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 55249b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 55349b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 55449b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 55549b5e25fSSatish Balay v += 25; 55649b5e25fSSatish Balay } 55749b5e25fSSatish Balay xb +=5; ai++; 55849b5e25fSSatish Balay } 55949b5e25fSSatish Balay 560b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 561b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 5626c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m) - A->m); 56349b5e25fSSatish Balay PetscFunctionReturn(0); 56449b5e25fSSatish Balay } 56549b5e25fSSatish Balay 56649b5e25fSSatish Balay 5674a2ae208SSatish Balay #undef __FUNCT__ 5684a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6" 56949b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 57049b5e25fSSatish Balay { 57149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 57287828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 57349b5e25fSSatish Balay MatScalar *v; 574831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 57549b5e25fSSatish Balay 57649b5e25fSSatish Balay PetscFunctionBegin; 57749b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 578b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 579b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 58049b5e25fSSatish Balay 58149b5e25fSSatish Balay v = a->a; 58249b5e25fSSatish Balay xb = x; 58349b5e25fSSatish Balay 58449b5e25fSSatish Balay for (i=0; i<mbs; i++) { 58549b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 58649b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 58749b5e25fSSatish Balay ib = aj + *ai; 588831a3094SHong Zhang jmin = 0; 5897fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 59049b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 59149b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 59249b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 59349b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 59449b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 59549b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 596831a3094SHong Zhang v += 36; jmin++; 5977fbae186SHong Zhang } 598831a3094SHong Zhang for (j=jmin; j<n; j++) { 59949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 60049b5e25fSSatish Balay cval = ib[j]*6; 60149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 60249b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 60349b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 60449b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 60549b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 60649b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 60749b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 60849b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 60949b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 61049b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 61149b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 61249b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 61349b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 61449b5e25fSSatish Balay v += 36; 61549b5e25fSSatish Balay } 61649b5e25fSSatish Balay xb +=6; ai++; 61749b5e25fSSatish Balay } 61849b5e25fSSatish Balay 619b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 620b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 6216c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m) - A->m); 62249b5e25fSSatish Balay PetscFunctionReturn(0); 62349b5e25fSSatish Balay } 6244a2ae208SSatish Balay #undef __FUNCT__ 6254a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7" 62649b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 62749b5e25fSSatish Balay { 62849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 62987828ca2SBarry Smith PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 63049b5e25fSSatish Balay MatScalar *v; 631831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 63249b5e25fSSatish Balay 63349b5e25fSSatish Balay PetscFunctionBegin; 63449b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 635b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 636b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 63749b5e25fSSatish Balay 63849b5e25fSSatish Balay v = a->a; 63949b5e25fSSatish Balay xb = x; 64049b5e25fSSatish Balay 64149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 64249b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 64349b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 64449b5e25fSSatish Balay ib = aj + *ai; 645831a3094SHong Zhang jmin = 0; 6467fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 64749b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 64849b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 64949b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 65049b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 65149b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 65249b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 65349b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 654831a3094SHong Zhang v += 49; jmin++; 6557fbae186SHong Zhang } 656831a3094SHong Zhang for (j=jmin; j<n; j++) { 65749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 65849b5e25fSSatish Balay cval = ib[j]*7; 65949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 66049b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 66149b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 66249b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 66349b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 66449b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 66549b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 66649b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 66749b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 66849b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 66949b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 67049b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 67149b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 67249b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 67349b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 67449b5e25fSSatish Balay v += 49; 67549b5e25fSSatish Balay } 67649b5e25fSSatish Balay xb +=7; ai++; 67749b5e25fSSatish Balay } 678b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 679b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 6806c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m) - A->m); 68149b5e25fSSatish Balay PetscFunctionReturn(0); 68249b5e25fSSatish Balay } 68349b5e25fSSatish Balay 68449b5e25fSSatish Balay /* 68549b5e25fSSatish Balay This will not work with MatScalar == float because it calls the BLAS 68649b5e25fSSatish Balay */ 6874a2ae208SSatish Balay #undef __FUNCT__ 6884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N" 68949b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 69049b5e25fSSatish Balay { 69149b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 69287828ca2SBarry Smith PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 6930b60a74dSHong Zhang MatScalar *v; 694066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 6950b60a74dSHong Zhang int ncols,k; 69649b5e25fSSatish Balay 69749b5e25fSSatish Balay PetscFunctionBegin; 69849b5e25fSSatish Balay ierr = VecSet(&zero,zz);CHKERRQ(ierr); 699b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x; 700b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z; 70149b5e25fSSatish Balay 70249b5e25fSSatish Balay aj = a->j; 70349b5e25fSSatish Balay v = a->a; 70449b5e25fSSatish Balay ii = a->i; 70549b5e25fSSatish Balay 70649b5e25fSSatish Balay if (!a->mult_work) { 70787828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 70849b5e25fSSatish Balay } 70949b5e25fSSatish Balay work = a->mult_work; 71049b5e25fSSatish Balay 71149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 71249b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 71349b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 71449b5e25fSSatish Balay 71549b5e25fSSatish Balay /* upper triangular part */ 71649b5e25fSSatish Balay for (j=0; j<n; j++) { 71749b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 71849b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 71949b5e25fSSatish Balay workt += bs; 72049b5e25fSSatish Balay } 72149b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 72249b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 72349b5e25fSSatish Balay 72449b5e25fSSatish Balay /* strict lower triangular part */ 725831a3094SHong Zhang idx = aj+ii[0]; 726831a3094SHong Zhang if (*idx == i){ 72796b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 728831a3094SHong Zhang } 72996b9376eSHong Zhang 73049b5e25fSSatish Balay if (ncols > 0){ 73149b5e25fSSatish Balay workt = work; 73287828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 733831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 734831a3094SHong Zhang for (j=0; j<n; j++) { 735831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 73649b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 73749b5e25fSSatish Balay workt += bs; 73849b5e25fSSatish Balay } 73949b5e25fSSatish Balay } 74049b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 74149b5e25fSSatish Balay } 74249b5e25fSSatish Balay 743b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 744b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 7456c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m); 74649b5e25fSSatish Balay PetscFunctionReturn(0); 74749b5e25fSSatish Balay } 74849b5e25fSSatish Balay 7494a2ae208SSatish Balay #undef __FUNCT__ 7504a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 75149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 75249b5e25fSSatish Balay { 75349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 75487828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1; 75549b5e25fSSatish Balay MatScalar *v; 756831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 75749b5e25fSSatish Balay 75849b5e25fSSatish Balay PetscFunctionBegin; 759b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 76049b5e25fSSatish Balay if (yy != xx) { 761b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 76249b5e25fSSatish Balay } else { 76349b5e25fSSatish Balay y = x; 76449b5e25fSSatish Balay } 76549b5e25fSSatish Balay if (zz != yy) { 766a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 767b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 768a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 76949b5e25fSSatish Balay } else { 77049b5e25fSSatish Balay z = y; 77149b5e25fSSatish Balay } 77249b5e25fSSatish Balay 77349b5e25fSSatish Balay v = a->a; 77449b5e25fSSatish Balay xb = x; 77549b5e25fSSatish Balay 77649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 77749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th row of A */ 77849b5e25fSSatish Balay x1 = xb[0]; 77949b5e25fSSatish Balay ib = aj + *ai; 780831a3094SHong Zhang jmin = 0; 781831a3094SHong Zhang if (*ib == i) { /* (diag of A)*x */ 782831a3094SHong Zhang z[i] += *v++ * x[*ib++]; jmin++; 783831a3094SHong Zhang } 784831a3094SHong Zhang for (j=jmin; j<n; j++) { 78549b5e25fSSatish Balay cval = *ib; 78649b5e25fSSatish Balay z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 78749b5e25fSSatish Balay z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 78849b5e25fSSatish Balay } 78949b5e25fSSatish Balay xb++; ai++; 79049b5e25fSSatish Balay } 79149b5e25fSSatish Balay 792b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 793b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 794b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 79549b5e25fSSatish Balay 7966c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 79749b5e25fSSatish Balay PetscFunctionReturn(0); 79849b5e25fSSatish Balay } 79949b5e25fSSatish Balay 8004a2ae208SSatish Balay #undef __FUNCT__ 8014a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 80249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 80349b5e25fSSatish Balay { 80449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 80587828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2; 80649b5e25fSSatish Balay MatScalar *v; 807831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 80849b5e25fSSatish Balay 80949b5e25fSSatish Balay PetscFunctionBegin; 810b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 81149b5e25fSSatish Balay if (yy != xx) { 812b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 81349b5e25fSSatish Balay } else { 81449b5e25fSSatish Balay y = x; 81549b5e25fSSatish Balay } 81649b5e25fSSatish Balay if (zz != yy) { 817a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 818b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 819a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 82049b5e25fSSatish Balay } else { 82149b5e25fSSatish Balay z = y; 82249b5e25fSSatish Balay } 82349b5e25fSSatish Balay 82449b5e25fSSatish Balay v = a->a; 82549b5e25fSSatish Balay xb = x; 82649b5e25fSSatish Balay 82749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 82849b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 82949b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; 83049b5e25fSSatish Balay ib = aj + *ai; 831831a3094SHong Zhang jmin = 0; 8327fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 83349b5e25fSSatish Balay z[2*i] += v[0]*x1 + v[2]*x2; 83449b5e25fSSatish Balay z[2*i+1] += v[2]*x1 + v[3]*x2; 835831a3094SHong Zhang v += 4; jmin++; 8367fbae186SHong Zhang } 837831a3094SHong Zhang for (j=jmin; j<n; j++) { 83849b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 83949b5e25fSSatish Balay cval = ib[j]*2; 84049b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2; 84149b5e25fSSatish Balay z[cval+1] += v[2]*x1 + v[3]*x2; 84249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 84349b5e25fSSatish Balay z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 84449b5e25fSSatish Balay z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 84549b5e25fSSatish Balay v += 4; 84649b5e25fSSatish Balay } 84749b5e25fSSatish Balay xb +=2; ai++; 84849b5e25fSSatish Balay } 84949b5e25fSSatish Balay 850b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 851b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 852b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 85349b5e25fSSatish Balay 8546c6c5352SBarry Smith PetscLogFlops(4*(a->nz*2 - A->m)); 85549b5e25fSSatish Balay PetscFunctionReturn(0); 85649b5e25fSSatish Balay } 85749b5e25fSSatish Balay 8584a2ae208SSatish Balay #undef __FUNCT__ 8594a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 86049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 86149b5e25fSSatish Balay { 86249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 86387828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3; 86449b5e25fSSatish Balay MatScalar *v; 865831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 86649b5e25fSSatish Balay 86749b5e25fSSatish Balay PetscFunctionBegin; 868b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 86949b5e25fSSatish Balay if (yy != xx) { 870b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 87149b5e25fSSatish Balay } else { 87249b5e25fSSatish Balay y = x; 87349b5e25fSSatish Balay } 87449b5e25fSSatish Balay if (zz != yy) { 875a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 876b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 877a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 87849b5e25fSSatish Balay } else { 87949b5e25fSSatish Balay z = y; 88049b5e25fSSatish Balay } 88149b5e25fSSatish Balay 88249b5e25fSSatish Balay v = a->a; 88349b5e25fSSatish Balay xb = x; 88449b5e25fSSatish Balay 88549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 88649b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 88749b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 88849b5e25fSSatish Balay ib = aj + *ai; 889831a3094SHong Zhang jmin = 0; 8907fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 89149b5e25fSSatish Balay z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 89249b5e25fSSatish Balay z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 89349b5e25fSSatish Balay z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 894831a3094SHong Zhang v += 9; jmin++; 8957fbae186SHong Zhang } 896831a3094SHong Zhang for (j=jmin; j<n; j++) { 89749b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 89849b5e25fSSatish Balay cval = ib[j]*3; 89949b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 90049b5e25fSSatish Balay z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 90149b5e25fSSatish Balay z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 90249b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 90349b5e25fSSatish Balay z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 90449b5e25fSSatish Balay z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 90549b5e25fSSatish Balay z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 90649b5e25fSSatish Balay v += 9; 90749b5e25fSSatish Balay } 90849b5e25fSSatish Balay xb +=3; ai++; 90949b5e25fSSatish Balay } 91049b5e25fSSatish Balay 911b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 912b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 913b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 91449b5e25fSSatish Balay 9156c6c5352SBarry Smith PetscLogFlops(18*(a->nz*2 - A->m)); 91649b5e25fSSatish Balay PetscFunctionReturn(0); 91749b5e25fSSatish Balay } 91849b5e25fSSatish Balay 9194a2ae208SSatish Balay #undef __FUNCT__ 9204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 92149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 92249b5e25fSSatish Balay { 92349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 92487828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4; 92549b5e25fSSatish Balay MatScalar *v; 926831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 92749b5e25fSSatish Balay 92849b5e25fSSatish Balay PetscFunctionBegin; 929b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 93049b5e25fSSatish Balay if (yy != xx) { 931b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 93249b5e25fSSatish Balay } else { 93349b5e25fSSatish Balay y = x; 93449b5e25fSSatish Balay } 93549b5e25fSSatish Balay if (zz != yy) { 936a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 937b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 938a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 93949b5e25fSSatish Balay } else { 94049b5e25fSSatish Balay z = y; 94149b5e25fSSatish Balay } 94249b5e25fSSatish Balay 94349b5e25fSSatish Balay v = a->a; 94449b5e25fSSatish Balay xb = x; 94549b5e25fSSatish Balay 94649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 94749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 94849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 94949b5e25fSSatish Balay ib = aj + *ai; 950831a3094SHong Zhang jmin = 0; 9517fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 95249b5e25fSSatish Balay z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 95349b5e25fSSatish Balay z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 95449b5e25fSSatish Balay z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 95549b5e25fSSatish Balay z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 956831a3094SHong Zhang v += 16; jmin++; 9577fbae186SHong Zhang } 958831a3094SHong Zhang for (j=jmin; j<n; j++) { 95949b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 96049b5e25fSSatish Balay cval = ib[j]*4; 96149b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 96249b5e25fSSatish Balay z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 96349b5e25fSSatish Balay z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 96449b5e25fSSatish Balay z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 96549b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 96649b5e25fSSatish Balay z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 96749b5e25fSSatish Balay z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 96849b5e25fSSatish Balay z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 96949b5e25fSSatish Balay z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 97049b5e25fSSatish Balay v += 16; 97149b5e25fSSatish Balay } 97249b5e25fSSatish Balay xb +=4; ai++; 97349b5e25fSSatish Balay } 97449b5e25fSSatish Balay 975b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 976b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 977b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 97849b5e25fSSatish Balay 9796c6c5352SBarry Smith PetscLogFlops(32*(a->nz*2 - A->m)); 98049b5e25fSSatish Balay PetscFunctionReturn(0); 98149b5e25fSSatish Balay } 98249b5e25fSSatish Balay 9834a2ae208SSatish Balay #undef __FUNCT__ 9844a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 98549b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 98649b5e25fSSatish Balay { 98749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 98887828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 98949b5e25fSSatish Balay MatScalar *v; 990831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 99149b5e25fSSatish Balay 99249b5e25fSSatish Balay PetscFunctionBegin; 993b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 99449b5e25fSSatish Balay if (yy != xx) { 995b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 99649b5e25fSSatish Balay } else { 99749b5e25fSSatish Balay y = x; 99849b5e25fSSatish Balay } 99949b5e25fSSatish Balay if (zz != yy) { 1000a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1001b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 1002a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 100349b5e25fSSatish Balay } else { 100449b5e25fSSatish Balay z = y; 100549b5e25fSSatish Balay } 100649b5e25fSSatish Balay 100749b5e25fSSatish Balay v = a->a; 100849b5e25fSSatish Balay xb = x; 100949b5e25fSSatish Balay 101049b5e25fSSatish Balay for (i=0; i<mbs; i++) { 101149b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 101249b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 101349b5e25fSSatish Balay ib = aj + *ai; 1014831a3094SHong Zhang jmin = 0; 10157fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 101649b5e25fSSatish Balay z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 101749b5e25fSSatish Balay z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 101849b5e25fSSatish Balay z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 101949b5e25fSSatish Balay z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 102049b5e25fSSatish Balay z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 1021831a3094SHong Zhang v += 25; jmin++; 10227fbae186SHong Zhang } 1023831a3094SHong Zhang for (j=jmin; j<n; j++) { 102449b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 102549b5e25fSSatish Balay cval = ib[j]*5; 102649b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 102749b5e25fSSatish Balay z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 102849b5e25fSSatish Balay z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 102949b5e25fSSatish Balay z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 103049b5e25fSSatish Balay z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 103149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 103249b5e25fSSatish Balay z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 103349b5e25fSSatish Balay z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 103449b5e25fSSatish Balay z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 103549b5e25fSSatish Balay z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 103649b5e25fSSatish Balay z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 103749b5e25fSSatish Balay v += 25; 103849b5e25fSSatish Balay } 103949b5e25fSSatish Balay xb +=5; ai++; 104049b5e25fSSatish Balay } 104149b5e25fSSatish Balay 1042b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1043b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1044b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 104549b5e25fSSatish Balay 10466c6c5352SBarry Smith PetscLogFlops(50*(a->nz*2 - A->m)); 104749b5e25fSSatish Balay PetscFunctionReturn(0); 104849b5e25fSSatish Balay } 10494a2ae208SSatish Balay #undef __FUNCT__ 10504a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 105149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 105249b5e25fSSatish Balay { 105349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 105487828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 105549b5e25fSSatish Balay MatScalar *v; 1056831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 105749b5e25fSSatish Balay 105849b5e25fSSatish Balay PetscFunctionBegin; 1059b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 106049b5e25fSSatish Balay if (yy != xx) { 1061b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 106249b5e25fSSatish Balay } else { 106349b5e25fSSatish Balay y = x; 106449b5e25fSSatish Balay } 106549b5e25fSSatish Balay if (zz != yy) { 1066a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1067b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 1068a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 106949b5e25fSSatish Balay } else { 107049b5e25fSSatish Balay z = y; 107149b5e25fSSatish Balay } 107249b5e25fSSatish Balay 107349b5e25fSSatish Balay v = a->a; 107449b5e25fSSatish Balay xb = x; 107549b5e25fSSatish Balay 107649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 107749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 107849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 107949b5e25fSSatish Balay ib = aj + *ai; 1080831a3094SHong Zhang jmin = 0; 10817fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 108249b5e25fSSatish Balay z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 108349b5e25fSSatish Balay z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 108449b5e25fSSatish Balay z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 108549b5e25fSSatish Balay z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 108649b5e25fSSatish Balay z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 108749b5e25fSSatish Balay z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 1088831a3094SHong Zhang v += 36; jmin++; 10897fbae186SHong Zhang } 1090831a3094SHong Zhang for (j=jmin; j<n; j++) { 109149b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 109249b5e25fSSatish Balay cval = ib[j]*6; 109349b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 109449b5e25fSSatish Balay z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 109549b5e25fSSatish Balay z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 109649b5e25fSSatish Balay z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 109749b5e25fSSatish Balay z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 109849b5e25fSSatish Balay z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 109949b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 110049b5e25fSSatish Balay z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 110149b5e25fSSatish Balay z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 110249b5e25fSSatish Balay z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 110349b5e25fSSatish Balay z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 110449b5e25fSSatish Balay z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 110549b5e25fSSatish Balay z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 110649b5e25fSSatish Balay v += 36; 110749b5e25fSSatish Balay } 110849b5e25fSSatish Balay xb +=6; ai++; 110949b5e25fSSatish Balay } 111049b5e25fSSatish Balay 1111b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1112b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1113b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 111449b5e25fSSatish Balay 11156c6c5352SBarry Smith PetscLogFlops(72*(a->nz*2 - A->m)); 111649b5e25fSSatish Balay PetscFunctionReturn(0); 111749b5e25fSSatish Balay } 111849b5e25fSSatish Balay 11194a2ae208SSatish Balay #undef __FUNCT__ 11204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 112149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 112249b5e25fSSatish Balay { 112349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 112487828ca2SBarry Smith PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 112549b5e25fSSatish Balay MatScalar *v; 1126831a3094SHong Zhang int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 112749b5e25fSSatish Balay 112849b5e25fSSatish Balay PetscFunctionBegin; 1129b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 113049b5e25fSSatish Balay if (yy != xx) { 1131b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 113249b5e25fSSatish Balay } else { 113349b5e25fSSatish Balay y = x; 113449b5e25fSSatish Balay } 113549b5e25fSSatish Balay if (zz != yy) { 1136a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1137b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); 1138a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 113949b5e25fSSatish Balay } else { 114049b5e25fSSatish Balay z = y; 114149b5e25fSSatish Balay } 114249b5e25fSSatish Balay 114349b5e25fSSatish Balay v = a->a; 114449b5e25fSSatish Balay xb = x; 114549b5e25fSSatish Balay 114649b5e25fSSatish Balay for (i=0; i<mbs; i++) { 114749b5e25fSSatish Balay n = ai[1] - ai[0]; /* length of i_th block row of A */ 114849b5e25fSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 114949b5e25fSSatish Balay ib = aj + *ai; 1150831a3094SHong Zhang jmin = 0; 11517fbae186SHong Zhang if (*ib == i){ /* (diag of A)*x */ 115249b5e25fSSatish Balay z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 115349b5e25fSSatish Balay z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 115449b5e25fSSatish Balay z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 115549b5e25fSSatish Balay z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 115649b5e25fSSatish Balay z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 115749b5e25fSSatish Balay z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 115849b5e25fSSatish Balay z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1159831a3094SHong Zhang v += 49; jmin++; 11607fbae186SHong Zhang } 1161831a3094SHong Zhang for (j=jmin; j<n; j++) { 116249b5e25fSSatish Balay /* (strict lower triangular part of A)*x */ 116349b5e25fSSatish Balay cval = ib[j]*7; 116449b5e25fSSatish Balay z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 116549b5e25fSSatish Balay z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 116649b5e25fSSatish Balay z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 116749b5e25fSSatish Balay z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 116849b5e25fSSatish Balay z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 116949b5e25fSSatish Balay z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 117049b5e25fSSatish Balay z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 117149b5e25fSSatish Balay /* (strict upper triangular part of A)*x */ 117249b5e25fSSatish Balay z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 117349b5e25fSSatish Balay z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 117449b5e25fSSatish Balay z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 117549b5e25fSSatish Balay z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 117649b5e25fSSatish Balay z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 117749b5e25fSSatish Balay z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 117849b5e25fSSatish Balay z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 117949b5e25fSSatish Balay v += 49; 118049b5e25fSSatish Balay } 118149b5e25fSSatish Balay xb +=7; ai++; 118249b5e25fSSatish Balay } 118349b5e25fSSatish Balay 1184b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1185b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1186b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 118749b5e25fSSatish Balay 11886c6c5352SBarry Smith PetscLogFlops(98*(a->nz*2 - A->m)); 118949b5e25fSSatish Balay PetscFunctionReturn(0); 119049b5e25fSSatish Balay } 119149b5e25fSSatish Balay 11924a2ae208SSatish Balay #undef __FUNCT__ 11934a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 119449b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 119549b5e25fSSatish Balay { 119649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 119787828ca2SBarry Smith PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1198066653e3SSatish Balay MatScalar *v; 1199066653e3SSatish Balay int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1200066653e3SSatish Balay int ncols,k; 120149b5e25fSSatish Balay 120249b5e25fSSatish Balay PetscFunctionBegin; 1203b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x; 120449b5e25fSSatish Balay if (yy != xx) { 1205b1d4fb26SBarry Smith ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 120649b5e25fSSatish Balay } else { 120749b5e25fSSatish Balay y = x; 120849b5e25fSSatish Balay } 120949b5e25fSSatish Balay if (zz != yy) { 1210a9d4b620SHong Zhang /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1211b1d4fb26SBarry Smith ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z; 1212a9d4b620SHong Zhang ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 121349b5e25fSSatish Balay } else { 121449b5e25fSSatish Balay z = y; 121549b5e25fSSatish Balay } 121649b5e25fSSatish Balay 121749b5e25fSSatish Balay aj = a->j; 121849b5e25fSSatish Balay v = a->a; 121949b5e25fSSatish Balay ii = a->i; 122049b5e25fSSatish Balay 122149b5e25fSSatish Balay if (!a->mult_work) { 122287828ca2SBarry Smith ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 122349b5e25fSSatish Balay } 122449b5e25fSSatish Balay work = a->mult_work; 122549b5e25fSSatish Balay 122649b5e25fSSatish Balay 122749b5e25fSSatish Balay for (i=0; i<mbs; i++) { 122849b5e25fSSatish Balay n = ii[1] - ii[0]; ncols = n*bs; 122949b5e25fSSatish Balay workt = work; idx=aj+ii[0]; 123049b5e25fSSatish Balay 123149b5e25fSSatish Balay /* upper triangular part */ 123249b5e25fSSatish Balay for (j=0; j<n; j++) { 123349b5e25fSSatish Balay xb = x_ptr + bs*(*idx++); 123449b5e25fSSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 123549b5e25fSSatish Balay workt += bs; 123649b5e25fSSatish Balay } 123749b5e25fSSatish Balay /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 123849b5e25fSSatish Balay Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 123949b5e25fSSatish Balay 124049b5e25fSSatish Balay /* strict lower triangular part */ 1241831a3094SHong Zhang idx = aj+ii[0]; 1242831a3094SHong Zhang if (*idx == i){ 124396b9376eSHong Zhang ncols -= bs; v += bs2; idx++; n--; 1244831a3094SHong Zhang } 124549b5e25fSSatish Balay if (ncols > 0){ 124649b5e25fSSatish Balay workt = work; 124787828ca2SBarry Smith ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1248831a3094SHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1249831a3094SHong Zhang for (j=0; j<n; j++) { 1250831a3094SHong Zhang zb = z_ptr + bs*(*idx++); 1251831a3094SHong Zhang /* idx++; */ 125249b5e25fSSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 125349b5e25fSSatish Balay workt += bs; 125449b5e25fSSatish Balay } 125549b5e25fSSatish Balay } 125649b5e25fSSatish Balay 125749b5e25fSSatish Balay x += bs; v += n*bs2; z += bs; ii++; 125849b5e25fSSatish Balay } 125949b5e25fSSatish Balay 1260b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1261b1d4fb26SBarry Smith if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1262b1d4fb26SBarry Smith if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr); 126349b5e25fSSatish Balay 12646c6c5352SBarry Smith PetscLogFlops(2*(a->nz*2 - A->m)); 126549b5e25fSSatish Balay PetscFunctionReturn(0); 126649b5e25fSSatish Balay } 126749b5e25fSSatish Balay 12684a2ae208SSatish Balay #undef __FUNCT__ 12694a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqSBAIJ" 127049b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 127149b5e25fSSatish Balay { 1272eeeff2ecSHong Zhang int ierr; 1273eeeff2ecSHong Zhang 127449b5e25fSSatish Balay PetscFunctionBegin; 1275eeeff2ecSHong Zhang ierr = MatMult(A,xx,zz);CHKERRQ(ierr); 1276eeeff2ecSHong Zhang PetscFunctionReturn(0); 127749b5e25fSSatish Balay } 127849b5e25fSSatish Balay 12794a2ae208SSatish Balay #undef __FUNCT__ 12804a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ" 128149b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 128249b5e25fSSatish Balay 128349b5e25fSSatish Balay { 1284eeeff2ecSHong Zhang int ierr; 1285eeeff2ecSHong Zhang 128649b5e25fSSatish Balay PetscFunctionBegin; 1287eeeff2ecSHong Zhang ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1288eeeff2ecSHong Zhang PetscFunctionReturn(0); 128949b5e25fSSatish Balay } 129049b5e25fSSatish Balay 12914a2ae208SSatish Balay #undef __FUNCT__ 12924a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ" 1293268466fbSBarry Smith int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA) 129449b5e25fSSatish Balay { 129549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 12966c6c5352SBarry Smith int one = 1,totalnz = a->bs2*a->nz; 129749b5e25fSSatish Balay 129849b5e25fSSatish Balay PetscFunctionBegin; 1299268466fbSBarry Smith BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one); 1300b0a32e0cSBarry Smith PetscLogFlops(totalnz); 130149b5e25fSSatish Balay PetscFunctionReturn(0); 130249b5e25fSSatish Balay } 130349b5e25fSSatish Balay 13044a2ae208SSatish Balay #undef __FUNCT__ 13054a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ" 130649b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 130749b5e25fSSatish Balay { 130849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 130949b5e25fSSatish Balay MatScalar *v = a->a; 131049b5e25fSSatish Balay PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1311831a3094SHong Zhang int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 1312831a3094SHong Zhang int *jl,*il,jmin,jmax,ierr,nexti,ik,*col; 131349b5e25fSSatish Balay 131449b5e25fSSatish Balay PetscFunctionBegin; 131549b5e25fSSatish Balay if (type == NORM_FROBENIUS) { 131649b5e25fSSatish Balay for (k=0; k<mbs; k++){ 131749b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 1318831a3094SHong Zhang col = aj + jmin; 1319831a3094SHong Zhang if (*col == k){ /* diagonal block */ 132049b5e25fSSatish Balay for (i=0; i<bs2; i++){ 132149b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 132249b5e25fSSatish Balay sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 132349b5e25fSSatish Balay #else 132449b5e25fSSatish Balay sum_diag += (*v)*(*v); v++; 132549b5e25fSSatish Balay #endif 132649b5e25fSSatish Balay } 1327831a3094SHong Zhang jmin++; 1328831a3094SHong Zhang } 1329831a3094SHong Zhang for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 133049b5e25fSSatish Balay for (i=0; i<bs2; i++){ 133149b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX) 133249b5e25fSSatish Balay sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 133349b5e25fSSatish Balay #else 133449b5e25fSSatish Balay sum_off += (*v)*(*v); v++; 133549b5e25fSSatish Balay #endif 133649b5e25fSSatish Balay } 133749b5e25fSSatish Balay } 133849b5e25fSSatish Balay } 133949b5e25fSSatish Balay *norm = sqrt(sum_diag + 2*sum_off); 134049b5e25fSSatish Balay 134149b5e25fSSatish Balay } else if (type == NORM_INFINITY) { /* maximum row sum */ 134282502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr); 134382502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 134482502324SSatish Balay ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr); 134549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 134649b5e25fSSatish Balay jl[i] = mbs; il[0] = 0; 134749b5e25fSSatish Balay } 134849b5e25fSSatish Balay 134949b5e25fSSatish Balay *norm = 0.0; 135049b5e25fSSatish Balay for (k=0; k<mbs; k++) { /* k_th block row */ 135149b5e25fSSatish Balay for (j=0; j<bs; j++) sum[j]=0.0; 135249b5e25fSSatish Balay 135349b5e25fSSatish Balay /*-- col sum --*/ 135449b5e25fSSatish Balay i = jl[k]; /* first |A(i,k)| to be added */ 1355831a3094SHong Zhang /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1356831a3094SHong Zhang at step k */ 135749b5e25fSSatish Balay while (i<mbs){ 135849b5e25fSSatish Balay nexti = jl[i]; /* next block row to be added */ 135949b5e25fSSatish Balay ik = il[i]; /* block index of A(i,k) in the array a */ 136049b5e25fSSatish Balay for (j=0; j<bs; j++){ 136149b5e25fSSatish Balay v = a->a + ik*bs2 + j*bs; 136249b5e25fSSatish Balay for (k1=0; k1<bs; k1++) { 136349b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); v++; 136449b5e25fSSatish Balay } 136549b5e25fSSatish Balay } 136649b5e25fSSatish Balay /* update il, jl */ 1367831a3094SHong Zhang jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1368831a3094SHong Zhang jmax = a->i[i+1]; 136949b5e25fSSatish Balay if (jmin < jmax){ 137049b5e25fSSatish Balay il[i] = jmin; 137149b5e25fSSatish Balay j = a->j[jmin]; 137249b5e25fSSatish Balay jl[i] = jl[j]; jl[j]=i; 137349b5e25fSSatish Balay } 137449b5e25fSSatish Balay i = nexti; 137549b5e25fSSatish Balay } 137649b5e25fSSatish Balay 137749b5e25fSSatish Balay /*-- row sum --*/ 137849b5e25fSSatish Balay jmin = a->i[k]; jmax = a->i[k+1]; 137949b5e25fSSatish Balay for (i=jmin; i<jmax; i++) { 138049b5e25fSSatish Balay for (j=0; j<bs; j++){ 138149b5e25fSSatish Balay v = a->a + i*bs2 + j; 138249b5e25fSSatish Balay for (k1=0; k1<bs; k1++){ 138349b5e25fSSatish Balay sum[j] += PetscAbsScalar(*v); 138449b5e25fSSatish Balay v += bs; 138549b5e25fSSatish Balay } 138649b5e25fSSatish Balay } 138749b5e25fSSatish Balay } 138849b5e25fSSatish Balay /* add k_th block row to il, jl */ 1389831a3094SHong Zhang col = aj+jmin; 1390831a3094SHong Zhang if (*col == k) jmin++; 139149b5e25fSSatish Balay if (jmin < jmax){ 139249b5e25fSSatish Balay il[k] = jmin; 139349b5e25fSSatish Balay j = a->j[jmin]; 139449b5e25fSSatish Balay jl[k] = jl[j]; jl[j] = k; 139549b5e25fSSatish Balay } 139649b5e25fSSatish Balay for (j=0; j<bs; j++){ 139749b5e25fSSatish Balay if (sum[j] > *norm) *norm = sum[j]; 139849b5e25fSSatish Balay } 139949b5e25fSSatish Balay } 140049b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 140149b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 140249b5e25fSSatish Balay ierr = PetscFree(sum);CHKERRQ(ierr); 140349b5e25fSSatish Balay } else { 1404347d480fSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 140549b5e25fSSatish Balay } 140649b5e25fSSatish Balay PetscFunctionReturn(0); 140749b5e25fSSatish Balay } 140849b5e25fSSatish Balay 14094a2ae208SSatish Balay #undef __FUNCT__ 14104a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ" 141149b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 141249b5e25fSSatish Balay { 141349b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 141449b5e25fSSatish Balay int ierr; 141549b5e25fSSatish Balay 141649b5e25fSSatish Balay PetscFunctionBegin; 141749b5e25fSSatish Balay 141849b5e25fSSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 14196c6c5352SBarry Smith if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) { 1420ef511fbeSHong Zhang *flg = PETSC_FALSE; 1421ef511fbeSHong Zhang PetscFunctionReturn(0); 142249b5e25fSSatish Balay } 142349b5e25fSSatish Balay 142449b5e25fSSatish Balay /* if the a->i are the same */ 142549b5e25fSSatish Balay ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 142649b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 142749b5e25fSSatish Balay PetscFunctionReturn(0); 142849b5e25fSSatish Balay } 142949b5e25fSSatish Balay 143049b5e25fSSatish Balay /* if a->j are the same */ 14316c6c5352SBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr); 143249b5e25fSSatish Balay if (*flg == PETSC_FALSE) { 143349b5e25fSSatish Balay PetscFunctionReturn(0); 143449b5e25fSSatish Balay } 143549b5e25fSSatish Balay /* if a->a are the same */ 14366c6c5352SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 143749b5e25fSSatish Balay 1438935af2e7SHong Zhang PetscFunctionReturn(0); 143949b5e25fSSatish Balay } 144049b5e25fSSatish Balay 14414a2ae208SSatish Balay #undef __FUNCT__ 14424a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 144349b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 144449b5e25fSSatish Balay { 144549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 144649b5e25fSSatish Balay int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 144787828ca2SBarry Smith PetscScalar *x,zero = 0.0; 144849b5e25fSSatish Balay MatScalar *aa,*aa_j; 144949b5e25fSSatish Balay 145049b5e25fSSatish Balay PetscFunctionBegin; 145149b5e25fSSatish Balay bs = a->bs; 145282799104SHong Zhang if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); 145382799104SHong Zhang 145449b5e25fSSatish Balay aa = a->a; 145549b5e25fSSatish Balay ai = a->i; 145649b5e25fSSatish Balay aj = a->j; 145749b5e25fSSatish Balay ambs = a->mbs; 145849b5e25fSSatish Balay bs2 = a->bs2; 145949b5e25fSSatish Balay 146049b5e25fSSatish Balay ierr = VecSet(&zero,v);CHKERRQ(ierr); 1461b1d4fb26SBarry Smith ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 146249b5e25fSSatish Balay ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1463ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 146449b5e25fSSatish Balay for (i=0; i<ambs; i++) { 146549b5e25fSSatish Balay j=ai[i]; 146649b5e25fSSatish Balay if (aj[j] == i) { /* if this is a diagonal element */ 146749b5e25fSSatish Balay row = i*bs; 146849b5e25fSSatish Balay aa_j = aa + j*bs2; 146982799104SHong Zhang if (A->factor && bs==1){ 147082799104SHong Zhang for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k]; 147182799104SHong Zhang } else { 147249b5e25fSSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 147349b5e25fSSatish Balay } 147449b5e25fSSatish Balay } 147582799104SHong Zhang } 147682799104SHong Zhang 1477b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 147849b5e25fSSatish Balay PetscFunctionReturn(0); 147949b5e25fSSatish Balay } 148049b5e25fSSatish Balay 14814a2ae208SSatish Balay #undef __FUNCT__ 14824a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 148349b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 148449b5e25fSSatish Balay { 148549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148687828ca2SBarry Smith PetscScalar *l,*r,x,*li,*ri; 148749b5e25fSSatish Balay MatScalar *aa,*v; 1488066653e3SSatish Balay int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 148949b5e25fSSatish Balay 149049b5e25fSSatish Balay PetscFunctionBegin; 149149b5e25fSSatish Balay ai = a->i; 149249b5e25fSSatish Balay aj = a->j; 149349b5e25fSSatish Balay aa = a->a; 1494ef511fbeSHong Zhang m = A->m; 149549b5e25fSSatish Balay bs = a->bs; 149649b5e25fSSatish Balay mbs = a->mbs; 149749b5e25fSSatish Balay bs2 = a->bs2; 149849b5e25fSSatish Balay 149949b5e25fSSatish Balay if (ll != rr) { 1500347d480fSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 150149b5e25fSSatish Balay } 150249b5e25fSSatish Balay if (ll) { 1503b1d4fb26SBarry Smith ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr); 150449b5e25fSSatish Balay ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1505347d480fSBarry Smith if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 150649b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 150749b5e25fSSatish Balay M = ai[i+1] - ai[i]; 150849b5e25fSSatish Balay li = l + i*bs; 150949b5e25fSSatish Balay v = aa + bs2*ai[i]; 151049b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 151149b5e25fSSatish Balay for (k=0; k<bs2; k++) { 151249b5e25fSSatish Balay (*v++) *= li[k%bs]; 151349b5e25fSSatish Balay } 151449b5e25fSSatish Balay #ifdef CONT 151549b5e25fSSatish Balay /* will be used to replace the above loop */ 151649b5e25fSSatish Balay ri = l + bs*aj[ai[i]+j]; 151749b5e25fSSatish Balay for (k=0; k<bs; k++) { /* column value */ 151849b5e25fSSatish Balay x = ri[k]; 151949b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 152049b5e25fSSatish Balay } 152149b5e25fSSatish Balay #endif 152249b5e25fSSatish Balay 152349b5e25fSSatish Balay } 152449b5e25fSSatish Balay } 1525b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr); 15266c6c5352SBarry Smith PetscLogFlops(2*a->nz); 152749b5e25fSSatish Balay } 152849b5e25fSSatish Balay /* will be deleted */ 152949b5e25fSSatish Balay if (rr) { 1530b1d4fb26SBarry Smith ierr = VecGetArrayFast(rr,&r);CHKERRQ(ierr); 153149b5e25fSSatish Balay ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1532347d480fSBarry Smith if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 153349b5e25fSSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 153449b5e25fSSatish Balay M = ai[i+1] - ai[i]; 153549b5e25fSSatish Balay v = aa + bs2*ai[i]; 153649b5e25fSSatish Balay for (j=0; j<M; j++) { /* for each block */ 153749b5e25fSSatish Balay ri = r + bs*aj[ai[i]+j]; 153849b5e25fSSatish Balay for (k=0; k<bs; k++) { 153949b5e25fSSatish Balay x = ri[k]; 154049b5e25fSSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 154149b5e25fSSatish Balay } 154249b5e25fSSatish Balay } 154349b5e25fSSatish Balay } 1544b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(rr,&r);CHKERRQ(ierr); 15456c6c5352SBarry Smith PetscLogFlops(a->nz); 154649b5e25fSSatish Balay } 154749b5e25fSSatish Balay PetscFunctionReturn(0); 154849b5e25fSSatish Balay } 154949b5e25fSSatish Balay 15504a2ae208SSatish Balay #undef __FUNCT__ 15514a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 155249b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 155349b5e25fSSatish Balay { 155449b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 155549b5e25fSSatish Balay 155649b5e25fSSatish Balay PetscFunctionBegin; 1557ef511fbeSHong Zhang info->rows_global = (double)A->m; 1558ef511fbeSHong Zhang info->columns_global = (double)A->m; 1559ef511fbeSHong Zhang info->rows_local = (double)A->m; 1560ef511fbeSHong Zhang info->columns_local = (double)A->m; 156149b5e25fSSatish Balay info->block_size = a->bs2; 15626c6c5352SBarry Smith info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */ 15636c6c5352SBarry Smith info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ 156449b5e25fSSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 156549b5e25fSSatish Balay info->assemblies = A->num_ass; 156649b5e25fSSatish Balay info->mallocs = a->reallocs; 156749b5e25fSSatish Balay info->memory = A->mem; 156849b5e25fSSatish Balay if (A->factor) { 156949b5e25fSSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 157049b5e25fSSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 157149b5e25fSSatish Balay info->factor_mallocs = A->info.factor_mallocs; 157249b5e25fSSatish Balay } else { 157349b5e25fSSatish Balay info->fill_ratio_given = 0; 157449b5e25fSSatish Balay info->fill_ratio_needed = 0; 157549b5e25fSSatish Balay info->factor_mallocs = 0; 157649b5e25fSSatish Balay } 157749b5e25fSSatish Balay PetscFunctionReturn(0); 157849b5e25fSSatish Balay } 157949b5e25fSSatish Balay 158049b5e25fSSatish Balay 15814a2ae208SSatish Balay #undef __FUNCT__ 15824a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 158349b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A) 158449b5e25fSSatish Balay { 158549b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 158649b5e25fSSatish Balay int ierr; 158749b5e25fSSatish Balay 158849b5e25fSSatish Balay PetscFunctionBegin; 158949b5e25fSSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 159049b5e25fSSatish Balay PetscFunctionReturn(0); 159149b5e25fSSatish Balay } 1592dc354874SHong Zhang 15934a2ae208SSatish Balay #undef __FUNCT__ 15944a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ" 1595dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1596dc354874SHong Zhang { 1597dc354874SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1598d0f6400bSHong Zhang int ierr,i,j,n,row,col,bs,*ai,*aj,mbs; 1599c3fca9a7SHong Zhang PetscReal atmp; 1600273d9f13SBarry Smith MatScalar *aa; 160187828ca2SBarry Smith PetscScalar zero = 0.0,*x; 1602d0f6400bSHong Zhang int ncols,brow,bcol,krow,kcol; 1603dc354874SHong Zhang 1604dc354874SHong Zhang PetscFunctionBegin; 1605dc354874SHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1606dc354874SHong Zhang bs = a->bs; 1607dc354874SHong Zhang aa = a->a; 1608dc354874SHong Zhang ai = a->i; 1609dc354874SHong Zhang aj = a->j; 161044117c81SHong Zhang mbs = a->mbs; 1611dc354874SHong Zhang 1612dc354874SHong Zhang ierr = VecSet(&zero,v);CHKERRQ(ierr); 1613b1d4fb26SBarry Smith ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 1614dc354874SHong Zhang ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1615ef511fbeSHong Zhang if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 161644117c81SHong Zhang for (i=0; i<mbs; i++) { 1617d0f6400bSHong Zhang ncols = ai[1] - ai[0]; ai++; 1618d0f6400bSHong Zhang brow = bs*i; 161944117c81SHong Zhang for (j=0; j<ncols; j++){ 1620d0f6400bSHong Zhang bcol = bs*(*aj); 162144117c81SHong Zhang for (kcol=0; kcol<bs; kcol++){ 1622d0f6400bSHong Zhang col = bcol + kcol; /* col index */ 162344117c81SHong Zhang for (krow=0; krow<bs; krow++){ 1624d0f6400bSHong Zhang atmp = PetscAbsScalar(*aa); aa++; 1625d0f6400bSHong Zhang row = brow + krow; /* row index */ 162644117c81SHong Zhang /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 1627c3fca9a7SHong Zhang if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1628c3fca9a7SHong Zhang if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 162944117c81SHong Zhang } 163044117c81SHong Zhang } 1631d0f6400bSHong Zhang aj++; 1632dc354874SHong Zhang } 1633dc354874SHong Zhang } 1634b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 1635dc354874SHong Zhang PetscFunctionReturn(0); 1636dc354874SHong Zhang } 1637