1be1d678aSKris Buschelman #define PETSCMAT_DLL 2cac129eeSSatish Balay 370f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 42d61bbb3SSatish Balay #include "src/inline/spops.h" 53f1db9ecSBarry Smith #include "src/inline/ilu.h" 60a835dfdSSatish Balay #include "petscbt.h" 7cac129eeSSatish Balay 84a2ae208SSatish Balay #undef __FUNCT__ 94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 10690b6cddSBarry Smith PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 11a3192f15SSatish Balay { 12a3192f15SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 136849ba73SBarry Smith PetscErrorCode ierr; 14690b6cddSBarry Smith PetscInt row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival; 15690b6cddSBarry Smith PetscInt start,end,*ai,*aj,bs,*nidx2; 16f1af5d2fSBarry Smith PetscBT table; 17a3192f15SSatish Balay 183a40ed3dSBarry Smith PetscFunctionBegin; 19a3192f15SSatish Balay m = a->mbs; 20a3192f15SSatish Balay ai = a->i; 21a3192f15SSatish Balay aj = a->j; 22*d0f46423SBarry Smith bs = A->rmap->bs; 23a3192f15SSatish Balay 2429bbc08cSBarry Smith if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25a3192f15SSatish Balay 266831982aSBarry Smith ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27690b6cddSBarry Smith ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28*d0f46423SBarry Smith ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29a3192f15SSatish Balay 30a3192f15SSatish Balay for (i=0; i<is_max; i++) { 31a3192f15SSatish Balay /* Initialise the two local arrays */ 32a3192f15SSatish Balay isz = 0; 336831982aSBarry Smith ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34a3192f15SSatish Balay 35a3192f15SSatish Balay /* Extract the indices, assume there can be duplicate entries */ 36a3192f15SSatish Balay ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37b9b97703SBarry Smith ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38a3192f15SSatish Balay 39a3192f15SSatish Balay /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40a3192f15SSatish Balay for (j=0; j<n ; ++j){ 41218c64b6SSatish Balay ival = idx[j]/bs; /* convert the indices into block indices */ 425eee224dSHong Zhang if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43f1af5d2fSBarry Smith if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44a3192f15SSatish Balay } 45a3192f15SSatish Balay ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46a3192f15SSatish Balay ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47a3192f15SSatish Balay 48a3192f15SSatish Balay k = 0; 49a3192f15SSatish Balay for (j=0; j<ov; j++){ /* for each overlap*/ 50a3192f15SSatish Balay n = isz; 51a3192f15SSatish Balay for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52a3192f15SSatish Balay row = nidx[k]; 53a3192f15SSatish Balay start = ai[row]; 54a3192f15SSatish Balay end = ai[row+1]; 55a3192f15SSatish Balay for (l = start; l<end ; l++){ 56a3192f15SSatish Balay val = aj[l]; 57f1af5d2fSBarry Smith if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58a3192f15SSatish Balay } 59a3192f15SSatish Balay } 60a3192f15SSatish Balay } 61218c64b6SSatish Balay /* expand the Index Set */ 62218c64b6SSatish Balay for (j=0; j<isz; j++) { 63218c64b6SSatish Balay for (k=0; k<bs; k++) 64218c64b6SSatish Balay nidx2[j*bs+k] = nidx[j]*bs+k; 65218c64b6SSatish Balay } 66329f5518SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67a3192f15SSatish Balay } 686831982aSBarry Smith ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69606d414cSSatish Balay ierr = PetscFree(nidx);CHKERRQ(ierr); 70606d414cSSatish Balay ierr = PetscFree(nidx2);CHKERRQ(ierr); 713a40ed3dSBarry Smith PetscFunctionReturn(0); 72a3192f15SSatish Balay } 731c351548SSatish Balay 744a2ae208SSatish Balay #undef __FUNCT__ 754a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76690b6cddSBarry Smith PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 77736121d4SSatish Balay { 78736121d4SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 796849ba73SBarry Smith PetscErrorCode ierr; 80690b6cddSBarry Smith PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81690b6cddSBarry Smith PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82*d0f46423SBarry Smith PetscInt *irow,*icol,nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; 83690b6cddSBarry Smith PetscInt *aj = a->j,*ai = a->i; 843f1db9ecSBarry Smith MatScalar *mat_a; 85736121d4SSatish Balay Mat C; 860f5bd95cSBarry Smith PetscTruth flag; 87736121d4SSatish Balay 883a40ed3dSBarry Smith PetscFunctionBegin; 89888f2ed8SSatish Balay ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 9029bbc08cSBarry Smith if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 91736121d4SSatish Balay 92736121d4SSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 93218c64b6SSatish Balay ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 94b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 95b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 96736121d4SSatish Balay 97690b6cddSBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 98736121d4SSatish Balay ssmap = smap; 99690b6cddSBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 100690b6cddSBarry Smith ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 101736121d4SSatish Balay for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 102736121d4SSatish Balay /* determine lens of each row */ 103736121d4SSatish Balay for (i=0; i<nrows; i++) { 104736121d4SSatish Balay kstart = ai[irow[i]]; 105736121d4SSatish Balay kend = kstart + a->ilen[irow[i]]; 106736121d4SSatish Balay lens[i] = 0; 107736121d4SSatish Balay for (k=kstart; k<kend; k++) { 108736121d4SSatish Balay if (ssmap[aj[k]]) { 109736121d4SSatish Balay lens[i]++; 110736121d4SSatish Balay } 111736121d4SSatish Balay } 112736121d4SSatish Balay } 113736121d4SSatish Balay /* Create and fill new matrix */ 114736121d4SSatish Balay if (scall == MAT_REUSE_MATRIX) { 115736121d4SSatish Balay c = (Mat_SeqBAIJ *)((*B)->data); 116736121d4SSatish Balay 117*d0f46423SBarry Smith if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 118690b6cddSBarry Smith ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 119abc0a331SBarry Smith if (!flag) { 12029bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 121736121d4SSatish Balay } 122690b6cddSBarry Smith ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 123736121d4SSatish Balay C = *B; 1243a40ed3dSBarry Smith } else { 1257adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 126f69a0ea3SMatthew Knepley ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1277adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 128ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 129736121d4SSatish Balay } 130736121d4SSatish Balay c = (Mat_SeqBAIJ *)(C->data); 131736121d4SSatish Balay for (i=0; i<nrows; i++) { 132736121d4SSatish Balay row = irow[i]; 133736121d4SSatish Balay kstart = ai[row]; 134736121d4SSatish Balay kend = kstart + a->ilen[row]; 135736121d4SSatish Balay mat_i = c->i[i]; 136736121d4SSatish Balay mat_j = c->j + mat_i; 137218c64b6SSatish Balay mat_a = c->a + mat_i*bs2; 138736121d4SSatish Balay mat_ilen = c->ilen + i; 139736121d4SSatish Balay for (k=kstart; k<kend; k++) { 140736121d4SSatish Balay if ((tcol=ssmap[a->j[k]])) { 141736121d4SSatish Balay *mat_j++ = tcol - 1; 142549d3d68SSatish Balay ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 143549d3d68SSatish Balay mat_a += bs2; 144736121d4SSatish Balay (*mat_ilen)++; 145736121d4SSatish Balay } 146736121d4SSatish Balay } 147736121d4SSatish Balay } 148218c64b6SSatish Balay 149736121d4SSatish Balay /* Free work space */ 150736121d4SSatish Balay ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 151606d414cSSatish Balay ierr = PetscFree(smap);CHKERRQ(ierr); 152606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1536d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1546d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155736121d4SSatish Balay 156736121d4SSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 157736121d4SSatish Balay *B = C; 1583a40ed3dSBarry Smith PetscFunctionReturn(0); 159736121d4SSatish Balay } 160736121d4SSatish Balay 1614a2ae208SSatish Balay #undef __FUNCT__ 1624a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 163690b6cddSBarry Smith PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 164218c64b6SSatish Balay { 165218c64b6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 166218c64b6SSatish Balay IS is1,is2; 1676849ba73SBarry Smith PetscErrorCode ierr; 168*d0f46423SBarry Smith PetscInt *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->rmap->bs,count; 169218c64b6SSatish Balay 1703a40ed3dSBarry Smith PetscFunctionBegin; 171218c64b6SSatish Balay ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 172218c64b6SSatish Balay ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 173b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 174b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 175218c64b6SSatish Balay 176218c64b6SSatish Balay /* Verify if the indices corespond to each element in a block 177218c64b6SSatish Balay and form the IS with compressed IS */ 178690b6cddSBarry Smith ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 179218c64b6SSatish Balay iary = vary + a->mbs; 180690b6cddSBarry Smith ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 181218c64b6SSatish Balay for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 182218c64b6SSatish Balay count = 0; 183218c64b6SSatish Balay for (i=0; i<a->mbs; i++) { 184634064b4SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 185218c64b6SSatish Balay if (vary[i]==bs) iary[count++] = i; 186218c64b6SSatish Balay } 187029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 188218c64b6SSatish Balay 189690b6cddSBarry Smith ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 190218c64b6SSatish Balay for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 191218c64b6SSatish Balay count = 0; 192218c64b6SSatish Balay for (i=0; i<a->mbs; i++) { 193634064b4SBarry Smith if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 194218c64b6SSatish Balay if (vary[i]==bs) iary[count++] = i; 195218c64b6SSatish Balay } 196029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 197218c64b6SSatish Balay ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 198218c64b6SSatish Balay ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 199606d414cSSatish Balay ierr = PetscFree(vary);CHKERRQ(ierr); 200218c64b6SSatish Balay 2016a6a5d1dSBarry Smith ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr); 202218c64b6SSatish Balay ISDestroy(is1); 203218c64b6SSatish Balay ISDestroy(is2); 2043a40ed3dSBarry Smith PetscFunctionReturn(0); 205218c64b6SSatish Balay } 206218c64b6SSatish Balay 2074a2ae208SSatish Balay #undef __FUNCT__ 2084a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 209690b6cddSBarry Smith PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 210736121d4SSatish Balay { 2116849ba73SBarry Smith PetscErrorCode ierr; 212690b6cddSBarry Smith PetscInt i; 213736121d4SSatish Balay 2143a40ed3dSBarry Smith PetscFunctionBegin; 215736121d4SSatish Balay if (scall == MAT_INITIAL_MATRIX) { 21682502324SSatish Balay ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 217736121d4SSatish Balay } 218736121d4SSatish Balay 219736121d4SSatish Balay for (i=0; i<n; i++) { 2206a6a5d1dSBarry Smith ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 221736121d4SSatish Balay } 2223a40ed3dSBarry Smith PetscFunctionReturn(0); 223736121d4SSatish Balay } 224218c64b6SSatish Balay 225218c64b6SSatish Balay 2262d61bbb3SSatish Balay /* -------------------------------------------------------*/ 2272d61bbb3SSatish Balay /* Should check that shapes of vectors and matrices match */ 2282d61bbb3SSatish Balay /* -------------------------------------------------------*/ 229d9eff348SSatish Balay #include "petscblaslapack.h" 2302d61bbb3SSatish Balay 2314a2ae208SSatish Balay #undef __FUNCT__ 2324a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_1" 233dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 2342d61bbb3SSatish Balay { 2352d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 236d9fead3dSBarry Smith PetscScalar *z,sum; 237d9fead3dSBarry Smith const PetscScalar *x; 238d9fead3dSBarry Smith const MatScalar *v; 2396849ba73SBarry Smith PetscErrorCode ierr; 24098c9bda7SSatish Balay PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0; 24126e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 2422d61bbb3SSatish Balay 2432d61bbb3SSatish Balay PetscFunctionBegin; 244d9fead3dSBarry Smith ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2451ebc52fbSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 2462d61bbb3SSatish Balay 2472d61bbb3SSatish Balay idx = a->j; 2482d61bbb3SSatish Balay v = a->a; 24926e093fcSHong Zhang if (usecprow){ 25026e093fcSHong Zhang mbs = a->compressedrow.nrows; 25126e093fcSHong Zhang ii = a->compressedrow.i; 2527b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 25326e093fcSHong Zhang } else { 25426e093fcSHong Zhang mbs = a->mbs; 2552d61bbb3SSatish Balay ii = a->i; 25626e093fcSHong Zhang } 2572d61bbb3SSatish Balay 2582d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 2592d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 2602d61bbb3SSatish Balay sum = 0.0; 26198c9bda7SSatish Balay nonzerorow += (n>0); 2622d61bbb3SSatish Balay while (n--) sum += *v++ * x[*idx++]; 26326e093fcSHong Zhang if (usecprow){ 2647b2bb3b9SHong Zhang z[ridx[i]] = sum; 26526e093fcSHong Zhang } else { 2662d61bbb3SSatish Balay z[i] = sum; 2672d61bbb3SSatish Balay } 26826e093fcSHong Zhang } 269d9fead3dSBarry Smith ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2701ebc52fbSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 27198c9bda7SSatish Balay ierr = PetscLogFlops(2*a->nz - nonzerorow);CHKERRQ(ierr); 2722d61bbb3SSatish Balay PetscFunctionReturn(0); 2732d61bbb3SSatish Balay } 2742d61bbb3SSatish Balay 2754a2ae208SSatish Balay #undef __FUNCT__ 2764a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_2" 277dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 2782d61bbb3SSatish Balay { 2792d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 280d9fead3dSBarry Smith PetscScalar *z = 0,sum1,sum2,*zarray; 281d9fead3dSBarry Smith const PetscScalar *x,*xb; 28287828ca2SBarry Smith PetscScalar x1,x2; 283d9fead3dSBarry Smith const MatScalar *v; 284dfbe8321SBarry Smith PetscErrorCode ierr; 28598c9bda7SSatish Balay PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 28626e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 2872d61bbb3SSatish Balay 2882d61bbb3SSatish Balay PetscFunctionBegin; 289d9fead3dSBarry Smith ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 29026e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 2912d61bbb3SSatish Balay 2922d61bbb3SSatish Balay idx = a->j; 2932d61bbb3SSatish Balay v = a->a; 29426e093fcSHong Zhang if (usecprow){ 29526e093fcSHong Zhang mbs = a->compressedrow.nrows; 29626e093fcSHong Zhang ii = a->compressedrow.i; 2977b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 29826e093fcSHong Zhang } else { 29926e093fcSHong Zhang mbs = a->mbs; 3002d61bbb3SSatish Balay ii = a->i; 30126e093fcSHong Zhang z = zarray; 30226e093fcSHong Zhang } 3032d61bbb3SSatish Balay 3042d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 3052d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 3062d61bbb3SSatish Balay sum1 = 0.0; sum2 = 0.0; 30798c9bda7SSatish Balay nonzerorow += (n>0); 3082d61bbb3SSatish Balay for (j=0; j<n; j++) { 3092d61bbb3SSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 3102d61bbb3SSatish Balay sum1 += v[0]*x1 + v[2]*x2; 3112d61bbb3SSatish Balay sum2 += v[1]*x1 + v[3]*x2; 3122d61bbb3SSatish Balay v += 4; 3132d61bbb3SSatish Balay } 3147b2bb3b9SHong Zhang if (usecprow) z = zarray + 2*ridx[i]; 3152d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; 31626e093fcSHong Zhang if (!usecprow) z += 2; 3172d61bbb3SSatish Balay } 318d9fead3dSBarry Smith ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 31926e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 32098c9bda7SSatish Balay ierr = PetscLogFlops(8*a->nz - 2*nonzerorow);CHKERRQ(ierr); 3212d61bbb3SSatish Balay PetscFunctionReturn(0); 3222d61bbb3SSatish Balay } 3232d61bbb3SSatish Balay 3244a2ae208SSatish Balay #undef __FUNCT__ 3254a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_3" 326dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 3272d61bbb3SSatish Balay { 3282d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 329d9fead3dSBarry Smith PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; 330d9fead3dSBarry Smith const PetscScalar *x,*xb; 331d9fead3dSBarry Smith const MatScalar *v; 332dfbe8321SBarry Smith PetscErrorCode ierr; 333028cd4eaSSatish Balay PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 334028cd4eaSSatish Balay PetscTruth usecprow=a->compressedrow.use; 33526e093fcSHong Zhang 3362d61bbb3SSatish Balay 337b6410449SSatish Balay #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 338fee21e36SBarry Smith #pragma disjoint(*v,*z,*xb) 339fee21e36SBarry Smith #endif 340fee21e36SBarry Smith 3412d61bbb3SSatish Balay PetscFunctionBegin; 342d9fead3dSBarry Smith ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 34326e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 3442d61bbb3SSatish Balay 3452d61bbb3SSatish Balay idx = a->j; 3462d61bbb3SSatish Balay v = a->a; 34726e093fcSHong Zhang if (usecprow){ 34826e093fcSHong Zhang mbs = a->compressedrow.nrows; 34926e093fcSHong Zhang ii = a->compressedrow.i; 3507b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 35126e093fcSHong Zhang } else { 35226e093fcSHong Zhang mbs = a->mbs; 3532d61bbb3SSatish Balay ii = a->i; 35426e093fcSHong Zhang z = zarray; 35526e093fcSHong Zhang } 3562d61bbb3SSatish Balay 3572d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 3582d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 3592d61bbb3SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 36098c9bda7SSatish Balay nonzerorow += (n>0); 3612d61bbb3SSatish Balay for (j=0; j<n; j++) { 3622d61bbb3SSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 3632d61bbb3SSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 3642d61bbb3SSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 3652d61bbb3SSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 3662d61bbb3SSatish Balay v += 9; 3672d61bbb3SSatish Balay } 3687b2bb3b9SHong Zhang if (usecprow) z = zarray + 3*ridx[i]; 3692d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 37026e093fcSHong Zhang if (!usecprow) z += 3; 3712d61bbb3SSatish Balay } 372d9fead3dSBarry Smith ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 37326e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 37498c9bda7SSatish Balay ierr = PetscLogFlops(18*a->nz - 3*nonzerorow);CHKERRQ(ierr); 3752d61bbb3SSatish Balay PetscFunctionReturn(0); 3762d61bbb3SSatish Balay } 3772d61bbb3SSatish Balay 3784a2ae208SSatish Balay #undef __FUNCT__ 3794a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_4" 380dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 3812d61bbb3SSatish Balay { 3822d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 383d9fead3dSBarry Smith PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 384d9fead3dSBarry Smith const PetscScalar *x,*xb; 385d9fead3dSBarry Smith const MatScalar *v; 386dfbe8321SBarry Smith PetscErrorCode ierr; 38798c9bda7SSatish Balay PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 38826e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 3892d61bbb3SSatish Balay 3902d61bbb3SSatish Balay PetscFunctionBegin; 391d9fead3dSBarry Smith ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 39226e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 3932d61bbb3SSatish Balay 3942d61bbb3SSatish Balay idx = a->j; 3952d61bbb3SSatish Balay v = a->a; 39626e093fcSHong Zhang if (usecprow){ 39726e093fcSHong Zhang mbs = a->compressedrow.nrows; 39826e093fcSHong Zhang ii = a->compressedrow.i; 3997b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 40026e093fcSHong Zhang } else { 40126e093fcSHong Zhang mbs = a->mbs; 4022d61bbb3SSatish Balay ii = a->i; 40326e093fcSHong Zhang z = zarray; 40426e093fcSHong Zhang } 4052d61bbb3SSatish Balay 4062d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 4072d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 4082d61bbb3SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 40998c9bda7SSatish Balay nonzerorow += (n>0); 4102d61bbb3SSatish Balay for (j=0; j<n; j++) { 4112d61bbb3SSatish Balay xb = x + 4*(*idx++); 4122d61bbb3SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 4132d61bbb3SSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 4142d61bbb3SSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 4152d61bbb3SSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 4162d61bbb3SSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 4172d61bbb3SSatish Balay v += 16; 4182d61bbb3SSatish Balay } 4197b2bb3b9SHong Zhang if (usecprow) z = zarray + 4*ridx[i]; 4202d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 42126e093fcSHong Zhang if (!usecprow) z += 4; 4222d61bbb3SSatish Balay } 423d9fead3dSBarry Smith ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 42426e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 42598c9bda7SSatish Balay ierr = PetscLogFlops(32*a->nz - 4*nonzerorow);CHKERRQ(ierr); 4262d61bbb3SSatish Balay PetscFunctionReturn(0); 4272d61bbb3SSatish Balay } 4282d61bbb3SSatish Balay 4294a2ae208SSatish Balay #undef __FUNCT__ 4304a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_5" 431dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 4322d61bbb3SSatish Balay { 4332d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434d9fead3dSBarry Smith PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; 435d9fead3dSBarry Smith const PetscScalar *xb,*x; 436d9fead3dSBarry Smith const MatScalar *v; 437dfbe8321SBarry Smith PetscErrorCode ierr; 43898c9bda7SSatish Balay PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 43926e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 4402d61bbb3SSatish Balay 441433994e6SBarry Smith PetscFunctionBegin; 442d9fead3dSBarry Smith ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 44326e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 4442d61bbb3SSatish Balay 4452d61bbb3SSatish Balay idx = a->j; 4462d61bbb3SSatish Balay v = a->a; 44726e093fcSHong Zhang if (usecprow){ 44826e093fcSHong Zhang mbs = a->compressedrow.nrows; 44926e093fcSHong Zhang ii = a->compressedrow.i; 4507b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 45126e093fcSHong Zhang } else { 45226e093fcSHong Zhang mbs = a->mbs; 4532d61bbb3SSatish Balay ii = a->i; 45426e093fcSHong Zhang z = zarray; 45526e093fcSHong Zhang } 4562d61bbb3SSatish Balay 4572d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 4582d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 4592d61bbb3SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 46098c9bda7SSatish Balay nonzerorow += (n>0); 4612d61bbb3SSatish Balay for (j=0; j<n; j++) { 4622d61bbb3SSatish Balay xb = x + 5*(*idx++); 4632d61bbb3SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 4642d61bbb3SSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 4652d61bbb3SSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 4662d61bbb3SSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 4672d61bbb3SSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 4682d61bbb3SSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 4692d61bbb3SSatish Balay v += 25; 4702d61bbb3SSatish Balay } 4717b2bb3b9SHong Zhang if (usecprow) z = zarray + 5*ridx[i]; 4722d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 47326e093fcSHong Zhang if (!usecprow) z += 5; 4742d61bbb3SSatish Balay } 475d9fead3dSBarry Smith ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 47626e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 47798c9bda7SSatish Balay ierr = PetscLogFlops(50*a->nz - 5*nonzerorow);CHKERRQ(ierr); 4782d61bbb3SSatish Balay PetscFunctionReturn(0); 4792d61bbb3SSatish Balay } 4802d61bbb3SSatish Balay 48115091d37SBarry Smith 4824a2ae208SSatish Balay #undef __FUNCT__ 4834a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_6" 484dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 48515091d37SBarry Smith { 48615091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 487d9fead3dSBarry Smith PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; 488d9fead3dSBarry Smith const PetscScalar *x,*xb; 48926e093fcSHong Zhang PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 490d9fead3dSBarry Smith const MatScalar *v; 491dfbe8321SBarry Smith PetscErrorCode ierr; 49298c9bda7SSatish Balay PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 49326e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 49415091d37SBarry Smith 495433994e6SBarry Smith PetscFunctionBegin; 496d9fead3dSBarry Smith ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 49726e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 49815091d37SBarry Smith 49915091d37SBarry Smith idx = a->j; 50015091d37SBarry Smith v = a->a; 50126e093fcSHong Zhang if (usecprow){ 50226e093fcSHong Zhang mbs = a->compressedrow.nrows; 50326e093fcSHong Zhang ii = a->compressedrow.i; 5047b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 50526e093fcSHong Zhang } else { 50626e093fcSHong Zhang mbs = a->mbs; 50715091d37SBarry Smith ii = a->i; 50826e093fcSHong Zhang z = zarray; 50926e093fcSHong Zhang } 51015091d37SBarry Smith 51115091d37SBarry Smith for (i=0; i<mbs; i++) { 51215091d37SBarry Smith n = ii[1] - ii[0]; ii++; 51315091d37SBarry Smith sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 51498c9bda7SSatish Balay nonzerorow += (n>0); 51515091d37SBarry Smith for (j=0; j<n; j++) { 51615091d37SBarry Smith xb = x + 6*(*idx++); 51715091d37SBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 51815091d37SBarry Smith sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 51915091d37SBarry Smith sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 52015091d37SBarry Smith sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 52115091d37SBarry Smith sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 52215091d37SBarry Smith sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 52315091d37SBarry Smith sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 52415091d37SBarry Smith v += 36; 52515091d37SBarry Smith } 5267b2bb3b9SHong Zhang if (usecprow) z = zarray + 6*ridx[i]; 52715091d37SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 52826e093fcSHong Zhang if (!usecprow) z += 6; 52915091d37SBarry Smith } 53015091d37SBarry Smith 531d9fead3dSBarry Smith ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 53226e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 53398c9bda7SSatish Balay ierr = PetscLogFlops(72*a->nz - 6*nonzerorow);CHKERRQ(ierr); 53415091d37SBarry Smith PetscFunctionReturn(0); 53515091d37SBarry Smith } 5364a2ae208SSatish Balay #undef __FUNCT__ 5374a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_7" 538dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 5392d61bbb3SSatish Balay { 5402d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 541d9fead3dSBarry Smith PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 542d9fead3dSBarry Smith const PetscScalar *x,*xb; 54326e093fcSHong Zhang PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 544d9fead3dSBarry Smith const MatScalar *v; 545dfbe8321SBarry Smith PetscErrorCode ierr; 54698c9bda7SSatish Balay PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 54726e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 5482d61bbb3SSatish Balay 549433994e6SBarry Smith PetscFunctionBegin; 550d9fead3dSBarry Smith ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 55126e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 5522d61bbb3SSatish Balay 5532d61bbb3SSatish Balay idx = a->j; 5542d61bbb3SSatish Balay v = a->a; 55526e093fcSHong Zhang if (usecprow){ 55626e093fcSHong Zhang mbs = a->compressedrow.nrows; 55726e093fcSHong Zhang ii = a->compressedrow.i; 5587b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 55926e093fcSHong Zhang } else { 56026e093fcSHong Zhang mbs = a->mbs; 5612d61bbb3SSatish Balay ii = a->i; 56226e093fcSHong Zhang z = zarray; 56326e093fcSHong Zhang } 5642d61bbb3SSatish Balay 5652d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 5662d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 5672d61bbb3SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 56898c9bda7SSatish Balay nonzerorow += (n>0); 5692d61bbb3SSatish Balay for (j=0; j<n; j++) { 5702d61bbb3SSatish Balay xb = x + 7*(*idx++); 5712d61bbb3SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 5722d61bbb3SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 5732d61bbb3SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 5742d61bbb3SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 5752d61bbb3SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 5762d61bbb3SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 5772d61bbb3SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 5782d61bbb3SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 5792d61bbb3SSatish Balay v += 49; 5802d61bbb3SSatish Balay } 5817b2bb3b9SHong Zhang if (usecprow) z = zarray + 7*ridx[i]; 5822d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 58326e093fcSHong Zhang if (!usecprow) z += 7; 5842d61bbb3SSatish Balay } 5852d61bbb3SSatish Balay 586d9fead3dSBarry Smith ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 58726e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 58898c9bda7SSatish Balay ierr = PetscLogFlops(98*a->nz - 7*nonzerorow);CHKERRQ(ierr); 5892d61bbb3SSatish Balay PetscFunctionReturn(0); 5902d61bbb3SSatish Balay } 5912d61bbb3SSatish Balay 5923f1db9ecSBarry Smith /* 5933f1db9ecSBarry Smith This will not work with MatScalar == float because it calls the BLAS 5943f1db9ecSBarry Smith */ 5954a2ae208SSatish Balay #undef __FUNCT__ 5964a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_N" 597dfbe8321SBarry Smith PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 5982d61bbb3SSatish Balay { 5992d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 600dcf5cc72SBarry Smith PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 6013f1db9ecSBarry Smith MatScalar *v; 602dfbe8321SBarry Smith PetscErrorCode ierr; 603*d0f46423SBarry Smith PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 60498c9bda7SSatish Balay PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0; 60526e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 6062d61bbb3SSatish Balay 6072d61bbb3SSatish Balay PetscFunctionBegin; 6081ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 60926e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 6102d61bbb3SSatish Balay 6112d61bbb3SSatish Balay idx = a->j; 6122d61bbb3SSatish Balay v = a->a; 61326e093fcSHong Zhang if (usecprow){ 61426e093fcSHong Zhang mbs = a->compressedrow.nrows; 61526e093fcSHong Zhang ii = a->compressedrow.i; 6167b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 61726e093fcSHong Zhang } else { 61826e093fcSHong Zhang mbs = a->mbs; 6192d61bbb3SSatish Balay ii = a->i; 62026e093fcSHong Zhang z = zarray; 62126e093fcSHong Zhang } 622218c64b6SSatish Balay 6232d61bbb3SSatish Balay if (!a->mult_work) { 624*d0f46423SBarry Smith k = PetscMax(A->rmap->n,A->cmap->n); 62587828ca2SBarry Smith ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 6262d61bbb3SSatish Balay } 6272d61bbb3SSatish Balay work = a->mult_work; 6282d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 6292d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 6302d61bbb3SSatish Balay ncols = n*bs; 6312d61bbb3SSatish Balay workt = work; 63298c9bda7SSatish Balay nonzerorow += (n>0); 6332d61bbb3SSatish Balay for (j=0; j<n; j++) { 6342d61bbb3SSatish Balay xb = x + bs*(*idx++); 6352d61bbb3SSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 6362d61bbb3SSatish Balay workt += bs; 6372d61bbb3SSatish Balay } 6387b2bb3b9SHong Zhang if (usecprow) z = zarray + bs*ridx[i]; 639737d121aSSatish Balay Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 64071044d3cSBarry Smith /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 6412d61bbb3SSatish Balay v += n*bs2; 64226e093fcSHong Zhang if (!usecprow) z += bs; 6432d61bbb3SSatish Balay } 6441ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 64526e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 64698c9bda7SSatish Balay ierr = PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr); 6472d61bbb3SSatish Balay PetscFunctionReturn(0); 6482d61bbb3SSatish Balay } 6492d61bbb3SSatish Balay 6506a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec); 6514a2ae208SSatish Balay #undef __FUNCT__ 6524a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 653dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 6542d61bbb3SSatish Balay { 6552d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 656dcf5cc72SBarry Smith PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 6573f1db9ecSBarry Smith MatScalar *v; 658dfbe8321SBarry Smith PetscErrorCode ierr; 6594eb6d288SHong Zhang PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 66026e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 6612d61bbb3SSatish Balay 6622d61bbb3SSatish Balay PetscFunctionBegin; 6631ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 66426e093fcSHong Zhang ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 6652e8a6d31SBarry Smith if (zz != yy) { 66626e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 6672e8a6d31SBarry Smith } else { 66826e093fcSHong Zhang zarray = yarray; 6692e8a6d31SBarry Smith } 6702d61bbb3SSatish Balay 6712d61bbb3SSatish Balay idx = a->j; 6722d61bbb3SSatish Balay v = a->a; 67326e093fcSHong Zhang if (usecprow){ 6744eb6d288SHong Zhang if (zz != yy){ 6754eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 6764eb6d288SHong Zhang } 67726e093fcSHong Zhang mbs = a->compressedrow.nrows; 67826e093fcSHong Zhang ii = a->compressedrow.i; 6797b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 68026e093fcSHong Zhang } else { 6812d61bbb3SSatish Balay ii = a->i; 68226e093fcSHong Zhang y = yarray; 68326e093fcSHong Zhang z = zarray; 68426e093fcSHong Zhang } 6852d61bbb3SSatish Balay 6862d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 6872d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 68826e093fcSHong Zhang if (usecprow){ 6897b2bb3b9SHong Zhang z = zarray + ridx[i]; 6907b2bb3b9SHong Zhang y = yarray + ridx[i]; 69126e093fcSHong Zhang } 69226e093fcSHong Zhang sum = y[0]; 6932d61bbb3SSatish Balay while (n--) sum += *v++ * x[*idx++]; 69426e093fcSHong Zhang z[0] = sum; 69526e093fcSHong Zhang if (!usecprow){ 69626e093fcSHong Zhang z++; y++; 69726e093fcSHong Zhang } 6982d61bbb3SSatish Balay } 6991ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 70026e093fcSHong Zhang ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 7012e8a6d31SBarry Smith if (zz != yy) { 70226e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 7032e8a6d31SBarry Smith } 704efee365bSSatish Balay ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 7052d61bbb3SSatish Balay PetscFunctionReturn(0); 7062d61bbb3SSatish Balay } 7072d61bbb3SSatish Balay 7084a2ae208SSatish Balay #undef __FUNCT__ 7094a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 710dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 7112d61bbb3SSatish Balay { 7122d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 713dcf5cc72SBarry Smith PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 71426e093fcSHong Zhang PetscScalar x1,x2,*yarray,*zarray; 7153f1db9ecSBarry Smith MatScalar *v; 716dfbe8321SBarry Smith PetscErrorCode ierr; 7174eb6d288SHong Zhang PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 71826e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 7192d61bbb3SSatish Balay 7202d61bbb3SSatish Balay PetscFunctionBegin; 7211ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 72226e093fcSHong Zhang ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 7232e8a6d31SBarry Smith if (zz != yy) { 72426e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 7252e8a6d31SBarry Smith } else { 72626e093fcSHong Zhang zarray = yarray; 7272e8a6d31SBarry Smith } 7282d61bbb3SSatish Balay 7292d61bbb3SSatish Balay idx = a->j; 7302d61bbb3SSatish Balay v = a->a; 73126e093fcSHong Zhang if (usecprow){ 7324eb6d288SHong Zhang if (zz != yy){ 7334eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 7344eb6d288SHong Zhang } 73526e093fcSHong Zhang mbs = a->compressedrow.nrows; 73626e093fcSHong Zhang ii = a->compressedrow.i; 7377b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 7384eb6d288SHong Zhang if (zz != yy){ 7394eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 7404eb6d288SHong Zhang } 74126e093fcSHong Zhang } else { 7422d61bbb3SSatish Balay ii = a->i; 74326e093fcSHong Zhang y = yarray; 74426e093fcSHong Zhang z = zarray; 74526e093fcSHong Zhang } 7462d61bbb3SSatish Balay 7472d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 7482d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 74926e093fcSHong Zhang if (usecprow){ 7507b2bb3b9SHong Zhang z = zarray + 2*ridx[i]; 7517b2bb3b9SHong Zhang y = yarray + 2*ridx[i]; 75226e093fcSHong Zhang } 7532d61bbb3SSatish Balay sum1 = y[0]; sum2 = y[1]; 7542d61bbb3SSatish Balay for (j=0; j<n; j++) { 7552d61bbb3SSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 7562d61bbb3SSatish Balay sum1 += v[0]*x1 + v[2]*x2; 7572d61bbb3SSatish Balay sum2 += v[1]*x1 + v[3]*x2; 7582d61bbb3SSatish Balay v += 4; 7592d61bbb3SSatish Balay } 7602d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; 76126e093fcSHong Zhang if (!usecprow){ 7622d61bbb3SSatish Balay z += 2; y += 2; 7632d61bbb3SSatish Balay } 76426e093fcSHong Zhang } 7651ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 76626e093fcSHong Zhang ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 7672e8a6d31SBarry Smith if (zz != yy) { 76826e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 7692e8a6d31SBarry Smith } 770efee365bSSatish Balay ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 7712d61bbb3SSatish Balay PetscFunctionReturn(0); 7722d61bbb3SSatish Balay } 7732d61bbb3SSatish Balay 7744a2ae208SSatish Balay #undef __FUNCT__ 7754a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 776dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 7772d61bbb3SSatish Balay { 7782d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 779dcf5cc72SBarry Smith PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 7803f1db9ecSBarry Smith MatScalar *v; 781dfbe8321SBarry Smith PetscErrorCode ierr; 7824eb6d288SHong Zhang PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 78326e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 7842d61bbb3SSatish Balay 7852d61bbb3SSatish Balay PetscFunctionBegin; 7861ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 78726e093fcSHong Zhang ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 7882e8a6d31SBarry Smith if (zz != yy) { 78926e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 7902e8a6d31SBarry Smith } else { 79126e093fcSHong Zhang zarray = yarray; 7922e8a6d31SBarry Smith } 7932d61bbb3SSatish Balay 7942d61bbb3SSatish Balay idx = a->j; 7952d61bbb3SSatish Balay v = a->a; 79626e093fcSHong Zhang if (usecprow){ 7974eb6d288SHong Zhang if (zz != yy){ 7984eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 7994eb6d288SHong Zhang } 80026e093fcSHong Zhang mbs = a->compressedrow.nrows; 80126e093fcSHong Zhang ii = a->compressedrow.i; 8027b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 80326e093fcSHong Zhang } else { 8042d61bbb3SSatish Balay ii = a->i; 80526e093fcSHong Zhang y = yarray; 80626e093fcSHong Zhang z = zarray; 80726e093fcSHong Zhang } 8082d61bbb3SSatish Balay 8092d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 8102d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 81126e093fcSHong Zhang if (usecprow){ 8127b2bb3b9SHong Zhang z = zarray + 3*ridx[i]; 8137b2bb3b9SHong Zhang y = yarray + 3*ridx[i]; 81426e093fcSHong Zhang } 8152d61bbb3SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 8162d61bbb3SSatish Balay for (j=0; j<n; j++) { 8172d61bbb3SSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 8182d61bbb3SSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 8192d61bbb3SSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 8202d61bbb3SSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 8212d61bbb3SSatish Balay v += 9; 8222d61bbb3SSatish Balay } 8232d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 82426e093fcSHong Zhang if (!usecprow){ 8252d61bbb3SSatish Balay z += 3; y += 3; 8262d61bbb3SSatish Balay } 82726e093fcSHong Zhang } 8281ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 82926e093fcSHong Zhang ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 8302e8a6d31SBarry Smith if (zz != yy) { 83126e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 8322e8a6d31SBarry Smith } 833efee365bSSatish Balay ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 8342d61bbb3SSatish Balay PetscFunctionReturn(0); 8352d61bbb3SSatish Balay } 8362d61bbb3SSatish Balay 8374a2ae208SSatish Balay #undef __FUNCT__ 8384a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 839dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 8402d61bbb3SSatish Balay { 8412d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 842dcf5cc72SBarry Smith PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 8433f1db9ecSBarry Smith MatScalar *v; 844dfbe8321SBarry Smith PetscErrorCode ierr; 8454eb6d288SHong Zhang PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 84626e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 8472d61bbb3SSatish Balay 8482d61bbb3SSatish Balay PetscFunctionBegin; 8491ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 85026e093fcSHong Zhang ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 8512e8a6d31SBarry Smith if (zz != yy) { 85226e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 8532e8a6d31SBarry Smith } else { 85426e093fcSHong Zhang zarray = yarray; 8552e8a6d31SBarry Smith } 8562d61bbb3SSatish Balay 8572d61bbb3SSatish Balay idx = a->j; 8582d61bbb3SSatish Balay v = a->a; 85926e093fcSHong Zhang if (usecprow){ 8604eb6d288SHong Zhang if (zz != yy){ 8614eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 8624eb6d288SHong Zhang } 86326e093fcSHong Zhang mbs = a->compressedrow.nrows; 86426e093fcSHong Zhang ii = a->compressedrow.i; 8657b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 86626e093fcSHong Zhang } else { 8672d61bbb3SSatish Balay ii = a->i; 86826e093fcSHong Zhang y = yarray; 86926e093fcSHong Zhang z = zarray; 87026e093fcSHong Zhang } 8712d61bbb3SSatish Balay 8722d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 8732d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 87426e093fcSHong Zhang if (usecprow){ 8757b2bb3b9SHong Zhang z = zarray + 4*ridx[i]; 8767b2bb3b9SHong Zhang y = yarray + 4*ridx[i]; 87726e093fcSHong Zhang } 8782d61bbb3SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 8792d61bbb3SSatish Balay for (j=0; j<n; j++) { 8802d61bbb3SSatish Balay xb = x + 4*(*idx++); 8812d61bbb3SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 8822d61bbb3SSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 8832d61bbb3SSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 8842d61bbb3SSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 8852d61bbb3SSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 8862d61bbb3SSatish Balay v += 16; 8872d61bbb3SSatish Balay } 8882d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 88926e093fcSHong Zhang if (!usecprow){ 8902d61bbb3SSatish Balay z += 4; y += 4; 8912d61bbb3SSatish Balay } 89226e093fcSHong Zhang } 8931ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 89426e093fcSHong Zhang ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 8952e8a6d31SBarry Smith if (zz != yy) { 89626e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 8972e8a6d31SBarry Smith } 898efee365bSSatish Balay ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 8992d61bbb3SSatish Balay PetscFunctionReturn(0); 9002d61bbb3SSatish Balay } 9012d61bbb3SSatish Balay 9024a2ae208SSatish Balay #undef __FUNCT__ 9034a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 904dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 9052d61bbb3SSatish Balay { 9062d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907dcf5cc72SBarry Smith PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 90826e093fcSHong Zhang PetscScalar *yarray,*zarray; 9093f1db9ecSBarry Smith MatScalar *v; 910dfbe8321SBarry Smith PetscErrorCode ierr; 9114eb6d288SHong Zhang PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 91226e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 9132d61bbb3SSatish Balay 9142d61bbb3SSatish Balay PetscFunctionBegin; 9151ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 91626e093fcSHong Zhang ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 9172e8a6d31SBarry Smith if (zz != yy) { 91826e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 9192e8a6d31SBarry Smith } else { 92026e093fcSHong Zhang zarray = yarray; 9212e8a6d31SBarry Smith } 9222d61bbb3SSatish Balay 9232d61bbb3SSatish Balay idx = a->j; 9242d61bbb3SSatish Balay v = a->a; 92526e093fcSHong Zhang if (usecprow){ 9264eb6d288SHong Zhang if (zz != yy){ 9274eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 9284eb6d288SHong Zhang } 92926e093fcSHong Zhang mbs = a->compressedrow.nrows; 93026e093fcSHong Zhang ii = a->compressedrow.i; 9317b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 93226e093fcSHong Zhang } else { 9332d61bbb3SSatish Balay ii = a->i; 93426e093fcSHong Zhang y = yarray; 93526e093fcSHong Zhang z = zarray; 93626e093fcSHong Zhang } 9372d61bbb3SSatish Balay 9382d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 9392d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 94026e093fcSHong Zhang if (usecprow){ 9417b2bb3b9SHong Zhang z = zarray + 5*ridx[i]; 9427b2bb3b9SHong Zhang y = yarray + 5*ridx[i]; 94326e093fcSHong Zhang } 9442d61bbb3SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 9452d61bbb3SSatish Balay for (j=0; j<n; j++) { 9462d61bbb3SSatish Balay xb = x + 5*(*idx++); 9472d61bbb3SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 9482d61bbb3SSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 9492d61bbb3SSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 9502d61bbb3SSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 9512d61bbb3SSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 9522d61bbb3SSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 9532d61bbb3SSatish Balay v += 25; 9542d61bbb3SSatish Balay } 9552d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 95626e093fcSHong Zhang if (!usecprow){ 9572d61bbb3SSatish Balay z += 5; y += 5; 9582d61bbb3SSatish Balay } 95926e093fcSHong Zhang } 9601ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 96126e093fcSHong Zhang ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 9622e8a6d31SBarry Smith if (zz != yy) { 96326e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 9642e8a6d31SBarry Smith } 965efee365bSSatish Balay ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr); 9662d61bbb3SSatish Balay PetscFunctionReturn(0); 9672d61bbb3SSatish Balay } 9684a2ae208SSatish Balay #undef __FUNCT__ 9694a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 970dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 97115091d37SBarry Smith { 97215091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973dcf5cc72SBarry Smith PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 97426e093fcSHong Zhang PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 97515091d37SBarry Smith MatScalar *v; 976dfbe8321SBarry Smith PetscErrorCode ierr; 9774eb6d288SHong Zhang PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 97826e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 97915091d37SBarry Smith 98015091d37SBarry Smith PetscFunctionBegin; 9811ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 98226e093fcSHong Zhang ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 98315091d37SBarry Smith if (zz != yy) { 98426e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 98515091d37SBarry Smith } else { 98626e093fcSHong Zhang zarray = yarray; 98715091d37SBarry Smith } 98815091d37SBarry Smith 98915091d37SBarry Smith idx = a->j; 99015091d37SBarry Smith v = a->a; 99126e093fcSHong Zhang if (usecprow){ 9924eb6d288SHong Zhang if (zz != yy){ 9934eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 9944eb6d288SHong Zhang } 99526e093fcSHong Zhang mbs = a->compressedrow.nrows; 99626e093fcSHong Zhang ii = a->compressedrow.i; 9977b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 99826e093fcSHong Zhang } else { 99915091d37SBarry Smith ii = a->i; 100026e093fcSHong Zhang y = yarray; 100126e093fcSHong Zhang z = zarray; 100226e093fcSHong Zhang } 100315091d37SBarry Smith 100415091d37SBarry Smith for (i=0; i<mbs; i++) { 100515091d37SBarry Smith n = ii[1] - ii[0]; ii++; 100626e093fcSHong Zhang if (usecprow){ 10077b2bb3b9SHong Zhang z = zarray + 6*ridx[i]; 10087b2bb3b9SHong Zhang y = yarray + 6*ridx[i]; 100926e093fcSHong Zhang } 101015091d37SBarry Smith sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 101115091d37SBarry Smith for (j=0; j<n; j++) { 10123b95cb0eSSatish Balay xb = x + 6*(*idx++); 101315091d37SBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 101415091d37SBarry Smith sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 101515091d37SBarry Smith sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 101615091d37SBarry Smith sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 101715091d37SBarry Smith sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 101815091d37SBarry Smith sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 101915091d37SBarry Smith sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 102015091d37SBarry Smith v += 36; 102115091d37SBarry Smith } 102215091d37SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 102326e093fcSHong Zhang if (!usecprow){ 102415091d37SBarry Smith z += 6; y += 6; 102515091d37SBarry Smith } 102626e093fcSHong Zhang } 10271ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 102826e093fcSHong Zhang ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 102915091d37SBarry Smith if (zz != yy) { 103026e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 103115091d37SBarry Smith } 1032efee365bSSatish Balay ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr); 103315091d37SBarry Smith PetscFunctionReturn(0); 103415091d37SBarry Smith } 10352d61bbb3SSatish Balay 10364a2ae208SSatish Balay #undef __FUNCT__ 10374a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1038dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 10392d61bbb3SSatish Balay { 10402d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1041dcf5cc72SBarry Smith PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 104226e093fcSHong Zhang PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 10433f1db9ecSBarry Smith MatScalar *v; 1044dfbe8321SBarry Smith PetscErrorCode ierr; 10457b2bb3b9SHong Zhang PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 104626e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 10472d61bbb3SSatish Balay 10482d61bbb3SSatish Balay PetscFunctionBegin; 10491ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 105026e093fcSHong Zhang ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 10512e8a6d31SBarry Smith if (zz != yy) { 105226e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 10532e8a6d31SBarry Smith } else { 105426e093fcSHong Zhang zarray = yarray; 10552e8a6d31SBarry Smith } 10562d61bbb3SSatish Balay 10572d61bbb3SSatish Balay idx = a->j; 10582d61bbb3SSatish Balay v = a->a; 105926e093fcSHong Zhang if (usecprow){ 10604eb6d288SHong Zhang if (zz != yy){ 10614eb6d288SHong Zhang ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 10624eb6d288SHong Zhang } 106326e093fcSHong Zhang mbs = a->compressedrow.nrows; 106426e093fcSHong Zhang ii = a->compressedrow.i; 10657b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 106626e093fcSHong Zhang } else { 10672d61bbb3SSatish Balay ii = a->i; 106826e093fcSHong Zhang y = yarray; 106926e093fcSHong Zhang z = zarray; 107026e093fcSHong Zhang } 10712d61bbb3SSatish Balay 10722d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 10732d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 107426e093fcSHong Zhang if (usecprow){ 10757b2bb3b9SHong Zhang z = zarray + 7*ridx[i]; 10767b2bb3b9SHong Zhang y = yarray + 7*ridx[i]; 107726e093fcSHong Zhang } 10782d61bbb3SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 10792d61bbb3SSatish Balay for (j=0; j<n; j++) { 10802d61bbb3SSatish Balay xb = x + 7*(*idx++); 10812d61bbb3SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 10822d61bbb3SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 10832d61bbb3SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 10842d61bbb3SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 10852d61bbb3SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 10862d61bbb3SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 10872d61bbb3SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 10882d61bbb3SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 10892d61bbb3SSatish Balay v += 49; 10902d61bbb3SSatish Balay } 10912d61bbb3SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 109226e093fcSHong Zhang if (!usecprow){ 10932d61bbb3SSatish Balay z += 7; y += 7; 10942d61bbb3SSatish Balay } 109526e093fcSHong Zhang } 10961ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 109726e093fcSHong Zhang ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 10982e8a6d31SBarry Smith if (zz != yy) { 109926e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 11002e8a6d31SBarry Smith } 1101efee365bSSatish Balay ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr); 11022d61bbb3SSatish Balay PetscFunctionReturn(0); 11032d61bbb3SSatish Balay } 1104218c64b6SSatish Balay 11054a2ae208SSatish Balay #undef __FUNCT__ 11064a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1107dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 11082d61bbb3SSatish Balay { 11092d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1110bba805e6SBarry Smith PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 11113f1db9ecSBarry Smith MatScalar *v; 11126849ba73SBarry Smith PetscErrorCode ierr; 1113*d0f46423SBarry Smith PetscInt mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 11147b2bb3b9SHong Zhang PetscInt ncols,k,*ridx=PETSC_NULL; 111526e093fcSHong Zhang PetscTruth usecprow=a->compressedrow.use; 1116218c64b6SSatish Balay 11172d61bbb3SSatish Balay PetscFunctionBegin; 11186a65c766SHong Zhang ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 11191ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 112026e093fcSHong Zhang ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 11212d61bbb3SSatish Balay 11222d61bbb3SSatish Balay idx = a->j; 11232d61bbb3SSatish Balay v = a->a; 112426e093fcSHong Zhang if (usecprow){ 112526e093fcSHong Zhang mbs = a->compressedrow.nrows; 112626e093fcSHong Zhang ii = a->compressedrow.i; 11277b2bb3b9SHong Zhang ridx = a->compressedrow.rindex; 112826e093fcSHong Zhang } else { 112926e093fcSHong Zhang mbs = a->mbs; 11302d61bbb3SSatish Balay ii = a->i; 113126e093fcSHong Zhang z = zarray; 113226e093fcSHong Zhang } 11332d61bbb3SSatish Balay 11342d61bbb3SSatish Balay if (!a->mult_work) { 1135*d0f46423SBarry Smith k = PetscMax(A->rmap->n,A->cmap->n); 113687828ca2SBarry Smith ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 11372d61bbb3SSatish Balay } 11382d61bbb3SSatish Balay work = a->mult_work; 11392d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 11402d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 11412d61bbb3SSatish Balay ncols = n*bs; 11422d61bbb3SSatish Balay workt = work; 11432d61bbb3SSatish Balay for (j=0; j<n; j++) { 11442d61bbb3SSatish Balay xb = x + bs*(*idx++); 11452d61bbb3SSatish Balay for (k=0; k<bs; k++) workt[k] = xb[k]; 11462d61bbb3SSatish Balay workt += bs; 11472d61bbb3SSatish Balay } 11487b2bb3b9SHong Zhang if (usecprow) z = zarray + bs*ridx[i]; 11493f1db9ecSBarry Smith Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 115071044d3cSBarry Smith /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 11512d61bbb3SSatish Balay v += n*bs2; 115226e093fcSHong Zhang if (!usecprow){ 11532d61bbb3SSatish Balay z += bs; 11542d61bbb3SSatish Balay } 115526e093fcSHong Zhang } 11561ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 115726e093fcSHong Zhang ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1158efee365bSSatish Balay ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr); 11592d61bbb3SSatish Balay PetscFunctionReturn(0); 11602d61bbb3SSatish Balay } 11612d61bbb3SSatish Balay 11624a2ae208SSatish Balay #undef __FUNCT__ 11634a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1164dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 11652d61bbb3SSatish Balay { 11663447b6efSHong Zhang PetscScalar zero = 0.0; 11676849ba73SBarry Smith PetscErrorCode ierr; 11682d61bbb3SSatish Balay 11692d61bbb3SSatish Balay PetscFunctionBegin; 11702dcb1b2aSMatthew Knepley ierr = VecSet(zz,zero);CHKERRQ(ierr); 11713447b6efSHong Zhang ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 11722d61bbb3SSatish Balay PetscFunctionReturn(0); 11732d61bbb3SSatish Balay } 11742d61bbb3SSatish Balay 11754a2ae208SSatish Balay #undef __FUNCT__ 11764a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1177dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 11782d61bbb3SSatish Balay 11792d61bbb3SSatish Balay { 11802d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1181dcf5cc72SBarry Smith PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 11823f1db9ecSBarry Smith MatScalar *v; 11836849ba73SBarry Smith PetscErrorCode ierr; 1184*d0f46423SBarry Smith PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 11853447b6efSHong Zhang Mat_CompressedRow cprow = a->compressedrow; 11863447b6efSHong Zhang PetscTruth usecprow=cprow.use; 11872d61bbb3SSatish Balay 11882d61bbb3SSatish Balay PetscFunctionBegin; 11896a65c766SHong Zhang if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 11903447b6efSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 11913447b6efSHong Zhang ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 11922d61bbb3SSatish Balay 11932d61bbb3SSatish Balay idx = a->j; 11942d61bbb3SSatish Balay v = a->a; 11953447b6efSHong Zhang if (usecprow){ 11963447b6efSHong Zhang mbs = cprow.nrows; 11973447b6efSHong Zhang ii = cprow.i; 11987b2bb3b9SHong Zhang ridx = cprow.rindex; 11993447b6efSHong Zhang } else { 12003447b6efSHong Zhang mbs=a->mbs; 12012d61bbb3SSatish Balay ii = a->i; 1202f1af5d2fSBarry Smith xb = x; 12033447b6efSHong Zhang } 12042d61bbb3SSatish Balay 12052d61bbb3SSatish Balay switch (bs) { 12062d61bbb3SSatish Balay case 1: 12072d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 12087b2bb3b9SHong Zhang if (usecprow) xb = x + ridx[i]; 1209f1af5d2fSBarry Smith x1 = xb[0]; 12103447b6efSHong Zhang ib = idx + ii[0]; 12113447b6efSHong Zhang n = ii[1] - ii[0]; ii++; 12122d61bbb3SSatish Balay for (j=0; j<n; j++) { 12132d61bbb3SSatish Balay rval = ib[j]; 1214f1af5d2fSBarry Smith z[rval] += *v * x1; 1215f1af5d2fSBarry Smith v++; 12162d61bbb3SSatish Balay } 12173447b6efSHong Zhang if (!usecprow) xb++; 12182d61bbb3SSatish Balay } 12192d61bbb3SSatish Balay break; 12202d61bbb3SSatish Balay case 2: 12212d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 12227b2bb3b9SHong Zhang if (usecprow) xb = x + 2*ridx[i]; 1223f1af5d2fSBarry Smith x1 = xb[0]; x2 = xb[1]; 12243447b6efSHong Zhang ib = idx + ii[0]; 12253447b6efSHong Zhang n = ii[1] - ii[0]; ii++; 12262d61bbb3SSatish Balay for (j=0; j<n; j++) { 12272d61bbb3SSatish Balay rval = ib[j]*2; 12282d61bbb3SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 12292d61bbb3SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 12302d61bbb3SSatish Balay v += 4; 12312d61bbb3SSatish Balay } 12323447b6efSHong Zhang if (!usecprow) xb += 2; 12332d61bbb3SSatish Balay } 12342d61bbb3SSatish Balay break; 12352d61bbb3SSatish Balay case 3: 12362d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 12377b2bb3b9SHong Zhang if (usecprow) xb = x + 3*ridx[i]; 1238f1af5d2fSBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 12393447b6efSHong Zhang ib = idx + ii[0]; 12403447b6efSHong Zhang n = ii[1] - ii[0]; ii++; 12412d61bbb3SSatish Balay for (j=0; j<n; j++) { 12422d61bbb3SSatish Balay rval = ib[j]*3; 12432d61bbb3SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 12442d61bbb3SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 12452d61bbb3SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 12462d61bbb3SSatish Balay v += 9; 12472d61bbb3SSatish Balay } 12483447b6efSHong Zhang if (!usecprow) xb += 3; 12492d61bbb3SSatish Balay } 12502d61bbb3SSatish Balay break; 12512d61bbb3SSatish Balay case 4: 12522d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 12537b2bb3b9SHong Zhang if (usecprow) xb = x + 4*ridx[i]; 1254f1af5d2fSBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 12553447b6efSHong Zhang ib = idx + ii[0]; 12563447b6efSHong Zhang n = ii[1] - ii[0]; ii++; 12572d61bbb3SSatish Balay for (j=0; j<n; j++) { 12582d61bbb3SSatish Balay rval = ib[j]*4; 12592d61bbb3SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 12602d61bbb3SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 12612d61bbb3SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 12622d61bbb3SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 12632d61bbb3SSatish Balay v += 16; 12642d61bbb3SSatish Balay } 12653447b6efSHong Zhang if (!usecprow) xb += 4; 12662d61bbb3SSatish Balay } 12672d61bbb3SSatish Balay break; 12682d61bbb3SSatish Balay case 5: 12692d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 12707b2bb3b9SHong Zhang if (usecprow) xb = x + 5*ridx[i]; 1271f1af5d2fSBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 12722d61bbb3SSatish Balay x4 = xb[3]; x5 = xb[4]; 12733447b6efSHong Zhang ib = idx + ii[0]; 12743447b6efSHong Zhang n = ii[1] - ii[0]; ii++; 12752d61bbb3SSatish Balay for (j=0; j<n; j++) { 12762d61bbb3SSatish Balay rval = ib[j]*5; 12772d61bbb3SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 12782d61bbb3SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 12792d61bbb3SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 12802d61bbb3SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 12812d61bbb3SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 12822d61bbb3SSatish Balay v += 25; 12832d61bbb3SSatish Balay } 12843447b6efSHong Zhang if (!usecprow) xb += 5; 12852d61bbb3SSatish Balay } 12862d61bbb3SSatish Balay break; 1287f1af5d2fSBarry Smith default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1288690b6cddSBarry Smith PetscInt ncols,k; 12893447b6efSHong Zhang PetscScalar *work,*workt,*xtmp; 12903f1db9ecSBarry Smith 12912d61bbb3SSatish Balay if (!a->mult_work) { 1292*d0f46423SBarry Smith k = PetscMax(A->rmap->n,A->cmap->n); 129387828ca2SBarry Smith ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 12942d61bbb3SSatish Balay } 12952d61bbb3SSatish Balay work = a->mult_work; 12963447b6efSHong Zhang xtmp = x; 12972d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 12982d61bbb3SSatish Balay n = ii[1] - ii[0]; ii++; 12992d61bbb3SSatish Balay ncols = n*bs; 130087828ca2SBarry Smith ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 13013447b6efSHong Zhang if (usecprow) { 13027b2bb3b9SHong Zhang xtmp = x + bs*ridx[i]; 13033447b6efSHong Zhang } 13043447b6efSHong Zhang Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 130571044d3cSBarry Smith /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 13062d61bbb3SSatish Balay v += n*bs2; 13073447b6efSHong Zhang if (!usecprow) xtmp += bs; 13082d61bbb3SSatish Balay workt = work; 13092d61bbb3SSatish Balay for (j=0; j<n; j++) { 13102d61bbb3SSatish Balay zb = z + bs*(*idx++); 13112d61bbb3SSatish Balay for (k=0; k<bs; k++) zb[k] += workt[k] ; 13122d61bbb3SSatish Balay workt += bs; 13132d61bbb3SSatish Balay } 13142d61bbb3SSatish Balay } 13152d61bbb3SSatish Balay } 13162d61bbb3SSatish Balay } 13173447b6efSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13183447b6efSHong Zhang ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1319efee365bSSatish Balay ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr); 13202d61bbb3SSatish Balay PetscFunctionReturn(0); 13212d61bbb3SSatish Balay } 13222d61bbb3SSatish Balay 13234a2ae208SSatish Balay #undef __FUNCT__ 13244a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqBAIJ" 1325f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 13262d61bbb3SSatish Balay { 13272d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1328690b6cddSBarry Smith PetscInt totalnz = a->bs2*a->nz; 1329f4df32b1SMatthew Knepley PetscScalar oalpha = alpha; 1330efee365bSSatish Balay PetscErrorCode ierr; 13310805154bSBarry Smith PetscBLASInt one = 1,tnz = PetscBLASIntCast(totalnz); 13322d61bbb3SSatish Balay 13332d61bbb3SSatish Balay PetscFunctionBegin; 1334f4df32b1SMatthew Knepley BLASscal_(&tnz,&oalpha,a->a,&one); 1335efee365bSSatish Balay ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 13362d61bbb3SSatish Balay PetscFunctionReturn(0); 13372d61bbb3SSatish Balay } 13382d61bbb3SSatish Balay 13394a2ae208SSatish Balay #undef __FUNCT__ 13404a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqBAIJ" 1341dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 13422d61bbb3SSatish Balay { 13438a62d963SHong Zhang PetscErrorCode ierr; 13442d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13453f1db9ecSBarry Smith MatScalar *v = a->a; 1346329f5518SBarry Smith PetscReal sum = 0.0; 1347*d0f46423SBarry Smith PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1; 13482d61bbb3SSatish Balay 13492d61bbb3SSatish Balay PetscFunctionBegin; 13502d61bbb3SSatish Balay if (type == NORM_FROBENIUS) { 13512d61bbb3SSatish Balay for (i=0; i< bs2*nz; i++) { 1352aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 1353329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 13542d61bbb3SSatish Balay #else 13552d61bbb3SSatish Balay sum += (*v)*(*v); v++; 13562d61bbb3SSatish Balay #endif 13572d61bbb3SSatish Balay } 13582d61bbb3SSatish Balay *norm = sqrt(sum); 13598a62d963SHong Zhang } else if (type == NORM_1) { /* maximum column sum */ 13608a62d963SHong Zhang PetscReal *tmp; 13618a62d963SHong Zhang PetscInt *bcol = a->j; 1362*d0f46423SBarry Smith ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1363*d0f46423SBarry Smith ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr); 13648a62d963SHong Zhang for (i=0; i<nz; i++){ 13658a62d963SHong Zhang for (j=0; j<bs; j++){ 13668a62d963SHong Zhang k1 = bs*(*bcol) + j; /* column index */ 13678a62d963SHong Zhang for (k=0; k<bs; k++){ 13688a62d963SHong Zhang tmp[k1] += PetscAbsScalar(*v); v++; 13698a62d963SHong Zhang } 13708a62d963SHong Zhang } 13718a62d963SHong Zhang bcol++; 13728a62d963SHong Zhang } 13738a62d963SHong Zhang *norm = 0.0; 1374*d0f46423SBarry Smith for (j=0; j<A->cmap->n; j++) { 13758a62d963SHong Zhang if (tmp[j] > *norm) *norm = tmp[j]; 13768a62d963SHong Zhang } 13778a62d963SHong Zhang ierr = PetscFree(tmp);CHKERRQ(ierr); 1378596552b5SBarry Smith } else if (type == NORM_INFINITY) { /* maximum row sum */ 1379596552b5SBarry Smith *norm = 0.0; 1380596552b5SBarry Smith for (k=0; k<bs; k++) { 138174f84c7bSSatish Balay for (j=0; j<a->mbs; j++) { 1382596552b5SBarry Smith v = a->a + bs2*a->i[j] + k; 1383596552b5SBarry Smith sum = 0.0; 1384596552b5SBarry Smith for (i=0; i<a->i[j+1]-a->i[j]; i++) { 13850e90e235SBarry Smith for (k1=0; k1<bs; k1++){ 1386596552b5SBarry Smith sum += PetscAbsScalar(*v); 1387596552b5SBarry Smith v += bs; 13882d61bbb3SSatish Balay } 13890e90e235SBarry Smith } 1390596552b5SBarry Smith if (sum > *norm) *norm = sum; 1391596552b5SBarry Smith } 1392596552b5SBarry Smith } 1393596552b5SBarry Smith } else { 139429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 13952d61bbb3SSatish Balay } 13962d61bbb3SSatish Balay PetscFunctionReturn(0); 13972d61bbb3SSatish Balay } 13982d61bbb3SSatish Balay 13992d61bbb3SSatish Balay 14004a2ae208SSatish Balay #undef __FUNCT__ 14014a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqBAIJ" 1402dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 14032d61bbb3SSatish Balay { 14042d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1405dfbe8321SBarry Smith PetscErrorCode ierr; 14062d61bbb3SSatish Balay 14072d61bbb3SSatish Balay PetscFunctionBegin; 14082d61bbb3SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1409*d0f46423SBarry Smith if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { 1410273d9f13SBarry Smith *flg = PETSC_FALSE; 1411273d9f13SBarry Smith PetscFunctionReturn(0); 14122d61bbb3SSatish Balay } 14132d61bbb3SSatish Balay 14142d61bbb3SSatish Balay /* if the a->i are the same */ 1415690b6cddSBarry Smith ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1416abc0a331SBarry Smith if (!*flg) { 14170f5bd95cSBarry Smith PetscFunctionReturn(0); 14182d61bbb3SSatish Balay } 14192d61bbb3SSatish Balay 14202d61bbb3SSatish Balay /* if a->j are the same */ 1421690b6cddSBarry Smith ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1422abc0a331SBarry Smith if (!*flg) { 14230f5bd95cSBarry Smith PetscFunctionReturn(0); 14242d61bbb3SSatish Balay } 14252d61bbb3SSatish Balay /* if a->a are the same */ 1426*d0f46423SBarry Smith ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 14272d61bbb3SSatish Balay PetscFunctionReturn(0); 14282d61bbb3SSatish Balay 14292d61bbb3SSatish Balay } 14302d61bbb3SSatish Balay 14314a2ae208SSatish Balay #undef __FUNCT__ 14324a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1433dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 14342d61bbb3SSatish Balay { 14352d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1436dfbe8321SBarry Smith PetscErrorCode ierr; 1437690b6cddSBarry Smith PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 143887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 14393f1db9ecSBarry Smith MatScalar *aa,*aa_j; 14402d61bbb3SSatish Balay 14412d61bbb3SSatish Balay PetscFunctionBegin; 144229bbc08cSBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1443*d0f46423SBarry Smith bs = A->rmap->bs; 14442d61bbb3SSatish Balay aa = a->a; 14452d61bbb3SSatish Balay ai = a->i; 14462d61bbb3SSatish Balay aj = a->j; 14472d61bbb3SSatish Balay ambs = a->mbs; 14482d61bbb3SSatish Balay bs2 = a->bs2; 14492d61bbb3SSatish Balay 14502dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 14511ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1452e1311b90SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1453*d0f46423SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 14542d61bbb3SSatish Balay for (i=0; i<ambs; i++) { 14552d61bbb3SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 14562d61bbb3SSatish Balay if (aj[j] == i) { 14572d61bbb3SSatish Balay row = i*bs; 14582d61bbb3SSatish Balay aa_j = aa+j*bs2; 14592d61bbb3SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 14602d61bbb3SSatish Balay break; 14612d61bbb3SSatish Balay } 14622d61bbb3SSatish Balay } 14632d61bbb3SSatish Balay } 14641ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 14652d61bbb3SSatish Balay PetscFunctionReturn(0); 14662d61bbb3SSatish Balay } 14672d61bbb3SSatish Balay 14684a2ae208SSatish Balay #undef __FUNCT__ 14694a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1470dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 14712d61bbb3SSatish Balay { 14722d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 147387828ca2SBarry Smith PetscScalar *l,*r,x,*li,*ri; 14743f1db9ecSBarry Smith MatScalar *aa,*v; 1475dfbe8321SBarry Smith PetscErrorCode ierr; 1476690b6cddSBarry Smith PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 14772d61bbb3SSatish Balay 14782d61bbb3SSatish Balay PetscFunctionBegin; 14792d61bbb3SSatish Balay ai = a->i; 14802d61bbb3SSatish Balay aj = a->j; 14812d61bbb3SSatish Balay aa = a->a; 1482*d0f46423SBarry Smith m = A->rmap->n; 1483*d0f46423SBarry Smith n = A->cmap->n; 1484*d0f46423SBarry Smith bs = A->rmap->bs; 14852d61bbb3SSatish Balay mbs = a->mbs; 14862d61bbb3SSatish Balay bs2 = a->bs2; 14872d61bbb3SSatish Balay if (ll) { 14881ebc52fbSHong Zhang ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1489e1311b90SBarry Smith ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 149029bbc08cSBarry Smith if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 14912d61bbb3SSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 14922d61bbb3SSatish Balay M = ai[i+1] - ai[i]; 14932d61bbb3SSatish Balay li = l + i*bs; 14942d61bbb3SSatish Balay v = aa + bs2*ai[i]; 14952d61bbb3SSatish Balay for (j=0; j<M; j++) { /* for each block */ 14962d61bbb3SSatish Balay for (k=0; k<bs2; k++) { 14972d61bbb3SSatish Balay (*v++) *= li[k%bs]; 14982d61bbb3SSatish Balay } 14992d61bbb3SSatish Balay } 15002d61bbb3SSatish Balay } 15011ebc52fbSHong Zhang ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1502efee365bSSatish Balay ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 15032d61bbb3SSatish Balay } 15042d61bbb3SSatish Balay 15052d61bbb3SSatish Balay if (rr) { 15061ebc52fbSHong Zhang ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1507e1311b90SBarry Smith ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 150829bbc08cSBarry Smith if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 15092d61bbb3SSatish Balay for (i=0; i<mbs; i++) { /* for each block row */ 15102d61bbb3SSatish Balay M = ai[i+1] - ai[i]; 15112d61bbb3SSatish Balay v = aa + bs2*ai[i]; 15122d61bbb3SSatish Balay for (j=0; j<M; j++) { /* for each block */ 15132d61bbb3SSatish Balay ri = r + bs*aj[ai[i]+j]; 15142d61bbb3SSatish Balay for (k=0; k<bs; k++) { 15152d61bbb3SSatish Balay x = ri[k]; 15162d61bbb3SSatish Balay for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 15172d61bbb3SSatish Balay } 15182d61bbb3SSatish Balay } 15192d61bbb3SSatish Balay } 15201ebc52fbSHong Zhang ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1521efee365bSSatish Balay ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 15222d61bbb3SSatish Balay } 15232d61bbb3SSatish Balay PetscFunctionReturn(0); 15242d61bbb3SSatish Balay } 15252d61bbb3SSatish Balay 15262d61bbb3SSatish Balay 15274a2ae208SSatish Balay #undef __FUNCT__ 15284a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1529dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 15302d61bbb3SSatish Balay { 15312d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 15322d61bbb3SSatish Balay 15332d61bbb3SSatish Balay PetscFunctionBegin; 1534*d0f46423SBarry Smith info->rows_global = (double)A->rmap->N; 1535*d0f46423SBarry Smith info->columns_global = (double)A->cmap->n; 1536*d0f46423SBarry Smith info->rows_local = (double)A->rmap->n; 1537*d0f46423SBarry Smith info->columns_local = (double)A->cmap->n; 15382d61bbb3SSatish Balay info->block_size = a->bs2; 15392d61bbb3SSatish Balay info->nz_allocated = a->maxnz; 15402d61bbb3SSatish Balay info->nz_used = a->bs2*a->nz; 15412d61bbb3SSatish Balay info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 15422d61bbb3SSatish Balay info->assemblies = A->num_ass; 15432d61bbb3SSatish Balay info->mallocs = a->reallocs; 15447adad957SLisandro Dalcin info->memory = ((PetscObject)A)->mem; 15452d61bbb3SSatish Balay if (A->factor) { 15462d61bbb3SSatish Balay info->fill_ratio_given = A->info.fill_ratio_given; 15472d61bbb3SSatish Balay info->fill_ratio_needed = A->info.fill_ratio_needed; 15482d61bbb3SSatish Balay info->factor_mallocs = A->info.factor_mallocs; 15492d61bbb3SSatish Balay } else { 15502d61bbb3SSatish Balay info->fill_ratio_given = 0; 15512d61bbb3SSatish Balay info->fill_ratio_needed = 0; 15522d61bbb3SSatish Balay info->factor_mallocs = 0; 15532d61bbb3SSatish Balay } 15542d61bbb3SSatish Balay PetscFunctionReturn(0); 15552d61bbb3SSatish Balay } 15562d61bbb3SSatish Balay 15572d61bbb3SSatish Balay 15584a2ae208SSatish Balay #undef __FUNCT__ 15594a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1560dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 15612d61bbb3SSatish Balay { 15622d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1563dfbe8321SBarry Smith PetscErrorCode ierr; 15642d61bbb3SSatish Balay 15652d61bbb3SSatish Balay PetscFunctionBegin; 1566549d3d68SSatish Balay ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 15672d61bbb3SSatish Balay PetscFunctionReturn(0); 15682d61bbb3SSatish Balay } 1569