#include <../src/mat/impls/baij/seq/baij.h> #include <../src/mat/impls/dense/seq/dense.h> #include <../src/mat/impls/sbaij/seq/sbaij.h> #include #include #include PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2; const PetscInt *idx; PetscBT table_out,table_in; PetscFunctionBegin; PetscCheck(ov >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); mbs = a->mbs; ai = a->i; aj = a->j; bs = A->rmap->bs; PetscCall(PetscBTCreate(mbs,&table_out)); PetscCall(PetscMalloc1(mbs+1,&nidx)); PetscCall(PetscMalloc1(A->rmap->N+1,&nidx2)); PetscCall(PetscBTCreate(mbs,&table_in)); for (i=0; i= n) break; /* for (brow=0; brow bcol_max) break; if (PetscBTLookup(table_in,bcol)) { if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow; break; /* for l = start; lops->mult = NULL; Bseq->ops->multadd = NULL; Bseq->ops->multtranspose = NULL; Bseq->ops->multtransposeadd = NULL; Bseq->ops->lufactor = NULL; Bseq->ops->choleskyfactor = NULL; Bseq->ops->lufactorsymbolic = NULL; Bseq->ops->choleskyfactorsymbolic = NULL; Bseq->ops->getinertia = NULL; PetscFunctionReturn(0); } /* same as MatCreateSubMatrices_SeqBAIJ(), except cast Mat_SeqSBAIJ */ PetscErrorCode MatCreateSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; const PetscInt *irow,*icol; PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; PetscInt *aj = a->j,*ai = a->i; MatScalar *mat_a; Mat C; PetscBool flag; PetscFunctionBegin; PetscCall(ISGetIndices(isrow,&irow)); PetscCall(ISGetIndices(iscol,&icol)); PetscCall(ISGetLocalSize(isrow,&nrows)); PetscCall(ISGetLocalSize(iscol,&ncols)); PetscCall(PetscCalloc1(1+oldcols,&smap)); ssmap = smap; PetscCall(PetscMalloc1(1+nrows,&lens)); for (i=0; iilen[irow[i]]; lens[i] = 0; for (k=kstart; kdata); PetscCheck(c->mbs == nrows && c->nbs == ncols && (*B)->rmap->bs == bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); PetscCall(PetscArraycmp(c->ilen,lens,c->mbs,&flag)); PetscCheck(flag,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); PetscCall(PetscArrayzero(c->ilen,c->mbs)); C = *B; } else { PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C)); PetscCall(MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE)); PetscCall(MatSetType(C,((PetscObject)A)->type_name)); PetscCall(MatSeqSBAIJSetPreallocation(C,bs,0,lens)); } c = (Mat_SeqSBAIJ*)(C->data); for (i=0; iilen[row]; mat_i = c->i[i]; mat_j = c->j + mat_i; mat_a = c->a + mat_i*bs2; mat_ilen = c->ilen + i; for (k=kstart; kj[k]])) { *mat_j++ = tcol - 1; PetscCall(PetscArraycpy(mat_a,a->a+k*bs2,bs2)); mat_a += bs2; (*mat_ilen)++; } } } /* sort */ { MatScalar *work; PetscCall(PetscMalloc1(bs2,&work)); for (i=0; ii[i]; mat_j = c->j + mat_i; mat_a = c->a + mat_i*bs2; ilen = c->ilen[i]; PetscCall(PetscSortIntWithDataArray(ilen,mat_j,mat_a,bs2*sizeof(MatScalar),work)); } PetscCall(PetscFree(work)); } /* Free work space */ PetscCall(ISRestoreIndices(iscol,&icol)); PetscCall(PetscFree(smap)); PetscCall(PetscFree(lens)); PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); PetscCall(ISRestoreIndices(isrow,&irow)); *B = C; PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS is1,is2; PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count,maxmnbs; const PetscInt *irow,*icol; PetscFunctionBegin; PetscCall(ISGetIndices(isrow,&irow)); PetscCall(ISGetIndices(iscol,&icol)); PetscCall(ISGetLocalSize(isrow,&nrows)); PetscCall(ISGetLocalSize(iscol,&ncols)); /* Verify if the indices corespond to each element in a block and form the IS with compressed IS */ maxmnbs = PetscMax(a->mbs,a->nbs); PetscCall(PetscMalloc2(maxmnbs,&vary,maxmnbs,&iary)); PetscCall(PetscArrayzero(vary,a->mbs)); for (i=0; imbs; i++) { PetscCheck(vary[i] == 0 || vary[i] == bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); } count = 0; for (i=0; inbs)); for (i=0; inbs; i++) { PetscCheck(vary[i] == 0 || vary[i] == bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal error in PETSc"); } count = 0; for (i=0; idata; PetscScalar *z,x1,x2,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecSet(zz,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[2*i] += v[0]*x1 + v[2]*x2; z[2*i+1] += v[2]*x1 + v[3]*x2; v += 4; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj = a->j,*ai = a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecSet(zz,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; v += 9; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj = a->j,*ai = a->i,*ib; PetscInt nonzerorow = 0; PetscFunctionBegin; PetscCall(VecSet(zz,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; v += 16; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj = a->j,*ai = a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecSet(zz,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; v += 25; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecSet(zz,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; v += 36; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6,x7,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecSet(zz,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; v += 49; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow)); PetscFunctionReturn(0); } /* This will not work with MatScalar == float because it calls the BLAS */ PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,*z_ptr,*zb,*work,*workt,zero=0.0; const PetscScalar *x,*x_ptr,*xb; const MatScalar *v; PetscInt mbs =a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; const PetscInt *idx,*aj,*ii; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecSet(zz,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); x_ptr = x; z_ptr = z; aj = a->j; v = a->a; ii = a->i; if (!a->mult_work) { PetscCall(PetscMalloc1(A->rmap->N+1,&a->mult_work)); } work = a->mult_work; for (i=0; i0); /* upper triangular part */ for (j=0; j 0) { workt = work; PetscCall(PetscArrayzero(workt,ncols)); PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); for (j=0; jnz*2.0 - nonzerorow)*bs2 - nonzerorow)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs =a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; #if defined(PETSC_USE_COMPLEX) const int aconj = A->hermitian == PETSC_BOOL3_TRUE; #else const int aconj = 0; #endif PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (n && *ib == i) { /* (diag of A)*x */ z[i] += *v++ * x[*ib++]; jmin++; } if (aconj) { for (j=jmin; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs =a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (n && *ib == i) { /* (diag of A)*x */ z[2*i] += v[0]*x1 + v[2]*x2; z[2*i+1] += v[2]*x1 + v[3]*x2; v += 4; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (n && *ib == i) { /* (diag of A)*x */ z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; v += 9; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (n && *ib == i) { /* (diag of A)*x */ z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; v += 16; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (n && *ib == i) { /* (diag of A)*x */ z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; v += 25; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (n && *ib == i) { /* (diag of A)*x */ z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; v += 36; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6,x7; const PetscScalar *x,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); PetscCall(VecGetArray(zz,&z)); v = a->a; xb = x; for (i=0; i0); if (n && *ib == i) { /* (diag of A)*x */ z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; v += 49; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,*z_ptr=NULL,*zb,*work,*workt; const PetscScalar *x,*x_ptr,*xb; const MatScalar *v; PetscInt mbs = a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; const PetscInt *idx,*aj,*ii; PetscInt nonzerorow=0; PetscFunctionBegin; PetscCall(VecCopy(yy,zz)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArrayRead(xx,&x)); x_ptr=x; PetscCall(VecGetArray(zz,&z)); z_ptr=z; aj = a->j; v = a->a; ii = a->i; if (!a->mult_work) { PetscCall(PetscMalloc1(A->rmap->n+1,&a->mult_work)); } work = a->mult_work; for (i=0; i0); /* upper triangular part */ for (j=0; j 0) { workt = work; PetscCall(PetscArrayzero(workt,ncols)); PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); for (j=0; jnz*2.0 - nonzerorow))); PetscFunctionReturn(0); } PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; PetscScalar oalpha = alpha; PetscBLASInt one = 1,totalnz; PetscFunctionBegin; PetscCall(PetscBLASIntCast(a->bs2*a->nz,&totalnz)); PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one)); PetscCall(PetscLogFlops(totalnz)); PetscFunctionReturn(0); } PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; const MatScalar *v = a->a; PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,jmin,jmax,nexti,ik,*jl,*il; const PetscInt *aj=a->j,*col; PetscFunctionBegin; if (!a->nz) { *norm = 0.0; PetscFunctionReturn(0); } if (type == NORM_FROBENIUS) { for (k=0; ki[k]; jmax = a->i[k+1]; col = aj + jmin; if (jmax-jmin > 0 && *col == k) { /* diagonal block */ for (i=0; inz)); } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */ PetscCall(PetscMalloc3(bs,&sum,mbs,&il,mbs,&jl)); for (i=0; ia + ik*bs2 + j*bs; for (k1=0; k1i[i+1]; if (jmin < jmax) { il[i] = jmin; j = a->j[jmin]; jl[i] = jl[j]; jl[j]=i; } i = nexti; } /*-- row sum --*/ jmin = a->i[k]; jmax = a->i[k+1]; for (i=jmin; ia + i*bs2 + j; for (k1=0; k1 0 && *col == k) jmin++; if (jmin < jmax) { il[k] = jmin; j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k; } for (j=0; j *norm) *norm = sum[j]; } } PetscCall(PetscFree3(sum,il,jl)); PetscCall(PetscLogFlops(PetscMax(mbs*a->nz-1,0))); } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); PetscFunctionReturn(0); } PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data; PetscFunctionBegin; /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } /* if the a->i are the same */ PetscCall(PetscArraycmp(a->i,b->i,a->mbs+1,flg)); if (!*flg) PetscFunctionReturn(0); /* if a->j are the same */ PetscCall(PetscArraycmp(a->j,b->j,a->nz,flg)); if (!*flg) PetscFunctionReturn(0); /* if a->a are the same */ PetscCall(PetscArraycmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs),flg)); PetscFunctionReturn(0); } PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscInt i,j,k,row,bs,ambs,bs2; const PetscInt *ai,*aj; PetscScalar *x,zero = 0.0; const MatScalar *aa,*aa_j; PetscFunctionBegin; bs = A->rmap->bs; PetscCheck(!A->factortype || bs <= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); aa = a->a; ambs = a->mbs; if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) { PetscInt *diag=a->diag; aa = a->a; ambs = a->mbs; PetscCall(VecGetArray(v,&x)); for (i=0; ii; aj = a->j; bs2 = a->bs2; PetscCall(VecSet(v,zero)); if (!a->nz) PetscFunctionReturn(0); PetscCall(VecGetArray(v,&x)); for (i=0; idata; PetscScalar x; const PetscScalar *l,*li,*ri; MatScalar *aa,*v; PetscInt i,j,k,lm,M,m,mbs,tmp,bs,bs2; const PetscInt *ai,*aj; PetscBool flg; PetscFunctionBegin; if (ll != rr) { PetscCall(VecEqual(ll,rr,&flg)); PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same"); } if (!ll) PetscFunctionReturn(0); ai = a->i; aj = a->j; aa = a->a; m = A->rmap->N; bs = A->rmap->bs; mbs = a->mbs; bs2 = a->bs2; PetscCall(VecGetArrayRead(ll,&l)); PetscCall(VecGetLocalSize(ll,&lm)); PetscCheck(lm == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); for (i=0; inz)); PetscFunctionReturn(0); } PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscFunctionBegin; info->block_size = a->bs2; info->nz_allocated = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */ info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ info->nz_unneeded = info->nz_allocated - info->nz_used; info->assemblies = A->num_ass; info->mallocs = A->info.mallocs; info->memory = ((PetscObject)A)->mem; if (A->factortype) { info->fill_ratio_given = A->info.fill_ratio_given; info->fill_ratio_needed = A->info.fill_ratio_needed; info->factor_mallocs = A->info.factor_mallocs; } else { info->fill_ratio_given = 0; info->fill_ratio_needed = 0; info->factor_mallocs = 0; } PetscFunctionReturn(0); } PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscFunctionBegin; PetscCall(PetscArrayzero(a->a,a->bs2*a->i[a->mbs])); PetscFunctionReturn(0); } PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[]) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscInt i,j,n,row,col,bs,mbs; const PetscInt *ai,*aj; PetscReal atmp; const MatScalar *aa; PetscScalar *x; PetscInt ncols,brow,bcol,krow,kcol; PetscFunctionBegin; PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); bs = A->rmap->bs; aa = a->a; ai = a->i; aj = a->j; mbs = a->mbs; PetscCall(VecSet(v,0.0)); PetscCall(VecGetArray(v,&x)); PetscCall(VecGetLocalSize(v,&n)); PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); for (i=0; i i && PetscRealPart(x[col]) < atmp) x[col] = atmp; } } aj++; } } PetscCall(VecRestoreArray(v,&x)); PetscFunctionReturn(0); } PetscErrorCode MatMatMultSymbolic_SeqSBAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) { PetscFunctionBegin; PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C)); C->ops->matmultnumeric = MatMatMultNumeric_SeqSBAIJ_SeqDense; PetscFunctionReturn(0); } PetscErrorCode MatMatMult_SeqSBAIJ_1_Private(Mat A,PetscScalar* b,PetscInt bm,PetscScalar* c,PetscInt cm,PetscInt cn) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z = c; const PetscScalar *xb; PetscScalar x1; const MatScalar *v = a->a,*vv; PetscInt mbs = a->mbs,i,*idx = a->j,*ii = a->i,j,*jj,n,k; #if defined(PETSC_USE_COMPLEX) const int aconj = A->hermitian == PETSC_BOOL3_TRUE; #else const int aconj = 0; #endif PetscFunctionBegin; for (i=0; idata; PetscScalar *z = c; const PetscScalar *xb; PetscScalar x1,x2; const MatScalar *v = a->a,*vv; PetscInt mbs = a->mbs,i,*idx = a->j,*ii = a->i,j,*jj,n,k; PetscFunctionBegin; for (i=0; idata; PetscScalar *z = c; const PetscScalar *xb; PetscScalar x1,x2,x3; const MatScalar *v = a->a,*vv; PetscInt mbs = a->mbs,i,*idx = a->j,*ii = a->i,j,*jj,n,k; PetscFunctionBegin; for (i=0; idata; PetscScalar *z = c; const PetscScalar *xb; PetscScalar x1,x2,x3,x4; const MatScalar *v = a->a,*vv; PetscInt mbs = a->mbs,i,*idx = a->j,*ii = a->i,j,*jj,n,k; PetscFunctionBegin; for (i=0; idata; PetscScalar *z = c; const PetscScalar *xb; PetscScalar x1,x2,x3,x4,x5; const MatScalar *v = a->a,*vv; PetscInt mbs = a->mbs,i,*idx = a->j,*ii = a->i,j,*jj,n,k; PetscFunctionBegin; for (i=0; idata; Mat_SeqDense *bd = (Mat_SeqDense*)B->data; Mat_SeqDense *cd = (Mat_SeqDense*)C->data; PetscInt cm=cd->lda,cn=B->cmap->n,bm=bd->lda; PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; PetscBLASInt bbs,bcn,bbm,bcm; PetscScalar *z = NULL; PetscScalar *c,*b; const MatScalar *v; const PetscInt *idx,*ii; PetscScalar _DOne=1.0; PetscFunctionBegin; if (!cm || !cn) PetscFunctionReturn(0); PetscCheck(B->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,B->rmap->n); PetscCheck(A->rmap->n == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,C->rmap->n,A->rmap->n); PetscCheck(B->cmap->n == C->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT,B->cmap->n,C->cmap->n); b = bd->v; PetscCall(MatZeroEntries(C)); PetscCall(MatDenseGetArray(C,&c)); switch (bs) { case 1: PetscCall(MatMatMult_SeqSBAIJ_1_Private(A, b, bm, c, cm, cn)); break; case 2: PetscCall(MatMatMult_SeqSBAIJ_2_Private(A, b, bm, c, cm, cn)); break; case 3: PetscCall(MatMatMult_SeqSBAIJ_3_Private(A, b, bm, c, cm, cn)); break; case 4: PetscCall(MatMatMult_SeqSBAIJ_4_Private(A, b, bm, c, cm, cn)); break; case 5: PetscCall(MatMatMult_SeqSBAIJ_5_Private(A, b, bm, c, cm, cn)); break; default: /* block sizes larger than 5 by 5 are handled by BLAS */ PetscCall(PetscBLASIntCast(bs,&bbs)); PetscCall(PetscBLASIntCast(cn,&bcn)); PetscCall(PetscBLASIntCast(bm,&bbm)); PetscCall(PetscBLASIntCast(cm,&bcm)); idx = a->j; v = a->a; mbs = a->mbs; ii = a->i; z = c; for (i=0; inz*2.0 - a->nonzerorowcnt)*bs2 - a->nonzerorowcnt)*cn)); PetscFunctionReturn(0); }