Lines Matching refs:bs
6 __global__ static void MatMultBatched(PetscInt bs, PetscInt mbs, const PetscScalar *A, const PetscS… in MatMultBatched() argument
10 const PetscInt bs2 = bs * bs; in MatMultBatched()
13 for (; row < bs * mbs; row += gridSize) { in MatMultBatched()
18 k = row / bs; /* k-th block */ in MatMultBatched()
19 i = row % bs; /* this thread deals with i-th row of the block */ in MatMultBatched()
20 Ap = &A[bs2 * k + i * (transpose ? bs : 1)]; /* Ap points to the first entry of i-th row */ in MatMultBatched()
21 xp = &x[bs * k]; in MatMultBatched()
22 yp = &y[bs * k]; in MatMultBatched()
25 for (j = 0; j < bs; j++) { in MatMultBatched()
27 Ap += (transpose ? 1 : bs); /* block is in column major order */ in MatMultBatched()
40 const PetscInt bs = jac->bs, mbs = jac->mbs; in PCApplyOrTranspose_PBJacobi_CUDA() local
51 …llCUBLAS(cublasXgemvStridedBatched(handle, op, bs, bs, &alpha, A, bs, bs * bs, xx, 1, bs, &beta, y… in PCApplyOrTranspose_PBJacobi_CUDA()
53 PetscInt gridSize = PetscMin((bs * mbs + 255) / 256, 2147483647); /* <= 2^31-1 */ in PCApplyOrTranspose_PBJacobi_CUDA()
54 …MatMultBatched<<<gridSize, 256>>>(bs, mbs, A, xx, yy, op == CUBLAS_OP_T ? PETSC_TRUE : PETSC_FALSE… in PCApplyOrTranspose_PBJacobi_CUDA()
59 PetscCall(PetscLogGpuFlops(bs * bs * mbs * 2)); in PCApplyOrTranspose_PBJacobi_CUDA()
94 size = sizeof(PetscScalar) * jac->bs * jac->bs * jac->mbs; in PCSetUp_PBJacobi_CUDA()