#include <petscconf.h>
#include <petscdevice.h>
#include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/

static PetscErrorCode MatMPISELLSetPreallocation_MPISELLHIP(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *b = (Mat_MPISELL *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQSELLHIP matrices. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatBindToCPU(b->A, B->boundtocpu));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQSELLHIP));
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    PetscCall(MatBindToCPU(b->B, B->boundtocpu));
    PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetType(b->B, MATSEQSELLHIP));
  }
  PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetFromOptions_MPISELLHIP(Mat, PetscOptionItems)
{
  return PETSC_SUCCESS;
}

static PetscErrorCode MatAssemblyEnd_MPISELLHIP(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_MPISELL(A, mode));
  if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(VecSetType(((Mat_MPISELL *)A->data)->lvec, VECSEQHIP));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MPISELLHIP(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatDestroy_MPISELL(A));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLHIP(Mat B, MatType, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a;
  Mat          A;

  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
  else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
  A             = *newmat;
  A->boundtocpu = PETSC_FALSE;
  PetscCall(PetscFree(A->defaultvectype));
  PetscCall(PetscStrallocpy(VECHIP, &A->defaultvectype));

  a = (Mat_MPISELL *)A->data;
  if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLHIP));
  if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLHIP));
  if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQHIP));

  A->ops->assemblyend    = MatAssemblyEnd_MPISELLHIP;
  A->ops->setfromoptions = MatSetFromOptions_MPISELLHIP;
  A->ops->destroy        = MatDestroy_MPISELLHIP;

  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLHIP));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPISELLHIP(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
  PetscCall(MatCreate_MPISELL(A));
  PetscCall(MatConvert_MPISELL_MPISELLHIP(A, MATMPISELLHIP, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateSELLHIP - Creates a sparse matrix in SELL format.
  This matrix will ultimately be pushed down to GPUs.

  Collective

  Input Parameters:
+ comm  - MPI communicator
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product $ y = Ax $.
. n     - This value should be the same as the local size used in creating the
          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
          calculated if `N` is given) For square matrices `n` is almost always `m`.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (the same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e. `m`.
          For matrices you plan to factor you must leave room for the diagonal entry and
          put in the entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (the same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e. `m`.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  If `d_nnz` (`o_nnz`) is given, then `d_nz` (`o_nz`) is ignored.

  Specify the preallocated storage with either `d_nz`/`o_nz` or `d_nnz`/`o_nnz` (not both).
  Set `d_nz` and `o_nz` to `PETSC_DEFAULT` and `d_nnz` and `o_nnz` to `NULL` for PETSc to
  control dynamic memory allocation.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLHIP`, `MATSELLHIP`
@*/
PetscErrorCode MatCreateSELLHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISELLHIP));
    PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSELLHIP));
    PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSELLHIP - "sellhip" = "mpisellhip" - A matrix type to be used for sparse matrices on AMD GPUs

  Sliced ELLPACK matrix type whose data resides on GPUs.

  This matrix type is identical to `MATSEQSELLHIP` when constructed with a single process communicator,
  and `MATMPISELLHIP` otherwise.
  As a result, for single process communicators,
  `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity.

  Options Database Key:
. -mat_type sellhip - sets the matrix type to `MATSELLHIP` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateSELLHIP()`, `MATSEQSELLHIP`, `MatCreateSeqSELLHIP()`, `MatHIPFormatOperation()`
M*/
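
/*
  Editorial usage sketch (not part of the library source): a minimal program illustrating how
  MatCreateSELLHIP() documented above might be called from application code. It assumes a PETSc
  build configured with HIP; the global size N and the preallocation counts (3 nonzeros per row in
  the diagonal block, 2 in the off-diagonal block) are arbitrary values chosen for the example.

    #include <petscmat.h>

    int main(int argc, char **argv)
    {
      Mat         A;
      PetscInt    i, rstart, rend, N = 100;
      PetscScalar one = 1.0;

      PetscFunctionBeginUser;
      PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
      // Create an N x N SELL matrix whose data lives on the GPU, with simple per-row preallocation
      PetscCall(MatCreateSELLHIP(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, N, N, 3, NULL, 2, NULL, &A));
      PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
      // Insert a unit diagonal in the locally owned rows, then assemble
      for (i = rstart; i < rend; i++) PetscCall(MatSetValue(A, i, i, one, INSERT_VALUES));
      PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatDestroy(&A));
      PetscCall(PetscFinalize());
      return 0;
    }
*/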
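
/*
  A second editorial sketch: selecting MATSELLHIP at run time through the options database, as described
  in the MATSELLHIP documentation above, rather than hard-coding the type. The helper function name and
  the preallocation counts are hypothetical; calling both preallocation routines follows the
  recommendation above, since the one that does not match the final type is ignored.

    #include <petscmat.h>

    static PetscErrorCode CreateMatrixFromOptions(MPI_Comm comm, PetscInt N, Mat *A)
    {
      PetscFunctionBegin;
      PetscCall(MatCreate(comm, A));
      PetscCall(MatSetSizes(*A, PETSC_DECIDE, PETSC_DECIDE, N, N));
      PetscCall(MatSetFromOptions(*A));                            // honors -mat_type sellhip
      PetscCall(MatSeqSELLSetPreallocation(*A, 5, NULL));          // used on a single-process communicator
      PetscCall(MatMPISELLSetPreallocation(*A, 5, NULL, 2, NULL)); // used on a multi-process communicator
      PetscFunctionReturn(PETSC_SUCCESS);
    }

  The application would then be run with "-mat_type sellhip" on the command line.
*/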