1c4762a1bSJed Brown static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv) 6d71ae5a4SJacob Faibussowitsch { 7c4762a1bSJed Brown PetscMPIInt rank, size; 8c4762a1bSJed Brown Mat A; 9c4762a1bSJed Brown PetscInt i, j, k, n = 3, vstart, rstart, rend, margin; 10c4762a1bSJed Brown Vec x, y; 11c4762a1bSJed Brown 12327415f7SBarry Smith PetscFunctionBeginUser; 13c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 149566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 159566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 16c4762a1bSJed Brown 179566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 189566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, n, n, PETSC_DECIDE, PETSC_DECIDE)); 199566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 20c4762a1bSJed Brown 219566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, 0, NULL)); 229566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 239566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 249566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &x, &y)); 259566063dSJacob Faibussowitsch PetscCall(VecSet(x, 1.0)); 26c4762a1bSJed Brown 27c4762a1bSJed Brown /* 28a3234186SStefano Zampini Matrix A only has nonzeros in the diagonal block, which is of size 3x3. 29c4762a1bSJed Brown We do three successive assemblies on A. The first two have the same non-zero 30c4762a1bSJed Brown pattern but different values, and the third breaks the non-zero pattern. The 31c4762a1bSJed Brown first two assemblies have enough zero-rows that triggers compressed-row storage 32c4762a1bSJed Brown in MATAIJ and MATAIJCUSPARSE. 33c4762a1bSJed Brown 34c4762a1bSJed Brown These settings are used to test memory management and correctness in MatMult 35c4762a1bSJed Brown and MatMultTransposeAdd. 36c4762a1bSJed Brown */ 37c4762a1bSJed Brown 38c4762a1bSJed Brown for (k = 0; k < 3; k++) { /* Three assemblies */ 39c4762a1bSJed Brown vstart = (size * k + rank) * n * n + 1; 40c4762a1bSJed Brown margin = (k == 2) ? 0 : 2; /* Create two zero-rows in the first two assemblies */ 41c4762a1bSJed Brown for (i = rstart; i < rend - margin; i++) { 42c4762a1bSJed Brown for (j = rstart; j < rend; j++) { 439566063dSJacob Faibussowitsch PetscCall(MatSetValue(A, i, j, (PetscScalar)vstart, INSERT_VALUES)); 44c4762a1bSJed Brown vstart++; 45c4762a1bSJed Brown } 46c4762a1bSJed Brown } 479566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 489566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 499566063dSJacob Faibussowitsch PetscCall(MatMult(A, x, y)); 509566063dSJacob Faibussowitsch PetscCall(MatMultTransposeAdd(A, x, y, y)); /* y[i] = sum of row i and column i of A */ 519566063dSJacob Faibussowitsch PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD)); 52c4762a1bSJed Brown } 53c4762a1bSJed Brown 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 559566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 579566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 58c4762a1bSJed Brown 59*74df5e01SJunchao Zhang /* Uncomment this line if you want to use "compute-sanitizer --tool memcheck [sanitizer_options] app_name [app_options]" to check this program */ 60c4762a1bSJed Brown /*cudaDeviceReset();*/ 61b122ec5aSJacob Faibussowitsch return 0; 62c4762a1bSJed Brown } 63c4762a1bSJed Brown 64c4762a1bSJed Brown /*TEST 65c4762a1bSJed Brown 66c4762a1bSJed Brown testset: 67c4762a1bSJed Brown nsize: 2 68c4762a1bSJed Brown output_file: output/ex236_1.out 69c4762a1bSJed Brown filter: grep -v type 70c4762a1bSJed Brown 71c4762a1bSJed Brown test: 72c4762a1bSJed Brown args: -mat_type aij 73c4762a1bSJed Brown 74c4762a1bSJed Brown test: 75c4762a1bSJed Brown requires: cuda 76c4762a1bSJed Brown suffix: cuda 77c4762a1bSJed Brown args: -mat_type aijcusparse 78c4762a1bSJed Brown TEST*/ 79