1c4762a1bSJed Brown static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown int main(int argc,char **argv) 6c4762a1bSJed Brown { 7c4762a1bSJed Brown PetscErrorCode ierr; 8c4762a1bSJed Brown PetscMPIInt rank,size; 9c4762a1bSJed Brown Mat A; 10c4762a1bSJed Brown PetscInt i,j,k,n=3,vstart,rstart,rend,margin; 11c4762a1bSJed Brown Vec x,y; 12c4762a1bSJed Brown 13c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 14*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 15*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 16c4762a1bSJed Brown 17*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A)); 18*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,n,n,PETSC_DECIDE,PETSC_DECIDE)); 19*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetFromOptions(A)); 20c4762a1bSJed Brown 21*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,n,NULL,0,NULL)); 22*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 23*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend)); 24*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(A,&x,&y)); 25*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSet(x,1.0)); 26c4762a1bSJed Brown 27c4762a1bSJed Brown /* 28a3234186SStefano Zampini Matrix A only has nonzeros in the diagonal block, which is of size 3x3. 29c4762a1bSJed Brown We do three successive assemblies on A. The first two have the same non-zero 30c4762a1bSJed Brown pattern but different values, and the third breaks the non-zero pattern. The 31c4762a1bSJed Brown first two assemblies have enough zero-rows that triggers compressed-row storage 32c4762a1bSJed Brown in MATAIJ and MATAIJCUSPARSE. 33c4762a1bSJed Brown 34c4762a1bSJed Brown These settings are used to test memory management and correctness in MatMult 35c4762a1bSJed Brown and MatMultTransposeAdd. 36c4762a1bSJed Brown */ 37c4762a1bSJed Brown 38c4762a1bSJed Brown for (k=0; k<3; k++) { /* Three assemblies */ 39c4762a1bSJed Brown vstart = (size*k + rank)*n*n+1; 40c4762a1bSJed Brown margin = (k == 2)? 0 : 2; /* Create two zero-rows in the first two assemblies */ 41c4762a1bSJed Brown for (i=rstart; i<rend-margin; i++) { 42c4762a1bSJed Brown for (j=rstart; j<rend; j++) { 43*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValue(A,i,j,(PetscScalar)vstart,INSERT_VALUES)); 44c4762a1bSJed Brown vstart++; 45c4762a1bSJed Brown } 46c4762a1bSJed Brown } 47*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 48*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 49*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMult(A,x,y)); 50*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultTransposeAdd(A,x,y,y)); /* y[i] = sum of row i and column i of A */ 51*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 52c4762a1bSJed Brown } 53c4762a1bSJed Brown 54*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 55*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&x)); 56*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&y)); 57c4762a1bSJed Brown ierr = PetscFinalize(); 58c4762a1bSJed Brown 59c4762a1bSJed Brown /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */ 60c4762a1bSJed Brown /*cudaDeviceReset();*/ 61c4762a1bSJed Brown return ierr; 62c4762a1bSJed Brown } 63c4762a1bSJed Brown 64c4762a1bSJed Brown /*TEST 65c4762a1bSJed Brown 66c4762a1bSJed Brown testset: 67c4762a1bSJed Brown nsize: 2 68c4762a1bSJed Brown output_file: output/ex236_1.out 69c4762a1bSJed Brown filter: grep -v type 70c4762a1bSJed Brown 71c4762a1bSJed Brown test: 72c4762a1bSJed Brown args: -mat_type aij 73c4762a1bSJed Brown 74c4762a1bSJed Brown test: 75c4762a1bSJed Brown requires: cuda 76c4762a1bSJed Brown suffix: cuda 77c4762a1bSJed Brown args: -mat_type aijcusparse 78c4762a1bSJed Brown TEST*/ 79