1c4762a1bSJed Brown static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown int main(int argc,char **argv) 6c4762a1bSJed Brown { 7c4762a1bSJed Brown PetscMPIInt rank,size; 8c4762a1bSJed Brown Mat A; 9c4762a1bSJed Brown PetscInt i,j,k,n = 3,vstart,rstart,rend,margin; 10c4762a1bSJed Brown Vec x,y; 11c4762a1bSJed Brown 12*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help)); 135f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 145f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 15c4762a1bSJed Brown 165f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A)); 175f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,n,n,PETSC_DECIDE,PETSC_DECIDE)); 185f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetFromOptions(A)); 19c4762a1bSJed Brown 205f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,n,NULL,0,NULL)); 215f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 225f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend)); 235f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(A,&x,&y)); 245f80ce2aSJacob Faibussowitsch CHKERRQ(VecSet(x,1.0)); 25c4762a1bSJed Brown 26c4762a1bSJed Brown /* 27a3234186SStefano Zampini Matrix A only has nonzeros in the diagonal block, which is of size 3x3. 28c4762a1bSJed Brown We do three successive assemblies on A. The first two have the same non-zero 29c4762a1bSJed Brown pattern but different values, and the third breaks the non-zero pattern. The 30c4762a1bSJed Brown first two assemblies have enough zero-rows that triggers compressed-row storage 31c4762a1bSJed Brown in MATAIJ and MATAIJCUSPARSE. 32c4762a1bSJed Brown 33c4762a1bSJed Brown These settings are used to test memory management and correctness in MatMult 34c4762a1bSJed Brown and MatMultTransposeAdd. 35c4762a1bSJed Brown */ 36c4762a1bSJed Brown 37c4762a1bSJed Brown for (k=0; k<3; k++) { /* Three assemblies */ 38c4762a1bSJed Brown vstart = (size*k + rank)*n*n+1; 39c4762a1bSJed Brown margin = (k == 2)? 0 : 2; /* Create two zero-rows in the first two assemblies */ 40c4762a1bSJed Brown for (i=rstart; i<rend-margin; i++) { 41c4762a1bSJed Brown for (j=rstart; j<rend; j++) { 425f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValue(A,i,j,(PetscScalar)vstart,INSERT_VALUES)); 43c4762a1bSJed Brown vstart++; 44c4762a1bSJed Brown } 45c4762a1bSJed Brown } 465f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 475f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 485f80ce2aSJacob Faibussowitsch CHKERRQ(MatMult(A,x,y)); 495f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultTransposeAdd(A,x,y,y)); /* y[i] = sum of row i and column i of A */ 505f80ce2aSJacob Faibussowitsch CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 51c4762a1bSJed Brown } 52c4762a1bSJed Brown 535f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 545f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&x)); 555f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&y)); 56*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 57c4762a1bSJed Brown 58c4762a1bSJed Brown /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */ 59c4762a1bSJed Brown /*cudaDeviceReset();*/ 60*b122ec5aSJacob Faibussowitsch return 0; 61c4762a1bSJed Brown } 62c4762a1bSJed Brown 63c4762a1bSJed Brown /*TEST 64c4762a1bSJed Brown 65c4762a1bSJed Brown testset: 66c4762a1bSJed Brown nsize: 2 67c4762a1bSJed Brown output_file: output/ex236_1.out 68c4762a1bSJed Brown filter: grep -v type 69c4762a1bSJed Brown 70c4762a1bSJed Brown test: 71c4762a1bSJed Brown args: -mat_type aij 72c4762a1bSJed Brown 73c4762a1bSJed Brown test: 74c4762a1bSJed Brown requires: cuda 75c4762a1bSJed Brown suffix: cuda 76c4762a1bSJed Brown args: -mat_type aijcusparse 77c4762a1bSJed Brown TEST*/ 78