1c4762a1bSJed Brown static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5c4762a1bSJed Brown int main(int argc,char **argv) 6c4762a1bSJed Brown { 7c4762a1bSJed Brown PetscMPIInt rank,size; 8c4762a1bSJed Brown Mat A; 9c4762a1bSJed Brown PetscInt i,j,k,n = 3,vstart,rstart,rend,margin; 10c4762a1bSJed Brown Vec x,y; 11c4762a1bSJed Brown 12*9566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 13*9566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 14*9566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 15c4762a1bSJed Brown 16*9566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD,&A)); 17*9566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,n,n,PETSC_DECIDE,PETSC_DECIDE)); 18*9566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 19c4762a1bSJed Brown 20*9566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,n,NULL,0,NULL)); 21*9566063dSJacob Faibussowitsch PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 22*9566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 23*9566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A,&x,&y)); 24*9566063dSJacob Faibussowitsch PetscCall(VecSet(x,1.0)); 25c4762a1bSJed Brown 26c4762a1bSJed Brown /* 27a3234186SStefano Zampini Matrix A only has nonzeros in the diagonal block, which is of size 3x3. 28c4762a1bSJed Brown We do three successive assemblies on A. The first two have the same non-zero 29c4762a1bSJed Brown pattern but different values, and the third breaks the non-zero pattern. The 30c4762a1bSJed Brown first two assemblies have enough zero-rows that triggers compressed-row storage 31c4762a1bSJed Brown in MATAIJ and MATAIJCUSPARSE. 32c4762a1bSJed Brown 33c4762a1bSJed Brown These settings are used to test memory management and correctness in MatMult 34c4762a1bSJed Brown and MatMultTransposeAdd. 35c4762a1bSJed Brown */ 36c4762a1bSJed Brown 37c4762a1bSJed Brown for (k=0; k<3; k++) { /* Three assemblies */ 38c4762a1bSJed Brown vstart = (size*k + rank)*n*n+1; 39c4762a1bSJed Brown margin = (k == 2)? 0 : 2; /* Create two zero-rows in the first two assemblies */ 40c4762a1bSJed Brown for (i=rstart; i<rend-margin; i++) { 41c4762a1bSJed Brown for (j=rstart; j<rend; j++) { 42*9566063dSJacob Faibussowitsch PetscCall(MatSetValue(A,i,j,(PetscScalar)vstart,INSERT_VALUES)); 43c4762a1bSJed Brown vstart++; 44c4762a1bSJed Brown } 45c4762a1bSJed Brown } 46*9566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 47*9566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 48*9566063dSJacob Faibussowitsch PetscCall(MatMult(A,x,y)); 49*9566063dSJacob Faibussowitsch PetscCall(MatMultTransposeAdd(A,x,y,y)); /* y[i] = sum of row i and column i of A */ 50*9566063dSJacob Faibussowitsch PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 51c4762a1bSJed Brown } 52c4762a1bSJed Brown 53*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 54*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 55*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 56*9566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 57c4762a1bSJed Brown 58c4762a1bSJed Brown /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */ 59c4762a1bSJed Brown /*cudaDeviceReset();*/ 60b122ec5aSJacob Faibussowitsch return 0; 61c4762a1bSJed Brown } 62c4762a1bSJed Brown 63c4762a1bSJed Brown /*TEST 64c4762a1bSJed Brown 65c4762a1bSJed Brown testset: 66c4762a1bSJed Brown nsize: 2 67c4762a1bSJed Brown output_file: output/ex236_1.out 68c4762a1bSJed Brown filter: grep -v type 69c4762a1bSJed Brown 70c4762a1bSJed Brown test: 71c4762a1bSJed Brown args: -mat_type aij 72c4762a1bSJed Brown 73c4762a1bSJed Brown test: 74c4762a1bSJed Brown requires: cuda 75c4762a1bSJed Brown suffix: cuda 76c4762a1bSJed Brown args: -mat_type aijcusparse 77c4762a1bSJed Brown TEST*/ 78