xref: /petsc/src/mat/tests/ex236.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1c4762a1bSJed Brown static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n";
2c4762a1bSJed Brown 
3c4762a1bSJed Brown #include <petscmat.h>
4c4762a1bSJed Brown 
5c4762a1bSJed Brown int main(int argc,char **argv)
6c4762a1bSJed Brown {
7c4762a1bSJed Brown   PetscErrorCode ierr;
8c4762a1bSJed Brown   PetscMPIInt    rank,size;
9c4762a1bSJed Brown   Mat            A;
10c4762a1bSJed Brown   PetscInt       i,j,k,n=3,vstart,rstart,rend,margin;
11c4762a1bSJed Brown   Vec            x,y;
12c4762a1bSJed Brown 
13c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
14*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));
15*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
16c4762a1bSJed Brown 
17*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A));
18*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(A,n,n,PETSC_DECIDE,PETSC_DECIDE));
19*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetFromOptions(A));
20c4762a1bSJed Brown 
21*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMPIAIJSetPreallocation(A,n,NULL,0,NULL));
22*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
23*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend));
24*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreateVecs(A,&x,&y));
25*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecSet(x,1.0));
26c4762a1bSJed Brown 
27c4762a1bSJed Brown   /*
28a3234186SStefano Zampini     Matrix A only has nonzeros in the diagonal block, which is of size 3x3.
29c4762a1bSJed Brown     We do three successive assemblies on A. The first two have the same non-zero
30c4762a1bSJed Brown     pattern but different values, and the third breaks the non-zero pattern. The
31c4762a1bSJed Brown     first two assemblies have enough zero-rows that triggers compressed-row storage
32c4762a1bSJed Brown     in MATAIJ and MATAIJCUSPARSE.
33c4762a1bSJed Brown 
34c4762a1bSJed Brown     These settings are used to test memory management and correctness in MatMult
35c4762a1bSJed Brown     and MatMultTransposeAdd.
36c4762a1bSJed Brown   */
37c4762a1bSJed Brown 
38c4762a1bSJed Brown   for (k=0; k<3; k++) { /* Three assemblies */
39c4762a1bSJed Brown     vstart = (size*k + rank)*n*n+1;
40c4762a1bSJed Brown     margin = (k == 2)? 0 : 2; /* Create two zero-rows in the first two assemblies */
41c4762a1bSJed Brown     for (i=rstart; i<rend-margin; i++) {
42c4762a1bSJed Brown       for (j=rstart; j<rend; j++) {
43*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSetValue(A,i,j,(PetscScalar)vstart,INSERT_VALUES));
44c4762a1bSJed Brown         vstart++;
45c4762a1bSJed Brown       }
46c4762a1bSJed Brown     }
47*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
48*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
49*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMult(A,x,y));
50*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMultTransposeAdd(A,x,y,y)); /* y[i] = sum of row i and column i of A */
51*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD));
52c4762a1bSJed Brown   }
53c4762a1bSJed Brown 
54*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A));
55*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&x));
56*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&y));
57c4762a1bSJed Brown   ierr = PetscFinalize();
58c4762a1bSJed Brown 
59c4762a1bSJed Brown   /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */
60c4762a1bSJed Brown   /*cudaDeviceReset();*/
61c4762a1bSJed Brown   return ierr;
62c4762a1bSJed Brown }
63c4762a1bSJed Brown 
64c4762a1bSJed Brown /*TEST
65c4762a1bSJed Brown 
66c4762a1bSJed Brown    testset:
67c4762a1bSJed Brown      nsize: 2
68c4762a1bSJed Brown      output_file: output/ex236_1.out
69c4762a1bSJed Brown      filter: grep -v type
70c4762a1bSJed Brown 
71c4762a1bSJed Brown      test:
72c4762a1bSJed Brown        args: -mat_type aij
73c4762a1bSJed Brown 
74c4762a1bSJed Brown      test:
75c4762a1bSJed Brown        requires: cuda
76c4762a1bSJed Brown        suffix: cuda
77c4762a1bSJed Brown        args: -mat_type aijcusparse
78c4762a1bSJed Brown TEST*/
79