xref: /petsc/src/mat/tests/ex236.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
/* Help text for this test program; main() passes it to PetscInitialize(). */
1*c4762a1bSJed Brown static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n";
2*c4762a1bSJed Brown 
3*c4762a1bSJed Brown #include <petscmat.h>
4*c4762a1bSJed Brown 
5*c4762a1bSJed Brown int main(int argc,char **argv)
6*c4762a1bSJed Brown {
7*c4762a1bSJed Brown   PetscErrorCode ierr;
8*c4762a1bSJed Brown   PetscMPIInt    rank,size;
9*c4762a1bSJed Brown   Mat            A;
10*c4762a1bSJed Brown   PetscInt       i,j,k,n=3,vstart,rstart,rend,margin;
11*c4762a1bSJed Brown   Vec            x,y;
12*c4762a1bSJed Brown 
13*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
14*c4762a1bSJed Brown   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
15*c4762a1bSJed Brown   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
16*c4762a1bSJed Brown 
17*c4762a1bSJed Brown   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
18*c4762a1bSJed Brown   ierr = MatSetSizes(A,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
19*c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
20*c4762a1bSJed Brown 
21*c4762a1bSJed Brown   ierr = MatMPIAIJSetPreallocation(A,n,NULL,0,NULL);CHKERRQ(ierr);
22*c4762a1bSJed Brown   ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
23*c4762a1bSJed Brown   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
24*c4762a1bSJed Brown   ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
25*c4762a1bSJed Brown   ierr = VecSet(x,1.0);CHKERRQ(ierr);
26*c4762a1bSJed Brown 
27*c4762a1bSJed Brown   /*
28*c4762a1bSJed Brown     Matirx A only has nonzeros in the diagonal block, which is of size 3x3.
29*c4762a1bSJed Brown     We do three successive assemblies on A. The first two have the same non-zero
30*c4762a1bSJed Brown     pattern but different values, and the third breaks the non-zero pattern. The
31*c4762a1bSJed Brown     first two assemblies have enough zero-rows that triggers compressed-row storage
32*c4762a1bSJed Brown     in MATAIJ and MATAIJCUSPARSE.
33*c4762a1bSJed Brown 
34*c4762a1bSJed Brown     These settings are used to test memory management and correctness in MatMult
35*c4762a1bSJed Brown     and MatMultTransposeAdd.
36*c4762a1bSJed Brown   */
37*c4762a1bSJed Brown 
38*c4762a1bSJed Brown   for (k=0; k<3; k++) { /* Three assemblies */
39*c4762a1bSJed Brown     vstart = (size*k + rank)*n*n+1;
40*c4762a1bSJed Brown     margin = (k == 2)? 0 : 2; /* Create two zero-rows in the first two assemblies */
41*c4762a1bSJed Brown     for (i=rstart; i<rend-margin; i++) {
42*c4762a1bSJed Brown       for (j=rstart; j<rend; j++) {
43*c4762a1bSJed Brown         ierr = MatSetValue(A,i,j,(PetscScalar)vstart,INSERT_VALUES);CHKERRQ(ierr);
44*c4762a1bSJed Brown         vstart++;
45*c4762a1bSJed Brown       }
46*c4762a1bSJed Brown     }
47*c4762a1bSJed Brown     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
48*c4762a1bSJed Brown     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
49*c4762a1bSJed Brown     ierr = MatMult(A,x,y);CHKERRQ(ierr);
50*c4762a1bSJed Brown     ierr = MatMultTransposeAdd(A,x,y,y);CHKERRQ(ierr); /* y[i] = sum of row i and column i of A */
51*c4762a1bSJed Brown     ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
52*c4762a1bSJed Brown   }
53*c4762a1bSJed Brown 
54*c4762a1bSJed Brown   ierr = MatDestroy(&A);CHKERRQ(ierr);
55*c4762a1bSJed Brown   ierr = VecDestroy(&x);CHKERRQ(ierr);
56*c4762a1bSJed Brown   ierr = VecDestroy(&y);CHKERRQ(ierr);
57*c4762a1bSJed Brown   ierr = PetscFinalize();
58*c4762a1bSJed Brown 
59*c4762a1bSJed Brown   /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */
60*c4762a1bSJed Brown   /*cudaDeviceReset();*/
61*c4762a1bSJed Brown   return ierr;
62*c4762a1bSJed Brown }
63*c4762a1bSJed Brown 
64*c4762a1bSJed Brown /*TEST
65*c4762a1bSJed Brown 
66*c4762a1bSJed Brown    testset:
67*c4762a1bSJed Brown      nsize: 2
68*c4762a1bSJed Brown      output_file: output/ex236_1.out
69*c4762a1bSJed Brown      filter: grep -v type
70*c4762a1bSJed Brown 
71*c4762a1bSJed Brown      test:
72*c4762a1bSJed Brown        args: -mat_type aij
73*c4762a1bSJed Brown 
74*c4762a1bSJed Brown      test:
75*c4762a1bSJed Brown        requires: cuda
76*c4762a1bSJed Brown        suffix: cuda
77*c4762a1bSJed Brown        args: -mat_type aijcusparse
78*c4762a1bSJed Brown TEST*/
79