xref: /petsc/src/mat/tests/ex32.c (revision 2c71b3e237ead271e4f3aa1505f92bf476e3413d)
1c4762a1bSJed Brown 
2c4762a1bSJed Brown static char help[] = "Tests MATSEQDENSECUDA\n\n";
3c4762a1bSJed Brown 
4c4762a1bSJed Brown #include <petscmat.h>
5c4762a1bSJed Brown 
6c4762a1bSJed Brown int main(int argc,char **argv)
7c4762a1bSJed Brown {
8c4762a1bSJed Brown   Mat            A,AC,B;
9c4762a1bSJed Brown   PetscErrorCode ierr;
10c4762a1bSJed Brown   PetscInt       m = 10,n = 10;
11c4762a1bSJed Brown   PetscReal      r,tol = 10*PETSC_SMALL;
12c4762a1bSJed Brown 
13c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
14c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr);
15c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
16c4762a1bSJed Brown   ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr);
17c4762a1bSJed Brown   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr);
18c4762a1bSJed Brown   ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
19c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
20c4762a1bSJed Brown   ierr = MatSeqDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
21c4762a1bSJed Brown   ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
22c20d7725SJed Brown #if 0
23c20d7725SJed Brown   PetscInt       i,j;
24c20d7725SJed Brown   PetscScalar    val;
25c20d7725SJed Brown   for (i=0; i<m; i++) {
26c20d7725SJed Brown     for (j=0; j<n; j++) {
27c20d7725SJed Brown       val = (PetscScalar)(i+j);
28c20d7725SJed Brown       ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
29c20d7725SJed Brown     }
30c20d7725SJed Brown   }
31c20d7725SJed Brown   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32c20d7725SJed Brown   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
33c20d7725SJed Brown #endif
34c4762a1bSJed Brown 
35c4762a1bSJed Brown   /* Create a CUDA version of A */
36c20d7725SJed Brown #if defined(PETSC_HAVE_CUDA)
37c4762a1bSJed Brown   ierr = MatConvert(A,MATSEQDENSECUDA,MAT_INITIAL_MATRIX,&AC);CHKERRQ(ierr);
38c20d7725SJed Brown #else
39c20d7725SJed Brown   ierr = MatDuplicate(A,MAT_COPY_VALUES,&AC);CHKERRQ(ierr);
40c20d7725SJed Brown #endif
41c4762a1bSJed Brown   ierr = MatDuplicate(AC,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
42c4762a1bSJed Brown 
43c4762a1bSJed Brown   /* full CUDA AXPY */
44c4762a1bSJed Brown   ierr = MatAXPY(B,-1.0,AC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
45c4762a1bSJed Brown   ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr);
46*2c71b3e2SJacob Faibussowitsch   PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY %g",(double)r);
47c4762a1bSJed Brown 
48c4762a1bSJed Brown   /* test Copy */
49c4762a1bSJed Brown   ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
50c4762a1bSJed Brown 
51c4762a1bSJed Brown   /* call MatAXPY_Basic since B is CUDA, A is CPU,  */
52c4762a1bSJed Brown   ierr = MatAXPY(B,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
53c4762a1bSJed Brown   ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr);
54*2c71b3e2SJacob Faibussowitsch   PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY_Basic %g",(double)r);
55c4762a1bSJed Brown 
56c4762a1bSJed Brown   if (m == n) {
57c4762a1bSJed Brown     Mat B1,B2;
58c4762a1bSJed Brown 
59c4762a1bSJed Brown     ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
60c4762a1bSJed Brown     /* full CUDA PtAP */
61c4762a1bSJed Brown     ierr = MatPtAP(B,AC,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr);
62c20d7725SJed Brown 
63c4762a1bSJed Brown     /* CPU PtAP since A is on the CPU only */
64c4762a1bSJed Brown     ierr = MatPtAP(B,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr);
65c20d7725SJed Brown 
66c4762a1bSJed Brown     ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
67c4762a1bSJed Brown     ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr);
68*2c71b3e2SJacob Faibussowitsch     PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r);
69c4762a1bSJed Brown 
70c4762a1bSJed Brown     /* test reuse */
71c4762a1bSJed Brown     ierr = MatPtAP(B,AC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr);
72c4762a1bSJed Brown     ierr = MatPtAP(B,A,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr);
73c4762a1bSJed Brown     ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
74c4762a1bSJed Brown     ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr);
75*2c71b3e2SJacob Faibussowitsch     PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r);
76c4762a1bSJed Brown 
77c4762a1bSJed Brown     ierr = MatDestroy(&B1);CHKERRQ(ierr);
78c4762a1bSJed Brown     ierr = MatDestroy(&B2);CHKERRQ(ierr);
79c4762a1bSJed Brown   }
80c4762a1bSJed Brown 
81c4762a1bSJed Brown   ierr = MatDestroy(&B);CHKERRQ(ierr);
82c4762a1bSJed Brown   ierr = MatDestroy(&AC);CHKERRQ(ierr);
83c4762a1bSJed Brown   ierr = MatDestroy(&A);CHKERRQ(ierr);
84c4762a1bSJed Brown   ierr = PetscFinalize();
85c4762a1bSJed Brown   return ierr;
86c4762a1bSJed Brown }
87c4762a1bSJed Brown 
88c4762a1bSJed Brown /*TEST
89c4762a1bSJed Brown 
90c4762a1bSJed Brown    build:
91c4762a1bSJed Brown      requires: cuda
92c4762a1bSJed Brown 
93c4762a1bSJed Brown    test:
94c4762a1bSJed Brown      output_file: output/ex32_1.out
95c4762a1bSJed Brown      args: -m {{3 5 12}} -n {{3 5 12}}
96c4762a1bSJed Brown      suffix: seqdensecuda
97c4762a1bSJed Brown 
98c4762a1bSJed Brown TEST*/
99