xref: /petsc/src/mat/tests/ex32.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown 
/* Help text for this test program; main() passes it to PetscInitialize(). */
2*c4762a1bSJed Brown static char help[] = "Tests MATSEQDENSECUDA\n\n";
3*c4762a1bSJed Brown 
4*c4762a1bSJed Brown #include <petscmat.h>
5*c4762a1bSJed Brown 
6*c4762a1bSJed Brown int main(int argc,char **argv)
7*c4762a1bSJed Brown {
8*c4762a1bSJed Brown   Mat            A,AC,B;
9*c4762a1bSJed Brown   PetscErrorCode ierr;
10*c4762a1bSJed Brown   PetscInt       m = 10,n = 10;
11*c4762a1bSJed Brown   PetscReal      r,tol = 10*PETSC_SMALL;
12*c4762a1bSJed Brown 
13*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
14*c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr);
15*c4762a1bSJed Brown   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
16*c4762a1bSJed Brown   ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr);
17*c4762a1bSJed Brown   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr);
18*c4762a1bSJed Brown   ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
19*c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
20*c4762a1bSJed Brown   ierr = MatSeqDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
21*c4762a1bSJed Brown   ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
22*c4762a1bSJed Brown 
23*c4762a1bSJed Brown   /* Create a CUDA version of A */
24*c4762a1bSJed Brown   ierr = MatConvert(A,MATSEQDENSECUDA,MAT_INITIAL_MATRIX,&AC);CHKERRQ(ierr);
25*c4762a1bSJed Brown   ierr = MatDuplicate(AC,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
26*c4762a1bSJed Brown 
27*c4762a1bSJed Brown   /* full CUDA AXPY */
28*c4762a1bSJed Brown   ierr = MatAXPY(B,-1.0,AC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
29*c4762a1bSJed Brown   ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr);
30*c4762a1bSJed Brown   if (r != 0.0) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY %g",(double)r);
31*c4762a1bSJed Brown 
32*c4762a1bSJed Brown   /* test Copy */
33*c4762a1bSJed Brown   ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
34*c4762a1bSJed Brown 
35*c4762a1bSJed Brown   /* call MatAXPY_Basic since B is CUDA, A is CPU,  */
36*c4762a1bSJed Brown   ierr = MatAXPY(B,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
37*c4762a1bSJed Brown   ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr);
38*c4762a1bSJed Brown   if (r != 0.0) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY_Basic %g",(double)r);
39*c4762a1bSJed Brown 
40*c4762a1bSJed Brown   if (m == n) {
41*c4762a1bSJed Brown     Mat B1,B2;
42*c4762a1bSJed Brown 
43*c4762a1bSJed Brown     ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
44*c4762a1bSJed Brown     /* full CUDA PtAP */
45*c4762a1bSJed Brown     ierr = MatPtAP(B,AC,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr);
46*c4762a1bSJed Brown     /* CPU PtAP since A is on the CPU only */
47*c4762a1bSJed Brown     ierr = MatPtAP(B,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr);
48*c4762a1bSJed Brown     ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
49*c4762a1bSJed Brown     ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr);
50*c4762a1bSJed Brown     if (r > tol) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r);
51*c4762a1bSJed Brown 
52*c4762a1bSJed Brown     /* test reuse */
53*c4762a1bSJed Brown     ierr = MatPtAP(B,AC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr);
54*c4762a1bSJed Brown     ierr = MatPtAP(B,A,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr);
55*c4762a1bSJed Brown     ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
56*c4762a1bSJed Brown     ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr);
57*c4762a1bSJed Brown     if (r > tol) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r);
58*c4762a1bSJed Brown 
59*c4762a1bSJed Brown     ierr = MatDestroy(&B1);CHKERRQ(ierr);
60*c4762a1bSJed Brown     ierr = MatDestroy(&B2);CHKERRQ(ierr);
61*c4762a1bSJed Brown   }
62*c4762a1bSJed Brown 
63*c4762a1bSJed Brown   ierr = MatDestroy(&B);CHKERRQ(ierr);
64*c4762a1bSJed Brown   ierr = MatDestroy(&AC);CHKERRQ(ierr);
65*c4762a1bSJed Brown   ierr = MatDestroy(&A);CHKERRQ(ierr);
66*c4762a1bSJed Brown   ierr = PetscFinalize();
67*c4762a1bSJed Brown   return ierr;
68*c4762a1bSJed Brown }
69*c4762a1bSJed Brown 
70*c4762a1bSJed Brown /*TEST
71*c4762a1bSJed Brown 
72*c4762a1bSJed Brown    build:
73*c4762a1bSJed Brown      requires: cuda
74*c4762a1bSJed Brown 
75*c4762a1bSJed Brown    test:
76*c4762a1bSJed Brown      output_file: output/ex32_1.out
77*c4762a1bSJed Brown      args: -m {{3 5 12}} -n {{3 5 12}}
78*c4762a1bSJed Brown      suffix: seqdensecuda
79*c4762a1bSJed Brown 
80*c4762a1bSJed Brown TEST*/
81