1*c4762a1bSJed Brown 2*c4762a1bSJed Brown static char help[] = "Tests MATSEQDENSECUDA\n\n"; 3*c4762a1bSJed Brown 4*c4762a1bSJed Brown #include <petscmat.h> 5*c4762a1bSJed Brown 6*c4762a1bSJed Brown int main(int argc,char **argv) 7*c4762a1bSJed Brown { 8*c4762a1bSJed Brown Mat A,AC,B; 9*c4762a1bSJed Brown PetscErrorCode ierr; 10*c4762a1bSJed Brown PetscInt m = 10,n = 10; 11*c4762a1bSJed Brown PetscReal r,tol = 10*PETSC_SMALL; 12*c4762a1bSJed Brown 13*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr; 14*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); 15*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 16*c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 17*c4762a1bSJed Brown ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr); 18*c4762a1bSJed Brown ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 19*c4762a1bSJed Brown ierr = MatSetFromOptions(A);CHKERRQ(ierr); 20*c4762a1bSJed Brown ierr = MatSeqDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 21*c4762a1bSJed Brown ierr = MatSetRandom(A,NULL);CHKERRQ(ierr); 22*c4762a1bSJed Brown 23*c4762a1bSJed Brown /* Create a CUDA version of A */ 24*c4762a1bSJed Brown ierr = MatConvert(A,MATSEQDENSECUDA,MAT_INITIAL_MATRIX,&AC);CHKERRQ(ierr); 25*c4762a1bSJed Brown ierr = MatDuplicate(AC,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 26*c4762a1bSJed Brown 27*c4762a1bSJed Brown /* full CUDA AXPY */ 28*c4762a1bSJed Brown ierr = MatAXPY(B,-1.0,AC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 29*c4762a1bSJed Brown ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr); 30*c4762a1bSJed Brown if (r != 0.0) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY %g",(double)r); 31*c4762a1bSJed Brown 32*c4762a1bSJed Brown /* test Copy */ 33*c4762a1bSJed Brown ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 34*c4762a1bSJed Brown 35*c4762a1bSJed Brown /* call MatAXPY_Basic since B is CUDA, A is CPU, */ 36*c4762a1bSJed Brown ierr = MatAXPY(B,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 37*c4762a1bSJed Brown ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr); 38*c4762a1bSJed Brown if (r != 0.0) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY_Basic %g",(double)r); 39*c4762a1bSJed Brown 40*c4762a1bSJed Brown if (m == n) { 41*c4762a1bSJed Brown Mat B1,B2; 42*c4762a1bSJed Brown 43*c4762a1bSJed Brown ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 44*c4762a1bSJed Brown /* full CUDA PtAP */ 45*c4762a1bSJed Brown ierr = MatPtAP(B,AC,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr); 46*c4762a1bSJed Brown /* CPU PtAP since A is on the CPU only */ 47*c4762a1bSJed Brown ierr = MatPtAP(B,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr); 48*c4762a1bSJed Brown ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 49*c4762a1bSJed Brown ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr); 50*c4762a1bSJed Brown if (r > tol) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 51*c4762a1bSJed Brown 52*c4762a1bSJed Brown /* test reuse */ 53*c4762a1bSJed Brown ierr = MatPtAP(B,AC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr); 54*c4762a1bSJed Brown ierr = MatPtAP(B,A,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr); 55*c4762a1bSJed Brown ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 56*c4762a1bSJed Brown ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr); 57*c4762a1bSJed Brown if (r > tol) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 58*c4762a1bSJed Brown 59*c4762a1bSJed Brown ierr = MatDestroy(&B1);CHKERRQ(ierr); 60*c4762a1bSJed Brown ierr = MatDestroy(&B2);CHKERRQ(ierr); 61*c4762a1bSJed Brown } 62*c4762a1bSJed Brown 63*c4762a1bSJed Brown ierr = MatDestroy(&B);CHKERRQ(ierr); 64*c4762a1bSJed Brown ierr = MatDestroy(&AC);CHKERRQ(ierr); 65*c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 66*c4762a1bSJed Brown ierr = PetscFinalize(); 67*c4762a1bSJed Brown return ierr; 68*c4762a1bSJed Brown } 69*c4762a1bSJed Brown 70*c4762a1bSJed Brown /*TEST 71*c4762a1bSJed Brown 72*c4762a1bSJed Brown build: 73*c4762a1bSJed Brown requires: cuda 74*c4762a1bSJed Brown 75*c4762a1bSJed Brown test: 76*c4762a1bSJed Brown output_file: output/ex32_1.out 77*c4762a1bSJed Brown args: -m {{3 5 12}} -n {{3 5 12}} 78*c4762a1bSJed Brown suffix: seqdensecuda 79*c4762a1bSJed Brown 80*c4762a1bSJed Brown TEST*/ 81