1c4762a1bSJed Brown 2c4762a1bSJed Brown static char help[] = "Tests MATSEQDENSECUDA\n\n"; 3c4762a1bSJed Brown 4c4762a1bSJed Brown #include <petscmat.h> 5c4762a1bSJed Brown 6c4762a1bSJed Brown int main(int argc,char **argv) 7c4762a1bSJed Brown { 8c4762a1bSJed Brown Mat A,AC,B; 9c4762a1bSJed Brown PetscInt m = 10,n = 10; 10c4762a1bSJed Brown PetscReal r,tol = 10*PETSC_SMALL; 11c4762a1bSJed Brown 12*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&argv,(char*) 0,help)); 135f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 145f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 155f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PETSC_COMM_SELF,&A)); 165f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n)); 175f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(A,MATSEQDENSE)); 185f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetFromOptions(A)); 195f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqDenseSetPreallocation(A,NULL)); 205f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetRandom(A,NULL)); 21c20d7725SJed Brown #if 0 22c20d7725SJed Brown PetscInt i,j; 23c20d7725SJed Brown PetscScalar val; 24c20d7725SJed Brown for (i=0; i<m; i++) { 25c20d7725SJed Brown for (j=0; j<n; j++) { 26c20d7725SJed Brown val = (PetscScalar)(i+j); 275f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES)); 28c20d7725SJed Brown } 29c20d7725SJed Brown } 305f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 315f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 32c20d7725SJed Brown #endif 33c4762a1bSJed Brown 34c4762a1bSJed Brown /* Create a CUDA version of A */ 35c20d7725SJed Brown #if defined(PETSC_HAVE_CUDA) 365f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(A,MATSEQDENSECUDA,MAT_INITIAL_MATRIX,&AC)); 37c20d7725SJed Brown #else 385f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&AC)); 39c20d7725SJed Brown #endif 405f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(AC,MAT_COPY_VALUES,&B)); 41c4762a1bSJed Brown 42c4762a1bSJed Brown /* full CUDA AXPY */ 435f80ce2aSJacob Faibussowitsch CHKERRQ(MatAXPY(B,-1.0,AC,SAME_NONZERO_PATTERN)); 445f80ce2aSJacob Faibussowitsch CHKERRQ(MatNorm(B,NORM_INFINITY,&r)); 452c71b3e2SJacob Faibussowitsch PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY %g",(double)r); 46c4762a1bSJed Brown 47c4762a1bSJed Brown /* test Copy */ 485f80ce2aSJacob Faibussowitsch CHKERRQ(MatCopy(AC,B,SAME_NONZERO_PATTERN)); 49c4762a1bSJed Brown 50c4762a1bSJed Brown /* call MatAXPY_Basic since B is CUDA, A is CPU, */ 515f80ce2aSJacob Faibussowitsch CHKERRQ(MatAXPY(B,-1.0,A,SAME_NONZERO_PATTERN)); 525f80ce2aSJacob Faibussowitsch CHKERRQ(MatNorm(B,NORM_INFINITY,&r)); 532c71b3e2SJacob Faibussowitsch PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY_Basic %g",(double)r); 54c4762a1bSJed Brown 55c4762a1bSJed Brown if (m == n) { 56c4762a1bSJed Brown Mat B1,B2; 57c4762a1bSJed Brown 585f80ce2aSJacob Faibussowitsch CHKERRQ(MatCopy(AC,B,SAME_NONZERO_PATTERN)); 59c4762a1bSJed Brown /* full CUDA PtAP */ 605f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(B,AC,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B1)); 61c20d7725SJed Brown 62c4762a1bSJed Brown /* CPU PtAP since A is on the CPU only */ 635f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(B,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B2)); 64c20d7725SJed Brown 655f80ce2aSJacob Faibussowitsch CHKERRQ(MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN)); 665f80ce2aSJacob Faibussowitsch CHKERRQ(MatNorm(B2,NORM_INFINITY,&r)); 672c71b3e2SJacob Faibussowitsch PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 68c4762a1bSJed Brown 69c4762a1bSJed Brown /* test reuse */ 705f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(B,AC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B1)); 715f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(B,A,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B2)); 725f80ce2aSJacob Faibussowitsch CHKERRQ(MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN)); 735f80ce2aSJacob Faibussowitsch CHKERRQ(MatNorm(B2,NORM_INFINITY,&r)); 742c71b3e2SJacob Faibussowitsch PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 75c4762a1bSJed Brown 765f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&B1)); 775f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&B2)); 78c4762a1bSJed Brown } 79c4762a1bSJed Brown 805f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&B)); 815f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&AC)); 825f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 83*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 84*b122ec5aSJacob Faibussowitsch return 0; 85c4762a1bSJed Brown } 86c4762a1bSJed Brown 87c4762a1bSJed Brown /*TEST 88c4762a1bSJed Brown 89c4762a1bSJed Brown build: 90c4762a1bSJed Brown requires: cuda 91c4762a1bSJed Brown 92c4762a1bSJed Brown test: 93c4762a1bSJed Brown output_file: output/ex32_1.out 94c4762a1bSJed Brown args: -m {{3 5 12}} -n {{3 5 12}} 95c4762a1bSJed Brown suffix: seqdensecuda 96c4762a1bSJed Brown 97c4762a1bSJed Brown TEST*/ 98