1c4762a1bSJed Brown 2c4762a1bSJed Brown static char help[] = "Tests MATSEQDENSECUDA\n\n"; 3c4762a1bSJed Brown 4c4762a1bSJed Brown #include <petscmat.h> 5c4762a1bSJed Brown 6c4762a1bSJed Brown int main(int argc,char **argv) 7c4762a1bSJed Brown { 8c4762a1bSJed Brown Mat A,AC,B; 9c4762a1bSJed Brown PetscErrorCode ierr; 10c4762a1bSJed Brown PetscInt m = 10,n = 10; 11c4762a1bSJed Brown PetscReal r,tol = 10*PETSC_SMALL; 12c4762a1bSJed Brown 13c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr; 14c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); 15c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 16c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); 17c4762a1bSJed Brown ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr); 18c4762a1bSJed Brown ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 19c4762a1bSJed Brown ierr = MatSetFromOptions(A);CHKERRQ(ierr); 20c4762a1bSJed Brown ierr = MatSeqDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 21c4762a1bSJed Brown ierr = MatSetRandom(A,NULL);CHKERRQ(ierr); 22c20d7725SJed Brown #if 0 23c20d7725SJed Brown PetscInt i,j; 24c20d7725SJed Brown PetscScalar val; 25c20d7725SJed Brown for (i=0; i<m; i++) { 26c20d7725SJed Brown for (j=0; j<n; j++) { 27c20d7725SJed Brown val = (PetscScalar)(i+j); 28c20d7725SJed Brown ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr); 29c20d7725SJed Brown } 30c20d7725SJed Brown } 31c20d7725SJed Brown ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 32c20d7725SJed Brown ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 33c20d7725SJed Brown #endif 34c4762a1bSJed Brown 35c4762a1bSJed Brown /* Create a CUDA version of A */ 36c20d7725SJed Brown #if defined(PETSC_HAVE_CUDA) 37c4762a1bSJed Brown ierr = MatConvert(A,MATSEQDENSECUDA,MAT_INITIAL_MATRIX,&AC);CHKERRQ(ierr); 38c20d7725SJed Brown #else 39c20d7725SJed Brown ierr = MatDuplicate(A,MAT_COPY_VALUES,&AC);CHKERRQ(ierr); 40c20d7725SJed Brown #endif 41c4762a1bSJed Brown ierr = MatDuplicate(AC,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 42c4762a1bSJed Brown 43c4762a1bSJed Brown /* full CUDA AXPY */ 44c4762a1bSJed Brown ierr = MatAXPY(B,-1.0,AC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 45c4762a1bSJed Brown ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr); 46*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY %g",(double)r); 47c4762a1bSJed Brown 48c4762a1bSJed Brown /* test Copy */ 49c4762a1bSJed Brown ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 50c4762a1bSJed Brown 51c4762a1bSJed Brown /* call MatAXPY_Basic since B is CUDA, A is CPU, */ 52c4762a1bSJed Brown ierr = MatAXPY(B,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 53c4762a1bSJed Brown ierr = MatNorm(B,NORM_INFINITY,&r);CHKERRQ(ierr); 54*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY_Basic %g",(double)r); 55c4762a1bSJed Brown 56c4762a1bSJed Brown if (m == n) { 57c4762a1bSJed Brown Mat B1,B2; 58c4762a1bSJed Brown 59c4762a1bSJed Brown ierr = MatCopy(AC,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 60c4762a1bSJed Brown /* full CUDA PtAP */ 61c4762a1bSJed Brown ierr = MatPtAP(B,AC,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr); 62c20d7725SJed Brown 63c4762a1bSJed Brown /* CPU PtAP since A is on the CPU only */ 64c4762a1bSJed Brown ierr = MatPtAP(B,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr); 65c20d7725SJed Brown 66c4762a1bSJed Brown ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 67c4762a1bSJed Brown ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr); 68*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 69c4762a1bSJed Brown 70c4762a1bSJed Brown /* test reuse */ 71c4762a1bSJed Brown ierr = MatPtAP(B,AC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B1);CHKERRQ(ierr); 72c4762a1bSJed Brown ierr = MatPtAP(B,A,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B2);CHKERRQ(ierr); 73c4762a1bSJed Brown ierr = MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 74c4762a1bSJed Brown ierr = MatNorm(B2,NORM_INFINITY,&r);CHKERRQ(ierr); 75*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 76c4762a1bSJed Brown 77c4762a1bSJed Brown ierr = MatDestroy(&B1);CHKERRQ(ierr); 78c4762a1bSJed Brown ierr = MatDestroy(&B2);CHKERRQ(ierr); 79c4762a1bSJed Brown } 80c4762a1bSJed Brown 81c4762a1bSJed Brown ierr = MatDestroy(&B);CHKERRQ(ierr); 82c4762a1bSJed Brown ierr = MatDestroy(&AC);CHKERRQ(ierr); 83c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 84c4762a1bSJed Brown ierr = PetscFinalize(); 85c4762a1bSJed Brown return ierr; 86c4762a1bSJed Brown } 87c4762a1bSJed Brown 88c4762a1bSJed Brown /*TEST 89c4762a1bSJed Brown 90c4762a1bSJed Brown build: 91c4762a1bSJed Brown requires: cuda 92c4762a1bSJed Brown 93c4762a1bSJed Brown test: 94c4762a1bSJed Brown output_file: output/ex32_1.out 95c4762a1bSJed Brown args: -m {{3 5 12}} -n {{3 5 12}} 96c4762a1bSJed Brown suffix: seqdensecuda 97c4762a1bSJed Brown 98c4762a1bSJed Brown TEST*/ 99