1 2 static char help[] = "Tests MATSEQDENSECUDA\n\n"; 3 4 #include <petscmat.h> 5 6 int main(int argc,char **argv) 7 { 8 Mat A,AC,B; 9 PetscInt m = 10,n = 10; 10 PetscReal r,tol = 10*PETSC_SMALL; 11 12 CHKERRQ(PetscInitialize(&argc,&argv,(char*) 0,help)); 13 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 14 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 15 CHKERRQ(MatCreate(PETSC_COMM_SELF,&A)); 16 CHKERRQ(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n)); 17 CHKERRQ(MatSetType(A,MATSEQDENSE)); 18 CHKERRQ(MatSetFromOptions(A)); 19 CHKERRQ(MatSeqDenseSetPreallocation(A,NULL)); 20 CHKERRQ(MatSetRandom(A,NULL)); 21 #if 0 22 PetscInt i,j; 23 PetscScalar val; 24 for (i=0; i<m; i++) { 25 for (j=0; j<n; j++) { 26 val = (PetscScalar)(i+j); 27 CHKERRQ(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES)); 28 } 29 } 30 CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 31 CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 32 #endif 33 34 /* Create a CUDA version of A */ 35 #if defined(PETSC_HAVE_CUDA) 36 CHKERRQ(MatConvert(A,MATSEQDENSECUDA,MAT_INITIAL_MATRIX,&AC)); 37 #else 38 CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&AC)); 39 #endif 40 CHKERRQ(MatDuplicate(AC,MAT_COPY_VALUES,&B)); 41 42 /* full CUDA AXPY */ 43 CHKERRQ(MatAXPY(B,-1.0,AC,SAME_NONZERO_PATTERN)); 44 CHKERRQ(MatNorm(B,NORM_INFINITY,&r)); 45 PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY %g",(double)r); 46 47 /* test Copy */ 48 CHKERRQ(MatCopy(AC,B,SAME_NONZERO_PATTERN)); 49 50 /* call MatAXPY_Basic since B is CUDA, A is CPU, */ 51 CHKERRQ(MatAXPY(B,-1.0,A,SAME_NONZERO_PATTERN)); 52 CHKERRQ(MatNorm(B,NORM_INFINITY,&r)); 53 PetscCheckFalse(r != 0.0,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatDuplicate + MatCopy + MatAXPY_Basic %g",(double)r); 54 55 if (m == n) { 56 Mat B1,B2; 57 58 CHKERRQ(MatCopy(AC,B,SAME_NONZERO_PATTERN)); 59 /* full CUDA PtAP */ 60 CHKERRQ(MatPtAP(B,AC,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B1)); 61 62 /* CPU PtAP since A is on the CPU only */ 63 CHKERRQ(MatPtAP(B,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B2)); 64 65 CHKERRQ(MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN)); 66 CHKERRQ(MatNorm(B2,NORM_INFINITY,&r)); 67 PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 68 69 /* test reuse */ 70 CHKERRQ(MatPtAP(B,AC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B1)); 71 CHKERRQ(MatPtAP(B,A,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B2)); 72 CHKERRQ(MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN)); 73 CHKERRQ(MatNorm(B2,NORM_INFINITY,&r)); 74 PetscCheckFalse(r > tol,PetscObjectComm((PetscObject)B),PETSC_ERR_PLIB,"Error MatPtAP %g",(double)r); 75 76 CHKERRQ(MatDestroy(&B1)); 77 CHKERRQ(MatDestroy(&B2)); 78 } 79 80 CHKERRQ(MatDestroy(&B)); 81 CHKERRQ(MatDestroy(&AC)); 82 CHKERRQ(MatDestroy(&A)); 83 CHKERRQ(PetscFinalize()); 84 return 0; 85 } 86 87 /*TEST 88 89 build: 90 requires: cuda 91 92 test: 93 output_file: output/ex32_1.out 94 args: -m {{3 5 12}} -n {{3 5 12}} 95 suffix: seqdensecuda 96 97 TEST*/ 98