1c4762a1bSJed Brown static char help[] = "Tests MATSEQDENSECUDA\n\n"; 2c4762a1bSJed Brown 3c4762a1bSJed Brown #include <petscmat.h> 4c4762a1bSJed Brown 5d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv) 6d71ae5a4SJacob Faibussowitsch { 7c4762a1bSJed Brown Mat A, AC, B; 8c4762a1bSJed Brown PetscInt m = 10, n = 10; 9c4762a1bSJed Brown PetscReal r, tol = 10 * PETSC_SMALL; 10c4762a1bSJed Brown 11327415f7SBarry Smith PetscFunctionBeginUser; 12c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 139566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 149566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 159566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &A)); 169566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m, n)); 179566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQDENSE)); 189566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 199566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 209566063dSJacob Faibussowitsch PetscCall(MatSetRandom(A, NULL)); 21c20d7725SJed Brown #if 0 22c20d7725SJed Brown PetscInt i,j; 23c20d7725SJed Brown PetscScalar val; 24c20d7725SJed Brown for (i=0; i<m; i++) { 25c20d7725SJed Brown for (j=0; j<n; j++) { 26c20d7725SJed Brown val = (PetscScalar)(i+j); 279566063dSJacob Faibussowitsch PetscCall(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES)); 28c20d7725SJed Brown } 29c20d7725SJed Brown } 309566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 319566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 32c20d7725SJed Brown #endif 33c4762a1bSJed Brown 34c4762a1bSJed Brown /* Create a CUDA version of A */ 35c20d7725SJed Brown #if defined(PETSC_HAVE_CUDA) 369566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQDENSECUDA, MAT_INITIAL_MATRIX, &AC)); 37c20d7725SJed Brown #else 389566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &AC)); 39c20d7725SJed Brown #endif 409566063dSJacob Faibussowitsch PetscCall(MatDuplicate(AC, MAT_COPY_VALUES, &B)); 41c4762a1bSJed Brown 42c4762a1bSJed Brown /* full CUDA AXPY */ 439566063dSJacob Faibussowitsch PetscCall(MatAXPY(B, -1.0, AC, SAME_NONZERO_PATTERN)); 449566063dSJacob Faibussowitsch PetscCall(MatNorm(B, NORM_INFINITY, &r)); 4508401ef6SPierre Jolivet PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY %g", (double)r); 46c4762a1bSJed Brown 47c4762a1bSJed Brown /* test Copy */ 489566063dSJacob Faibussowitsch PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN)); 49c4762a1bSJed Brown 50c4762a1bSJed Brown /* call MatAXPY_Basic since B is CUDA, A is CPU, */ 519566063dSJacob Faibussowitsch PetscCall(MatAXPY(B, -1.0, A, SAME_NONZERO_PATTERN)); 529566063dSJacob Faibussowitsch PetscCall(MatNorm(B, NORM_INFINITY, &r)); 5308401ef6SPierre Jolivet PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY_Basic %g", (double)r); 54c4762a1bSJed Brown 55c4762a1bSJed Brown if (m == n) { 56c4762a1bSJed Brown Mat B1, B2; 57c4762a1bSJed Brown 589566063dSJacob Faibussowitsch PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN)); 59c4762a1bSJed Brown /* full CUDA PtAP */ 60fb842aefSJose E. Roman PetscCall(MatPtAP(B, AC, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B1)); 61c20d7725SJed Brown 62c4762a1bSJed Brown /* CPU PtAP since A is on the CPU only */ 63fb842aefSJose E. Roman PetscCall(MatPtAP(B, A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B2)); 64c20d7725SJed Brown 659566063dSJacob Faibussowitsch PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN)); 669566063dSJacob Faibussowitsch PetscCall(MatNorm(B2, NORM_INFINITY, &r)); 6708401ef6SPierre Jolivet PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r); 68c4762a1bSJed Brown 69c4762a1bSJed Brown /* test reuse */ 70fb842aefSJose E. Roman PetscCall(MatPtAP(B, AC, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B1)); 71fb842aefSJose E. Roman PetscCall(MatPtAP(B, A, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B2)); 729566063dSJacob Faibussowitsch PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN)); 739566063dSJacob Faibussowitsch PetscCall(MatNorm(B2, NORM_INFINITY, &r)); 7408401ef6SPierre Jolivet PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r); 75c4762a1bSJed Brown 769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B1)); 779566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B2)); 78c4762a1bSJed Brown } 79c4762a1bSJed Brown 809566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 819566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AC)); 829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 839566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 84b122ec5aSJacob Faibussowitsch return 0; 85c4762a1bSJed Brown } 86c4762a1bSJed Brown 87c4762a1bSJed Brown /*TEST 88c4762a1bSJed Brown 89c4762a1bSJed Brown build: 90c4762a1bSJed Brown requires: cuda 91c4762a1bSJed Brown 92c4762a1bSJed Brown test: 93*3886731fSPierre Jolivet output_file: output/empty.out 94c4762a1bSJed Brown args: -m {{3 5 12}} -n {{3 5 12}} 95c4762a1bSJed Brown suffix: seqdensecuda 96c4762a1bSJed Brown 97c4762a1bSJed Brown TEST*/ 98