1a78de62eSJunchao Zhang static char help[] = "Test device/host memory allocation in MatDenseSeqCUDA()\n\n"; 2a78de62eSJunchao Zhang 3a78de62eSJunchao Zhang /* Contributed by: Victor Eijkhout <eijkhout@tacc.utexas.edu> */ 4a78de62eSJunchao Zhang 5a78de62eSJunchao Zhang #include <petscmat.h> 6a78de62eSJunchao Zhang int main(int argc, char** argv) 7a78de62eSJunchao Zhang { 8a78de62eSJunchao Zhang PetscInt global_size = 100; 9a78de62eSJunchao Zhang Mat cuda_matrix; 10a78de62eSJunchao Zhang Vec input,output; 11a78de62eSJunchao Zhang MPI_Comm comm = PETSC_COMM_SELF; 12a78de62eSJunchao Zhang PetscReal nrm = 1; 13a78de62eSJunchao Zhang 14*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&argv,NULL,help)); 155f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateDenseCUDA(comm,global_size,global_size,global_size,global_size,NULL,&cuda_matrix)); 165f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(cuda_matrix,MAT_FINAL_ASSEMBLY)); 175f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(cuda_matrix,MAT_FINAL_ASSEMBLY)); 18a78de62eSJunchao Zhang 195f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateSeqCUDA(comm,global_size,&input)); 205f80ce2aSJacob Faibussowitsch CHKERRQ(VecDuplicate(input,&output)); 215f80ce2aSJacob Faibussowitsch CHKERRQ(VecSet(input,1.)); 225f80ce2aSJacob Faibussowitsch CHKERRQ(VecSet(output,2.)); 235f80ce2aSJacob Faibussowitsch CHKERRQ(MatMult(cuda_matrix,input,output)); 245f80ce2aSJacob Faibussowitsch CHKERRQ(VecNorm(output,NORM_2,&nrm)); 252c71b3e2SJacob Faibussowitsch PetscCheckFalse(nrm > PETSC_SMALL,PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc generated wrong result. Should be 0, but is %g",(double)nrm); 265f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&input)); 275f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&output)); 285f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&cuda_matrix)); 29*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 30*b122ec5aSJacob Faibussowitsch return 0; 31a78de62eSJunchao Zhang } 32a78de62eSJunchao Zhang 33a78de62eSJunchao Zhang /*TEST 34a78de62eSJunchao Zhang build: 35a78de62eSJunchao Zhang requires: cuda 36a78de62eSJunchao Zhang 37a78de62eSJunchao Zhang test: 38a78de62eSJunchao Zhang nsize: 1 39a78de62eSJunchao Zhang 40a78de62eSJunchao Zhang TEST*/ 41