1*a78de62eSJunchao Zhang static char help[] = "Test device/host memory allocation in MatDenseSeqCUDA()\n\n"; 2*a78de62eSJunchao Zhang 3*a78de62eSJunchao Zhang /* Contributed by: Victor Eijkhout <eijkhout@tacc.utexas.edu> */ 4*a78de62eSJunchao Zhang 5*a78de62eSJunchao Zhang #include <petscmat.h> 6*a78de62eSJunchao Zhang int main(int argc, char** argv) 7*a78de62eSJunchao Zhang { 8*a78de62eSJunchao Zhang PetscErrorCode ierr; 9*a78de62eSJunchao Zhang PetscInt global_size=100; 10*a78de62eSJunchao Zhang Mat cuda_matrix; 11*a78de62eSJunchao Zhang Vec input,output; 12*a78de62eSJunchao Zhang MPI_Comm comm = PETSC_COMM_SELF; 13*a78de62eSJunchao Zhang PetscReal nrm = 1; 14*a78de62eSJunchao Zhang 15*a78de62eSJunchao Zhang ierr = PetscInitialize(&argc,&argv,NULL,help);if (ierr) return ierr; 16*a78de62eSJunchao Zhang ierr = MatCreateDenseCUDA(comm,global_size,global_size,global_size,global_size,NULL,&cuda_matrix);CHKERRQ(ierr); 17*a78de62eSJunchao Zhang ierr = MatAssemblyBegin(cuda_matrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18*a78de62eSJunchao Zhang ierr = MatAssemblyEnd(cuda_matrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 19*a78de62eSJunchao Zhang 20*a78de62eSJunchao Zhang ierr = VecCreateSeqCUDA(comm,global_size,&input);CHKERRQ(ierr); 21*a78de62eSJunchao Zhang ierr = VecDuplicate(input,&output);CHKERRQ(ierr); 22*a78de62eSJunchao Zhang ierr = VecSet(input,1.);CHKERRQ(ierr); 23*a78de62eSJunchao Zhang ierr = VecSet(output,2.);CHKERRQ(ierr); 24*a78de62eSJunchao Zhang ierr = MatMult(cuda_matrix,input,output);CHKERRQ(ierr); 25*a78de62eSJunchao Zhang ierr = VecNorm(output,NORM_2,&nrm);CHKERRQ(ierr); 26*a78de62eSJunchao Zhang if (nrm > PETSC_SMALL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc generated wrong result. Should be 0, but is %g",(double)nrm); 27*a78de62eSJunchao Zhang ierr = VecDestroy(&input);CHKERRQ(ierr); 28*a78de62eSJunchao Zhang ierr = VecDestroy(&output);CHKERRQ(ierr); 29*a78de62eSJunchao Zhang ierr = MatDestroy(&cuda_matrix);CHKERRQ(ierr); 30*a78de62eSJunchao Zhang ierr = PetscFinalize(); 31*a78de62eSJunchao Zhang return ierr; 32*a78de62eSJunchao Zhang } 33*a78de62eSJunchao Zhang 34*a78de62eSJunchao Zhang /*TEST 35*a78de62eSJunchao Zhang build: 36*a78de62eSJunchao Zhang requires: cuda 37*a78de62eSJunchao Zhang 38*a78de62eSJunchao Zhang test: 39*a78de62eSJunchao Zhang nsize: 1 40*a78de62eSJunchao Zhang 41*a78de62eSJunchao Zhang TEST*/ 42