1*c0558f20SBarry Smith 2*c0558f20SBarry Smith 3*c0558f20SBarry Smith static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 4*c0558f20SBarry Smith 5*c0558f20SBarry Smith /* 6*c0558f20SBarry Smith This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions 7*c0558f20SBarry Smith */ 8*c0558f20SBarry Smith 9*c0558f20SBarry Smith /* 10*c0558f20SBarry Smith Include "petscdm.h" so that we can use data management objects (DMs) 11*c0558f20SBarry Smith Include "petscdmda.h" so that we can use distributed arrays (DMDAs). 12*c0558f20SBarry Smith Include "petscsnes.h" so that we can use SNES solvers. Note that this 13*c0558f20SBarry Smith */ 14*c0558f20SBarry Smith 15*c0558f20SBarry Smith #include <petscdm.h> 16*c0558f20SBarry Smith #include <petscdmda.h> 17*c0558f20SBarry Smith #include <petscsnes.h> 18*c0558f20SBarry Smith 19*c0558f20SBarry Smith /* 20*c0558f20SBarry Smith Include Kokkos files 21*c0558f20SBarry Smith 22*c0558f20SBarry Smith */ 23*c0558f20SBarry Smith #include <Kokkos_Core.hpp> 24*c0558f20SBarry Smith #include <Kokkos_OffsetView.hpp> 25*c0558f20SBarry Smith 26*c0558f20SBarry Smith /* 27*c0558f20SBarry Smith Application-defined routines. 28*c0558f20SBarry Smith */ 29*c0558f20SBarry Smith PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*); 30*c0558f20SBarry Smith PetscErrorCode FormFunction(SNES,Vec,Vec,void*); 31*c0558f20SBarry Smith PetscErrorCode FormInitialGuess(Vec); 32*c0558f20SBarry Smith 33*c0558f20SBarry Smith /* 34*c0558f20SBarry Smith User-defined application context 35*c0558f20SBarry Smith */ 36*c0558f20SBarry Smith typedef struct { 37*c0558f20SBarry Smith DM da; /* distributed array */ 38*c0558f20SBarry Smith Vec F; /* right-hand-side of PDE */ 39*c0558f20SBarry Smith PetscReal h; /* mesh spacing */ 40*c0558f20SBarry Smith } ApplicationCtx; 41*c0558f20SBarry Smith 42*c0558f20SBarry Smith int main(int argc,char **argv) 43*c0558f20SBarry Smith { 44*c0558f20SBarry Smith SNES snes; /* SNES context */ 45*c0558f20SBarry Smith Mat J; /* Jacobian matrix */ 46*c0558f20SBarry Smith ApplicationCtx ctx; /* user-defined context */ 47*c0558f20SBarry Smith Vec x,r,U,F; /* vectors */ 48*c0558f20SBarry Smith PetscScalar *FF,*UU,none = -1.0; 49*c0558f20SBarry Smith PetscErrorCode ierr; 50*c0558f20SBarry Smith PetscInt its,N = 5,maxit,maxf,xs,xm; 51*c0558f20SBarry Smith PetscReal abstol,rtol,stol,norm; 52*c0558f20SBarry Smith PetscBool viewinitial = PETSC_FALSE; 53*c0558f20SBarry Smith PetscBool view_kokkos_configuration = PETSC_TRUE; 54*c0558f20SBarry Smith 55*c0558f20SBarry Smith 56*c0558f20SBarry Smith ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 57*c0558f20SBarry Smith ierr = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr); 58*c0558f20SBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr); 59*c0558f20SBarry Smith ctx.h = 1.0/(N-1); 60*c0558f20SBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr); 61*c0558f20SBarry Smith 62*c0558f20SBarry Smith /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 63*c0558f20SBarry Smith Create nonlinear solver context 64*c0558f20SBarry Smith - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 65*c0558f20SBarry Smith 66*c0558f20SBarry Smith ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 67*c0558f20SBarry Smith 68*c0558f20SBarry Smith /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 69*c0558f20SBarry Smith Create vector data structures; set function evaluation routine 70*c0558f20SBarry Smith - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 71*c0558f20SBarry Smith 72*c0558f20SBarry Smith /* 73*c0558f20SBarry Smith Create distributed array (DMDA) to manage parallel grid and vectors 74*c0558f20SBarry Smith */ 75*c0558f20SBarry Smith ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr); 76*c0558f20SBarry Smith ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr); 77*c0558f20SBarry Smith ierr = DMSetUp(ctx.da);CHKERRQ(ierr); 78*c0558f20SBarry Smith 79*c0558f20SBarry Smith /* 80*c0558f20SBarry Smith Extract global and local vectors from DMDA; then duplicate for remaining 81*c0558f20SBarry Smith vectors that are the same types 82*c0558f20SBarry Smith */ 83*c0558f20SBarry Smith ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr); 84*c0558f20SBarry Smith ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr); 85*c0558f20SBarry Smith ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 86*c0558f20SBarry Smith ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F; 87*c0558f20SBarry Smith ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr); 88*c0558f20SBarry Smith ierr = VecDuplicate(x,&U);CHKERRQ(ierr); 89*c0558f20SBarry Smith ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr); 90*c0558f20SBarry Smith 91*c0558f20SBarry Smith /* 92*c0558f20SBarry Smith Set function evaluation routine and vector. Whenever the nonlinear 93*c0558f20SBarry Smith solver needs to compute the nonlinear function, it will call this 94*c0558f20SBarry Smith routine. 95*c0558f20SBarry Smith - Note that the final routine argument is the user-defined 96*c0558f20SBarry Smith context that provides application-specific data for the 97*c0558f20SBarry Smith function evaluation routine. 98*c0558f20SBarry Smith */ 99*c0558f20SBarry Smith ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr); 100*c0558f20SBarry Smith 101*c0558f20SBarry Smith /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 102*c0558f20SBarry Smith Create matrix data structure; set Jacobian evaluation routine 103*c0558f20SBarry Smith - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 104*c0558f20SBarry Smith 105*c0558f20SBarry Smith ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr); 106*c0558f20SBarry Smith 107*c0558f20SBarry Smith /* 108*c0558f20SBarry Smith Set Jacobian matrix data structure and default Jacobian evaluation 109*c0558f20SBarry Smith routine. Whenever the nonlinear solver needs to compute the 110*c0558f20SBarry Smith Jacobian matrix, it will call this routine. 111*c0558f20SBarry Smith - Note that the final routine argument is the user-defined 112*c0558f20SBarry Smith context that provides application-specific data for the 113*c0558f20SBarry Smith Jacobian evaluation routine. 114*c0558f20SBarry Smith */ 115*c0558f20SBarry Smith ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr); 116*c0558f20SBarry Smith ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 117*c0558f20SBarry Smith ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr); 118*c0558f20SBarry Smith ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr); 119*c0558f20SBarry Smith 120*c0558f20SBarry Smith /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 121*c0558f20SBarry Smith Initialize application: 122*c0558f20SBarry Smith Store forcing function of PDE and exact solution 123*c0558f20SBarry Smith - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 124*c0558f20SBarry Smith 125*c0558f20SBarry Smith /* 126*c0558f20SBarry Smith Get local grid boundaries (for 1-dimensional DMDA): 127*c0558f20SBarry Smith xs, xm - starting grid index, width of local grid (no ghost points) 128*c0558f20SBarry Smith */ 129*c0558f20SBarry Smith ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 130*c0558f20SBarry Smith 131*c0558f20SBarry Smith /* 132*c0558f20SBarry Smith Get pointers to vector data 133*c0558f20SBarry Smith */ 134*c0558f20SBarry Smith ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr); 135*c0558f20SBarry Smith ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr); 136*c0558f20SBarry Smith 137*c0558f20SBarry Smith /* -------------------------------------------------------------------------------------------------- */ 138*c0558f20SBarry Smith /* ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */ 139*c0558f20SBarry Smith /* export OMP_PROC_BIND="threads" export OMP_PROC_BIND="spread" */ 140*c0558f20SBarry Smith 141*c0558f20SBarry Smith if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1); 142*c0558f20SBarry Smith if (!view_kokkos_configuration) { 143*c0558f20SBarry Smith /* use private routine to turn off warnings about negative number of cores etc */ 144*c0558f20SBarry Smith Kokkos::Impl::pre_initialize(Kokkos::InitArguments(-1, -1, -1, true)); 145*c0558f20SBarry Smith } 146*c0558f20SBarry Smith Kokkos::initialize( argc, argv ); 147*c0558f20SBarry Smith if (view_kokkos_configuration) { 148*c0558f20SBarry Smith Kokkos::print_configuration(std::cout, true); 149*c0558f20SBarry Smith } 150*c0558f20SBarry Smith 151*c0558f20SBarry Smith /* introduce a view object; reference like object */ 152*c0558f20SBarry Smith Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs}); 153*c0558f20SBarry Smith 154*c0558f20SBarry Smith PetscReal xpbase = xs*ctx.h; 155*c0558f20SBarry Smith Kokkos:: parallel_for(Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j ) { 156*c0558f20SBarry Smith PetscReal xp = xpbase + j*ctx.h; 157*c0558f20SBarry Smith xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 158*c0558f20SBarry Smith xUU(j) = xp*xp*xp; 159*c0558f20SBarry Smith }); 160*c0558f20SBarry Smith 161*c0558f20SBarry Smith Kokkos::finalize(); 162*c0558f20SBarry Smith 163*c0558f20SBarry Smith /* 164*c0558f20SBarry Smith Restore vectors 165*c0558f20SBarry Smith */ 166*c0558f20SBarry Smith ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr); 167*c0558f20SBarry Smith ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr); 168*c0558f20SBarry Smith if (viewinitial) { 169*c0558f20SBarry Smith ierr = VecView(U,NULL);CHKERRQ(ierr); 170*c0558f20SBarry Smith ierr = VecView(F,NULL);CHKERRQ(ierr); 171*c0558f20SBarry Smith } 172*c0558f20SBarry Smith 173*c0558f20SBarry Smith /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 174*c0558f20SBarry Smith Evaluate initial guess; then solve nonlinear system 175*c0558f20SBarry Smith - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 176*c0558f20SBarry Smith 177*c0558f20SBarry Smith /* 178*c0558f20SBarry Smith Note: The user should initialize the vector, x, with the initial guess 179*c0558f20SBarry Smith for the nonlinear solver prior to calling SNESSolve(). In particular, 180*c0558f20SBarry Smith to employ an initial guess of zero, the user should explicitly set 181*c0558f20SBarry Smith this vector to zero by calling VecSet(). 182*c0558f20SBarry Smith */ 183*c0558f20SBarry Smith ierr = FormInitialGuess(x);CHKERRQ(ierr); 184*c0558f20SBarry Smith ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 185*c0558f20SBarry Smith ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 186*c0558f20SBarry Smith ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr); 187*c0558f20SBarry Smith 188*c0558f20SBarry Smith /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 189*c0558f20SBarry Smith Check solution and clean up 190*c0558f20SBarry Smith - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 191*c0558f20SBarry Smith 192*c0558f20SBarry Smith /* 193*c0558f20SBarry Smith Check the error 194*c0558f20SBarry Smith */ 195*c0558f20SBarry Smith ierr = VecAXPY(x,none,U);CHKERRQ(ierr); 196*c0558f20SBarry Smith ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); 197*c0558f20SBarry Smith ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr); 198*c0558f20SBarry Smith 199*c0558f20SBarry Smith /* 200*c0558f20SBarry Smith Free work space. All PETSc objects should be destroyed when they 201*c0558f20SBarry Smith are no longer needed. 202*c0558f20SBarry Smith */ 203*c0558f20SBarry Smith ierr = VecDestroy(&x);CHKERRQ(ierr); 204*c0558f20SBarry Smith ierr = VecDestroy(&r);CHKERRQ(ierr); 205*c0558f20SBarry Smith ierr = VecDestroy(&U);CHKERRQ(ierr); 206*c0558f20SBarry Smith ierr = VecDestroy(&F);CHKERRQ(ierr); 207*c0558f20SBarry Smith ierr = MatDestroy(&J);CHKERRQ(ierr); 208*c0558f20SBarry Smith ierr = SNESDestroy(&snes);CHKERRQ(ierr); 209*c0558f20SBarry Smith ierr = DMDestroy(&ctx.da);CHKERRQ(ierr); 210*c0558f20SBarry Smith ierr = PetscFinalize(); 211*c0558f20SBarry Smith return ierr; 212*c0558f20SBarry Smith } 213*c0558f20SBarry Smith 214*c0558f20SBarry Smith /* ------------------------------------------------------------------- */ 215*c0558f20SBarry Smith /* 216*c0558f20SBarry Smith FormInitialGuess - Computes initial guess. 217*c0558f20SBarry Smith 218*c0558f20SBarry Smith Input/Output Parameter: 219*c0558f20SBarry Smith . x - the solution vector 220*c0558f20SBarry Smith */ 221*c0558f20SBarry Smith PetscErrorCode FormInitialGuess(Vec x) 222*c0558f20SBarry Smith { 223*c0558f20SBarry Smith PetscErrorCode ierr; 224*c0558f20SBarry Smith PetscScalar pfive = .50; 225*c0558f20SBarry Smith 226*c0558f20SBarry Smith PetscFunctionBeginUser; 227*c0558f20SBarry Smith ierr = VecSet(x,pfive);CHKERRQ(ierr); 228*c0558f20SBarry Smith PetscFunctionReturn(0); 229*c0558f20SBarry Smith } 230*c0558f20SBarry Smith 231*c0558f20SBarry Smith /* ------------------------------------------------------------------- */ 232*c0558f20SBarry Smith /* 233*c0558f20SBarry Smith FormFunction - Evaluates nonlinear function, F(x). 234*c0558f20SBarry Smith 235*c0558f20SBarry Smith Input Parameters: 236*c0558f20SBarry Smith . snes - the SNES context 237*c0558f20SBarry Smith . x - input vector 238*c0558f20SBarry Smith . ctx - optional user-defined context, as set by SNESSetFunction() 239*c0558f20SBarry Smith 240*c0558f20SBarry Smith Output Parameter: 241*c0558f20SBarry Smith . f - function vector 242*c0558f20SBarry Smith 243*c0558f20SBarry Smith Note: 244*c0558f20SBarry Smith The user-defined context can contain any application-specific 245*c0558f20SBarry Smith data needed for the function evaluation. 246*c0558f20SBarry Smith */ 247*c0558f20SBarry Smith PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx) 248*c0558f20SBarry Smith { 249*c0558f20SBarry Smith ApplicationCtx *user = (ApplicationCtx*) ctx; 250*c0558f20SBarry Smith DM da = user->da; 251*c0558f20SBarry Smith PetscScalar *xx,*ff,*FF,d; 252*c0558f20SBarry Smith PetscErrorCode ierr; 253*c0558f20SBarry Smith PetscInt i,M,xs,xm; 254*c0558f20SBarry Smith Vec xlocal; 255*c0558f20SBarry Smith 256*c0558f20SBarry Smith PetscFunctionBeginUser; 257*c0558f20SBarry Smith ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr); 258*c0558f20SBarry Smith /* 259*c0558f20SBarry Smith Scatter ghost points to local vector, using the 2-step process 260*c0558f20SBarry Smith DMGlobalToLocalBegin(), DMGlobalToLocalEnd(). 261*c0558f20SBarry Smith By placing code between these two statements, computations can 262*c0558f20SBarry Smith be done while messages are in transition. 263*c0558f20SBarry Smith */ 264*c0558f20SBarry Smith ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 265*c0558f20SBarry Smith ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 266*c0558f20SBarry Smith 267*c0558f20SBarry Smith /* 268*c0558f20SBarry Smith Get pointers to vector data. 269*c0558f20SBarry Smith - The vector xlocal includes ghost point; the vectors x and f do 270*c0558f20SBarry Smith NOT include ghost points. 271*c0558f20SBarry Smith - Using DMDAVecGetArray() allows accessing the values using global ordering 272*c0558f20SBarry Smith */ 273*c0558f20SBarry Smith ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 274*c0558f20SBarry Smith ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr); 275*c0558f20SBarry Smith ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr); 276*c0558f20SBarry Smith 277*c0558f20SBarry Smith /* 278*c0558f20SBarry Smith Get local grid boundaries (for 1-dimensional DMDA): 279*c0558f20SBarry Smith xs, xm - starting grid index, width of local grid (no ghost points) 280*c0558f20SBarry Smith */ 281*c0558f20SBarry Smith ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 282*c0558f20SBarry Smith ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 283*c0558f20SBarry Smith 284*c0558f20SBarry Smith /* 285*c0558f20SBarry Smith Set function values for boundary points; define local interior grid point range: 286*c0558f20SBarry Smith xsi - starting interior grid index 287*c0558f20SBarry Smith xei - ending interior grid index 288*c0558f20SBarry Smith */ 289*c0558f20SBarry Smith if (xs == 0) { /* left boundary */ 290*c0558f20SBarry Smith ff[0] = xx[0]; 291*c0558f20SBarry Smith xs++;xm--; 292*c0558f20SBarry Smith } 293*c0558f20SBarry Smith if (xs+xm == M) { /* right boundary */ 294*c0558f20SBarry Smith ff[xs+xm-1] = xx[xs+xm-1] - 1.0; 295*c0558f20SBarry Smith xm--; 296*c0558f20SBarry Smith } 297*c0558f20SBarry Smith 298*c0558f20SBarry Smith /* 299*c0558f20SBarry Smith Compute function over locally owned part of the grid (interior points only) 300*c0558f20SBarry Smith */ 301*c0558f20SBarry Smith d = 1.0/(user->h*user->h); 302*c0558f20SBarry Smith for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i]; 303*c0558f20SBarry Smith 304*c0558f20SBarry Smith /* 305*c0558f20SBarry Smith Restore vectors 306*c0558f20SBarry Smith */ 307*c0558f20SBarry Smith ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 308*c0558f20SBarry Smith ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr); 309*c0558f20SBarry Smith ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr); 310*c0558f20SBarry Smith ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 311*c0558f20SBarry Smith PetscFunctionReturn(0); 312*c0558f20SBarry Smith } 313*c0558f20SBarry Smith 314*c0558f20SBarry Smith /* ------------------------------------------------------------------- */ 315*c0558f20SBarry Smith /* 316*c0558f20SBarry Smith FormJacobian - Evaluates Jacobian matrix. 317*c0558f20SBarry Smith 318*c0558f20SBarry Smith Input Parameters: 319*c0558f20SBarry Smith . snes - the SNES context 320*c0558f20SBarry Smith . x - input vector 321*c0558f20SBarry Smith . dummy - optional user-defined context (not used here) 322*c0558f20SBarry Smith 323*c0558f20SBarry Smith Output Parameters: 324*c0558f20SBarry Smith . jac - Jacobian matrix 325*c0558f20SBarry Smith . B - optionally different preconditioning matrix 326*c0558f20SBarry Smith . flag - flag indicating matrix structure 327*c0558f20SBarry Smith */ 328*c0558f20SBarry Smith PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 329*c0558f20SBarry Smith { 330*c0558f20SBarry Smith ApplicationCtx *user = (ApplicationCtx*) ctx; 331*c0558f20SBarry Smith PetscScalar *xx,d,A[3]; 332*c0558f20SBarry Smith PetscErrorCode ierr; 333*c0558f20SBarry Smith PetscInt i,j[3],M,xs,xm; 334*c0558f20SBarry Smith DM da = user->da; 335*c0558f20SBarry Smith 336*c0558f20SBarry Smith PetscFunctionBeginUser; 337*c0558f20SBarry Smith /* 338*c0558f20SBarry Smith Get pointer to vector data 339*c0558f20SBarry Smith */ 340*c0558f20SBarry Smith ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr); 341*c0558f20SBarry Smith ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 342*c0558f20SBarry Smith 343*c0558f20SBarry Smith /* 344*c0558f20SBarry Smith Get range of locally owned matrix 345*c0558f20SBarry Smith */ 346*c0558f20SBarry Smith ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 347*c0558f20SBarry Smith 348*c0558f20SBarry Smith /* 349*c0558f20SBarry Smith Determine starting and ending local indices for interior grid points. 350*c0558f20SBarry Smith Set Jacobian entries for boundary points. 351*c0558f20SBarry Smith */ 352*c0558f20SBarry Smith 353*c0558f20SBarry Smith if (xs == 0) { /* left boundary */ 354*c0558f20SBarry Smith i = 0; A[0] = 1.0; 355*c0558f20SBarry Smith 356*c0558f20SBarry Smith ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 357*c0558f20SBarry Smith xs++;xm--; 358*c0558f20SBarry Smith } 359*c0558f20SBarry Smith if (xs+xm == M) { /* right boundary */ 360*c0558f20SBarry Smith i = M-1; 361*c0558f20SBarry Smith A[0] = 1.0; 362*c0558f20SBarry Smith ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 363*c0558f20SBarry Smith xm--; 364*c0558f20SBarry Smith } 365*c0558f20SBarry Smith 366*c0558f20SBarry Smith /* 367*c0558f20SBarry Smith Interior grid points 368*c0558f20SBarry Smith - Note that in this case we set all elements for a particular 369*c0558f20SBarry Smith row at once. 370*c0558f20SBarry Smith */ 371*c0558f20SBarry Smith d = 1.0/(user->h*user->h); 372*c0558f20SBarry Smith for (i=xs; i<xs+xm; i++) { 373*c0558f20SBarry Smith j[0] = i - 1; j[1] = i; j[2] = i + 1; 374*c0558f20SBarry Smith A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 375*c0558f20SBarry Smith ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr); 376*c0558f20SBarry Smith } 377*c0558f20SBarry Smith 378*c0558f20SBarry Smith /* 379*c0558f20SBarry Smith Assemble matrix, using the 2-step process: 380*c0558f20SBarry Smith MatAssemblyBegin(), MatAssemblyEnd(). 381*c0558f20SBarry Smith By placing code between these two statements, computations can be 382*c0558f20SBarry Smith done while messages are in transition. 383*c0558f20SBarry Smith 384*c0558f20SBarry Smith Also, restore vector. 385*c0558f20SBarry Smith */ 386*c0558f20SBarry Smith 387*c0558f20SBarry Smith ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 388*c0558f20SBarry Smith ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr); 389*c0558f20SBarry Smith ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 390*c0558f20SBarry Smith 391*c0558f20SBarry Smith PetscFunctionReturn(0); 392*c0558f20SBarry Smith } 393*c0558f20SBarry Smith 394*c0558f20SBarry Smith /* 395*c0558f20SBarry Smith The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA. 396*c0558f20SBarry Smith 397*c0558f20SBarry Smith The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos 398*c0558f20SBarry Smith assumes all memory it is given is allocated with cudaMallocHost() and can be used with 399*c0558f20SBarry Smith cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory. 400*c0558f20SBarry Smith 401*c0558f20SBarry Smith The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi 402*c0558f20SBarry Smith */ 403*c0558f20SBarry Smith 404*c0558f20SBarry Smith /*TEST 405*c0558f20SBarry Smith 406*c0558f20SBarry Smith build: 407*c0558f20SBarry Smith requires: kokkos 408*c0558f20SBarry Smith 409*c0558f20SBarry Smith test: 410*c0558f20SBarry Smith requires: kokkos double !complex !single !cuda 411*c0558f20SBarry Smith args: -view_initial -view_kokkos_configuration false -snes_monitor 412*c0558f20SBarry Smith filter: grep -v type: 413*c0558f20SBarry Smith 414*c0558f20SBarry Smith test: 415*c0558f20SBarry Smith suffix: 2 416*c0558f20SBarry Smith requires: kokkos double !complex !single cuda 417*c0558f20SBarry Smith args: -dm_vec_type cuda -vec_pinned_memory_min 0 -use_gpu_aware_mpi 0 -view_initial -view_kokkos_configuration false -snes_monitor 418*c0558f20SBarry Smith output_file: output/ex3k_1.out 419*c0558f20SBarry Smith filter: grep -v type: 420*c0558f20SBarry Smith 421*c0558f20SBarry Smith test: 422*c0558f20SBarry Smith suffix: 3 423*c0558f20SBarry Smith requires: kokkos double !complex !single defined(PETSC_HAVE_cusparseCreateSolveAnalysisInfo) cuda 424*c0558f20SBarry Smith nsize: 2 425*c0558f20SBarry Smith args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -use_gpu_aware_mpi 0 -view_initial -view_kokkos_configuration false -snes_monitor 426*c0558f20SBarry Smith output_file: output/ex3k_1.out 427*c0558f20SBarry Smith filter: grep -v type: 428*c0558f20SBarry Smith 429*c0558f20SBarry Smith TEST*/ 430