xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision c0558f20726f97b90b63e37675ac49582a182ce6)
1*c0558f20SBarry Smith 
2*c0558f20SBarry Smith 
3*c0558f20SBarry Smith static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
4*c0558f20SBarry Smith 
5*c0558f20SBarry Smith /*
6*c0558f20SBarry Smith     This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions
7*c0558f20SBarry Smith */
8*c0558f20SBarry Smith 
9*c0558f20SBarry Smith /*
10*c0558f20SBarry Smith    Include "petscdm.h" so that we can use data management objects (DMs)
11*c0558f20SBarry Smith    Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
12*c0558f20SBarry Smith    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
13*c0558f20SBarry Smith */
14*c0558f20SBarry Smith 
15*c0558f20SBarry Smith #include <petscdm.h>
16*c0558f20SBarry Smith #include <petscdmda.h>
17*c0558f20SBarry Smith #include <petscsnes.h>
18*c0558f20SBarry Smith 
19*c0558f20SBarry Smith /*
20*c0558f20SBarry Smith     Include Kokkos files
21*c0558f20SBarry Smith 
22*c0558f20SBarry Smith */
23*c0558f20SBarry Smith #include <Kokkos_Core.hpp>
24*c0558f20SBarry Smith #include <Kokkos_OffsetView.hpp>
25*c0558f20SBarry Smith 
26*c0558f20SBarry Smith /*
27*c0558f20SBarry Smith    Application-defined routines.
28*c0558f20SBarry Smith */
29*c0558f20SBarry Smith PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*);
30*c0558f20SBarry Smith PetscErrorCode FormFunction(SNES,Vec,Vec,void*);
31*c0558f20SBarry Smith PetscErrorCode FormInitialGuess(Vec);
32*c0558f20SBarry Smith 
33*c0558f20SBarry Smith /*
34*c0558f20SBarry Smith    User-defined application context
35*c0558f20SBarry Smith */
36*c0558f20SBarry Smith typedef struct {
37*c0558f20SBarry Smith   DM          da;      /* distributed array */
38*c0558f20SBarry Smith   Vec         F;       /* right-hand-side of PDE */
39*c0558f20SBarry Smith   PetscReal   h;       /* mesh spacing */
40*c0558f20SBarry Smith } ApplicationCtx;
41*c0558f20SBarry Smith 
42*c0558f20SBarry Smith int main(int argc,char **argv)
43*c0558f20SBarry Smith {
44*c0558f20SBarry Smith   SNES           snes;                 /* SNES context */
45*c0558f20SBarry Smith   Mat            J;                    /* Jacobian matrix */
46*c0558f20SBarry Smith   ApplicationCtx ctx;                  /* user-defined context */
47*c0558f20SBarry Smith   Vec            x,r,U,F;              /* vectors */
48*c0558f20SBarry Smith   PetscScalar    *FF,*UU,none = -1.0;
49*c0558f20SBarry Smith   PetscErrorCode ierr;
50*c0558f20SBarry Smith   PetscInt       its,N = 5,maxit,maxf,xs,xm;
51*c0558f20SBarry Smith   PetscReal      abstol,rtol,stol,norm;
52*c0558f20SBarry Smith   PetscBool      viewinitial = PETSC_FALSE;
53*c0558f20SBarry Smith   PetscBool      view_kokkos_configuration = PETSC_TRUE;
54*c0558f20SBarry Smith 
55*c0558f20SBarry Smith 
56*c0558f20SBarry Smith   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
57*c0558f20SBarry Smith   ierr  = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr);
58*c0558f20SBarry Smith   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr);
59*c0558f20SBarry Smith   ctx.h = 1.0/(N-1);
60*c0558f20SBarry Smith   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr);
61*c0558f20SBarry Smith 
62*c0558f20SBarry Smith   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
63*c0558f20SBarry Smith      Create nonlinear solver context
64*c0558f20SBarry Smith      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
65*c0558f20SBarry Smith 
66*c0558f20SBarry Smith   ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr);
67*c0558f20SBarry Smith 
68*c0558f20SBarry Smith   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
69*c0558f20SBarry Smith      Create vector data structures; set function evaluation routine
70*c0558f20SBarry Smith      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
71*c0558f20SBarry Smith 
72*c0558f20SBarry Smith   /*
73*c0558f20SBarry Smith      Create distributed array (DMDA) to manage parallel grid and vectors
74*c0558f20SBarry Smith   */
75*c0558f20SBarry Smith   ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr);
76*c0558f20SBarry Smith   ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr);
77*c0558f20SBarry Smith   ierr = DMSetUp(ctx.da);CHKERRQ(ierr);
78*c0558f20SBarry Smith 
79*c0558f20SBarry Smith   /*
80*c0558f20SBarry Smith      Extract global and local vectors from DMDA; then duplicate for remaining
81*c0558f20SBarry Smith      vectors that are the same types
82*c0558f20SBarry Smith   */
83*c0558f20SBarry Smith   ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr);
84*c0558f20SBarry Smith   ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr);
85*c0558f20SBarry Smith   ierr = VecDuplicate(x,&r);CHKERRQ(ierr);
86*c0558f20SBarry Smith   ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F;
87*c0558f20SBarry Smith   ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr);
88*c0558f20SBarry Smith   ierr = VecDuplicate(x,&U);CHKERRQ(ierr);
89*c0558f20SBarry Smith   ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr);
90*c0558f20SBarry Smith 
91*c0558f20SBarry Smith   /*
92*c0558f20SBarry Smith      Set function evaluation routine and vector.  Whenever the nonlinear
93*c0558f20SBarry Smith      solver needs to compute the nonlinear function, it will call this
94*c0558f20SBarry Smith      routine.
95*c0558f20SBarry Smith       - Note that the final routine argument is the user-defined
96*c0558f20SBarry Smith         context that provides application-specific data for the
97*c0558f20SBarry Smith         function evaluation routine.
98*c0558f20SBarry Smith   */
99*c0558f20SBarry Smith   ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr);
100*c0558f20SBarry Smith 
101*c0558f20SBarry Smith   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
102*c0558f20SBarry Smith      Create matrix data structure; set Jacobian evaluation routine
103*c0558f20SBarry Smith      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
104*c0558f20SBarry Smith 
105*c0558f20SBarry Smith   ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr);
106*c0558f20SBarry Smith 
107*c0558f20SBarry Smith   /*
108*c0558f20SBarry Smith      Set Jacobian matrix data structure and default Jacobian evaluation
109*c0558f20SBarry Smith      routine.  Whenever the nonlinear solver needs to compute the
110*c0558f20SBarry Smith      Jacobian matrix, it will call this routine.
111*c0558f20SBarry Smith       - Note that the final routine argument is the user-defined
112*c0558f20SBarry Smith         context that provides application-specific data for the
113*c0558f20SBarry Smith         Jacobian evaluation routine.
114*c0558f20SBarry Smith   */
115*c0558f20SBarry Smith   ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr);
116*c0558f20SBarry Smith   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
117*c0558f20SBarry Smith   ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr);
118*c0558f20SBarry Smith   ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr);
119*c0558f20SBarry Smith 
120*c0558f20SBarry Smith   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
121*c0558f20SBarry Smith      Initialize application:
122*c0558f20SBarry Smith      Store forcing function of PDE and exact solution
123*c0558f20SBarry Smith    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
124*c0558f20SBarry Smith 
125*c0558f20SBarry Smith   /*
126*c0558f20SBarry Smith      Get local grid boundaries (for 1-dimensional DMDA):
127*c0558f20SBarry Smith        xs, xm - starting grid index, width of local grid (no ghost points)
128*c0558f20SBarry Smith   */
129*c0558f20SBarry Smith   ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
130*c0558f20SBarry Smith 
131*c0558f20SBarry Smith   /*
132*c0558f20SBarry Smith      Get pointers to vector data
133*c0558f20SBarry Smith   */
134*c0558f20SBarry Smith   ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr);
135*c0558f20SBarry Smith   ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr);
136*c0558f20SBarry Smith 
137*c0558f20SBarry Smith   /* -------------------------------------------------------------------------------------------------- */
138*c0558f20SBarry Smith   /*  ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */
139*c0558f20SBarry Smith   /*  export OMP_PROC_BIND="threads"  export OMP_PROC_BIND="spread" */
140*c0558f20SBarry Smith 
141*c0558f20SBarry Smith   if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1);
142*c0558f20SBarry Smith   if (!view_kokkos_configuration) {
143*c0558f20SBarry Smith     /* use private routine to turn off warnings about negative number of cores etc */
144*c0558f20SBarry Smith     Kokkos::Impl::pre_initialize(Kokkos::InitArguments(-1, -1, -1, true));
145*c0558f20SBarry Smith   }
146*c0558f20SBarry Smith   Kokkos::initialize( argc, argv );
147*c0558f20SBarry Smith   if (view_kokkos_configuration) {
148*c0558f20SBarry Smith     Kokkos::print_configuration(std::cout, true);
149*c0558f20SBarry Smith   }
150*c0558f20SBarry Smith 
151*c0558f20SBarry Smith   /* introduce a view object; reference like object  */
152*c0558f20SBarry Smith   Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs});
153*c0558f20SBarry Smith 
154*c0558f20SBarry Smith   PetscReal xpbase = xs*ctx.h;
155*c0558f20SBarry Smith   Kokkos:: parallel_for(Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j ) {
156*c0558f20SBarry Smith     PetscReal xp = xpbase + j*ctx.h;
157*c0558f20SBarry Smith     xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */
158*c0558f20SBarry Smith     xUU(j) = xp*xp*xp;
159*c0558f20SBarry Smith   });
160*c0558f20SBarry Smith 
161*c0558f20SBarry Smith   Kokkos::finalize();
162*c0558f20SBarry Smith 
163*c0558f20SBarry Smith   /*
164*c0558f20SBarry Smith      Restore vectors
165*c0558f20SBarry Smith   */
166*c0558f20SBarry Smith   ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr);
167*c0558f20SBarry Smith   ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr);
168*c0558f20SBarry Smith   if (viewinitial) {
169*c0558f20SBarry Smith     ierr = VecView(U,NULL);CHKERRQ(ierr);
170*c0558f20SBarry Smith     ierr = VecView(F,NULL);CHKERRQ(ierr);
171*c0558f20SBarry Smith   }
172*c0558f20SBarry Smith 
173*c0558f20SBarry Smith   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
174*c0558f20SBarry Smith      Evaluate initial guess; then solve nonlinear system
175*c0558f20SBarry Smith    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
176*c0558f20SBarry Smith 
177*c0558f20SBarry Smith   /*
178*c0558f20SBarry Smith      Note: The user should initialize the vector, x, with the initial guess
179*c0558f20SBarry Smith      for the nonlinear solver prior to calling SNESSolve().  In particular,
180*c0558f20SBarry Smith      to employ an initial guess of zero, the user should explicitly set
181*c0558f20SBarry Smith      this vector to zero by calling VecSet().
182*c0558f20SBarry Smith   */
183*c0558f20SBarry Smith   ierr = FormInitialGuess(x);CHKERRQ(ierr);
184*c0558f20SBarry Smith   ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr);
185*c0558f20SBarry Smith   ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
186*c0558f20SBarry Smith   ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr);
187*c0558f20SBarry Smith 
188*c0558f20SBarry Smith   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
189*c0558f20SBarry Smith      Check solution and clean up
190*c0558f20SBarry Smith    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
191*c0558f20SBarry Smith 
192*c0558f20SBarry Smith   /*
193*c0558f20SBarry Smith      Check the error
194*c0558f20SBarry Smith   */
195*c0558f20SBarry Smith   ierr = VecAXPY(x,none,U);CHKERRQ(ierr);
196*c0558f20SBarry Smith   ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
197*c0558f20SBarry Smith   ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr);
198*c0558f20SBarry Smith 
199*c0558f20SBarry Smith   /*
200*c0558f20SBarry Smith      Free work space.  All PETSc objects should be destroyed when they
201*c0558f20SBarry Smith      are no longer needed.
202*c0558f20SBarry Smith   */
203*c0558f20SBarry Smith   ierr = VecDestroy(&x);CHKERRQ(ierr);
204*c0558f20SBarry Smith   ierr = VecDestroy(&r);CHKERRQ(ierr);
205*c0558f20SBarry Smith   ierr = VecDestroy(&U);CHKERRQ(ierr);
206*c0558f20SBarry Smith   ierr = VecDestroy(&F);CHKERRQ(ierr);
207*c0558f20SBarry Smith   ierr = MatDestroy(&J);CHKERRQ(ierr);
208*c0558f20SBarry Smith   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
209*c0558f20SBarry Smith   ierr = DMDestroy(&ctx.da);CHKERRQ(ierr);
210*c0558f20SBarry Smith   ierr = PetscFinalize();
211*c0558f20SBarry Smith   return ierr;
212*c0558f20SBarry Smith }
213*c0558f20SBarry Smith 
214*c0558f20SBarry Smith /* ------------------------------------------------------------------- */
215*c0558f20SBarry Smith /*
216*c0558f20SBarry Smith    FormInitialGuess - Computes initial guess.
217*c0558f20SBarry Smith 
218*c0558f20SBarry Smith    Input/Output Parameter:
219*c0558f20SBarry Smith .  x - the solution vector
220*c0558f20SBarry Smith */
221*c0558f20SBarry Smith PetscErrorCode FormInitialGuess(Vec x)
222*c0558f20SBarry Smith {
223*c0558f20SBarry Smith   PetscErrorCode ierr;
224*c0558f20SBarry Smith   PetscScalar    pfive = .50;
225*c0558f20SBarry Smith 
226*c0558f20SBarry Smith   PetscFunctionBeginUser;
227*c0558f20SBarry Smith   ierr = VecSet(x,pfive);CHKERRQ(ierr);
228*c0558f20SBarry Smith   PetscFunctionReturn(0);
229*c0558f20SBarry Smith }
230*c0558f20SBarry Smith 
231*c0558f20SBarry Smith /* ------------------------------------------------------------------- */
232*c0558f20SBarry Smith /*
233*c0558f20SBarry Smith    FormFunction - Evaluates nonlinear function, F(x).
234*c0558f20SBarry Smith 
235*c0558f20SBarry Smith    Input Parameters:
236*c0558f20SBarry Smith .  snes - the SNES context
237*c0558f20SBarry Smith .  x - input vector
238*c0558f20SBarry Smith .  ctx - optional user-defined context, as set by SNESSetFunction()
239*c0558f20SBarry Smith 
240*c0558f20SBarry Smith    Output Parameter:
241*c0558f20SBarry Smith .  f - function vector
242*c0558f20SBarry Smith 
243*c0558f20SBarry Smith    Note:
244*c0558f20SBarry Smith    The user-defined context can contain any application-specific
245*c0558f20SBarry Smith    data needed for the function evaluation.
246*c0558f20SBarry Smith */
247*c0558f20SBarry Smith PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx)
248*c0558f20SBarry Smith {
249*c0558f20SBarry Smith   ApplicationCtx *user = (ApplicationCtx*) ctx;
250*c0558f20SBarry Smith   DM             da    = user->da;
251*c0558f20SBarry Smith   PetscScalar    *xx,*ff,*FF,d;
252*c0558f20SBarry Smith   PetscErrorCode ierr;
253*c0558f20SBarry Smith   PetscInt       i,M,xs,xm;
254*c0558f20SBarry Smith   Vec            xlocal;
255*c0558f20SBarry Smith 
256*c0558f20SBarry Smith   PetscFunctionBeginUser;
257*c0558f20SBarry Smith   ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr);
258*c0558f20SBarry Smith   /*
259*c0558f20SBarry Smith      Scatter ghost points to local vector, using the 2-step process
260*c0558f20SBarry Smith         DMGlobalToLocalBegin(), DMGlobalToLocalEnd().
261*c0558f20SBarry Smith      By placing code between these two statements, computations can
262*c0558f20SBarry Smith      be done while messages are in transition.
263*c0558f20SBarry Smith   */
264*c0558f20SBarry Smith   ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
265*c0558f20SBarry Smith   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
266*c0558f20SBarry Smith 
267*c0558f20SBarry Smith   /*
268*c0558f20SBarry Smith      Get pointers to vector data.
269*c0558f20SBarry Smith        - The vector xlocal includes ghost point; the vectors x and f do
270*c0558f20SBarry Smith          NOT include ghost points.
271*c0558f20SBarry Smith        - Using DMDAVecGetArray() allows accessing the values using global ordering
272*c0558f20SBarry Smith   */
273*c0558f20SBarry Smith   ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
274*c0558f20SBarry Smith   ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr);
275*c0558f20SBarry Smith   ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr);
276*c0558f20SBarry Smith 
277*c0558f20SBarry Smith   /*
278*c0558f20SBarry Smith      Get local grid boundaries (for 1-dimensional DMDA):
279*c0558f20SBarry Smith        xs, xm  - starting grid index, width of local grid (no ghost points)
280*c0558f20SBarry Smith   */
281*c0558f20SBarry Smith   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
282*c0558f20SBarry Smith   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
283*c0558f20SBarry Smith 
284*c0558f20SBarry Smith   /*
285*c0558f20SBarry Smith      Set function values for boundary points; define local interior grid point range:
286*c0558f20SBarry Smith         xsi - starting interior grid index
287*c0558f20SBarry Smith         xei - ending interior grid index
288*c0558f20SBarry Smith   */
289*c0558f20SBarry Smith   if (xs == 0) { /* left boundary */
290*c0558f20SBarry Smith     ff[0] = xx[0];
291*c0558f20SBarry Smith     xs++;xm--;
292*c0558f20SBarry Smith   }
293*c0558f20SBarry Smith   if (xs+xm == M) {  /* right boundary */
294*c0558f20SBarry Smith     ff[xs+xm-1] = xx[xs+xm-1] - 1.0;
295*c0558f20SBarry Smith     xm--;
296*c0558f20SBarry Smith   }
297*c0558f20SBarry Smith 
298*c0558f20SBarry Smith   /*
299*c0558f20SBarry Smith      Compute function over locally owned part of the grid (interior points only)
300*c0558f20SBarry Smith   */
301*c0558f20SBarry Smith   d = 1.0/(user->h*user->h);
302*c0558f20SBarry Smith   for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i];
303*c0558f20SBarry Smith 
304*c0558f20SBarry Smith   /*
305*c0558f20SBarry Smith      Restore vectors
306*c0558f20SBarry Smith   */
307*c0558f20SBarry Smith   ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
308*c0558f20SBarry Smith   ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr);
309*c0558f20SBarry Smith   ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr);
310*c0558f20SBarry Smith   ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
311*c0558f20SBarry Smith   PetscFunctionReturn(0);
312*c0558f20SBarry Smith }
313*c0558f20SBarry Smith 
314*c0558f20SBarry Smith /* ------------------------------------------------------------------- */
315*c0558f20SBarry Smith /*
316*c0558f20SBarry Smith    FormJacobian - Evaluates Jacobian matrix.
317*c0558f20SBarry Smith 
318*c0558f20SBarry Smith    Input Parameters:
319*c0558f20SBarry Smith .  snes - the SNES context
320*c0558f20SBarry Smith .  x - input vector
321*c0558f20SBarry Smith .  dummy - optional user-defined context (not used here)
322*c0558f20SBarry Smith 
323*c0558f20SBarry Smith    Output Parameters:
324*c0558f20SBarry Smith .  jac - Jacobian matrix
325*c0558f20SBarry Smith .  B - optionally different preconditioning matrix
326*c0558f20SBarry Smith .  flag - flag indicating matrix structure
327*c0558f20SBarry Smith */
328*c0558f20SBarry Smith PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx)
329*c0558f20SBarry Smith {
330*c0558f20SBarry Smith   ApplicationCtx *user = (ApplicationCtx*) ctx;
331*c0558f20SBarry Smith   PetscScalar    *xx,d,A[3];
332*c0558f20SBarry Smith   PetscErrorCode ierr;
333*c0558f20SBarry Smith   PetscInt       i,j[3],M,xs,xm;
334*c0558f20SBarry Smith   DM             da = user->da;
335*c0558f20SBarry Smith 
336*c0558f20SBarry Smith   PetscFunctionBeginUser;
337*c0558f20SBarry Smith   /*
338*c0558f20SBarry Smith      Get pointer to vector data
339*c0558f20SBarry Smith   */
340*c0558f20SBarry Smith   ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr);
341*c0558f20SBarry Smith   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
342*c0558f20SBarry Smith 
343*c0558f20SBarry Smith   /*
344*c0558f20SBarry Smith     Get range of locally owned matrix
345*c0558f20SBarry Smith   */
346*c0558f20SBarry Smith   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
347*c0558f20SBarry Smith 
348*c0558f20SBarry Smith   /*
349*c0558f20SBarry Smith      Determine starting and ending local indices for interior grid points.
350*c0558f20SBarry Smith      Set Jacobian entries for boundary points.
351*c0558f20SBarry Smith   */
352*c0558f20SBarry Smith 
353*c0558f20SBarry Smith   if (xs == 0) {  /* left boundary */
354*c0558f20SBarry Smith     i = 0; A[0] = 1.0;
355*c0558f20SBarry Smith 
356*c0558f20SBarry Smith     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
357*c0558f20SBarry Smith     xs++;xm--;
358*c0558f20SBarry Smith   }
359*c0558f20SBarry Smith   if (xs+xm == M) { /* right boundary */
360*c0558f20SBarry Smith     i    = M-1;
361*c0558f20SBarry Smith     A[0] = 1.0;
362*c0558f20SBarry Smith     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
363*c0558f20SBarry Smith     xm--;
364*c0558f20SBarry Smith   }
365*c0558f20SBarry Smith 
366*c0558f20SBarry Smith   /*
367*c0558f20SBarry Smith      Interior grid points
368*c0558f20SBarry Smith       - Note that in this case we set all elements for a particular
369*c0558f20SBarry Smith         row at once.
370*c0558f20SBarry Smith   */
371*c0558f20SBarry Smith   d = 1.0/(user->h*user->h);
372*c0558f20SBarry Smith   for (i=xs; i<xs+xm; i++) {
373*c0558f20SBarry Smith     j[0] = i - 1; j[1] = i; j[2] = i + 1;
374*c0558f20SBarry Smith     A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i];
375*c0558f20SBarry Smith     ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr);
376*c0558f20SBarry Smith   }
377*c0558f20SBarry Smith 
378*c0558f20SBarry Smith   /*
379*c0558f20SBarry Smith      Assemble matrix, using the 2-step process:
380*c0558f20SBarry Smith        MatAssemblyBegin(), MatAssemblyEnd().
381*c0558f20SBarry Smith      By placing code between these two statements, computations can be
382*c0558f20SBarry Smith      done while messages are in transition.
383*c0558f20SBarry Smith 
384*c0558f20SBarry Smith      Also, restore vector.
385*c0558f20SBarry Smith   */
386*c0558f20SBarry Smith 
387*c0558f20SBarry Smith   ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
388*c0558f20SBarry Smith   ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr);
389*c0558f20SBarry Smith   ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
390*c0558f20SBarry Smith 
391*c0558f20SBarry Smith   PetscFunctionReturn(0);
392*c0558f20SBarry Smith }
393*c0558f20SBarry Smith 
394*c0558f20SBarry Smith /*
395*c0558f20SBarry Smith      The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA.
396*c0558f20SBarry Smith 
397*c0558f20SBarry Smith      The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos
398*c0558f20SBarry Smith      assumes all memory it is given is allocated with cudaMallocHost() and can be used with
399*c0558f20SBarry Smith      cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory.
400*c0558f20SBarry Smith 
401*c0558f20SBarry Smith      The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi
402*c0558f20SBarry Smith */
403*c0558f20SBarry Smith 
404*c0558f20SBarry Smith /*TEST
405*c0558f20SBarry Smith 
406*c0558f20SBarry Smith    build:
407*c0558f20SBarry Smith      requires: kokkos
408*c0558f20SBarry Smith 
409*c0558f20SBarry Smith    test:
410*c0558f20SBarry Smith      requires: kokkos double !complex !single !cuda
411*c0558f20SBarry Smith      args: -view_initial -view_kokkos_configuration false -snes_monitor
412*c0558f20SBarry Smith      filter: grep -v type:
413*c0558f20SBarry Smith 
414*c0558f20SBarry Smith    test:
415*c0558f20SBarry Smith      suffix: 2
416*c0558f20SBarry Smith      requires: kokkos double !complex !single cuda
417*c0558f20SBarry Smith      args: -dm_vec_type cuda -vec_pinned_memory_min 0 -use_gpu_aware_mpi 0 -view_initial -view_kokkos_configuration false  -snes_monitor
418*c0558f20SBarry Smith      output_file: output/ex3k_1.out
419*c0558f20SBarry Smith      filter: grep -v type:
420*c0558f20SBarry Smith 
421*c0558f20SBarry Smith    test:
422*c0558f20SBarry Smith      suffix: 3
423*c0558f20SBarry Smith      requires: kokkos double !complex !single defined(PETSC_HAVE_cusparseCreateSolveAnalysisInfo) cuda
424*c0558f20SBarry Smith      nsize: 2
425*c0558f20SBarry Smith      args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -use_gpu_aware_mpi 0 -view_initial -view_kokkos_configuration false  -snes_monitor
426*c0558f20SBarry Smith      output_file: output/ex3k_1.out
427*c0558f20SBarry Smith      filter: grep -v type:
428*c0558f20SBarry Smith 
429*c0558f20SBarry Smith TEST*/
430