1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscScalar pfive = .50; 25 26 PetscFunctionBeginUser; 27 CHKERRQ(VecSet(x,pfive)); 28 PetscFunctionReturn(0); 29 } 30 31 /* ------------------------------------------------------------------- */ 32 /* 33 CpuFunction - Evaluates nonlinear function, F(x) on CPU 34 35 Input Parameters: 36 . snes - the SNES context 37 . x - input vector 38 . ctx - optional user-defined context, as set by SNESSetFunction() 39 40 Output Parameter: 41 . r - function vector 42 43 Note: 44 The user-defined context can contain any application-specific 45 data needed for the function evaluation. 46 */ 47 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx) 48 { 49 ApplicationCtx *user = (ApplicationCtx*) ctx; 50 DM da = user->da; 51 PetscScalar *X,*R,*F,d; 52 PetscInt i,M,xs,xm; 53 Vec xl; 54 55 PetscFunctionBeginUser; 56 CHKERRQ(DMGetLocalVector(da,&xl)); 57 CHKERRQ(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 58 CHKERRQ(DMDAVecGetArray(da,xl,&X)); 59 CHKERRQ(DMDAVecGetArray(da,r,&R)); 60 CHKERRQ(DMDAVecGetArray(da,user->F,&F)); 61 62 CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 63 CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 64 65 if (xs == 0) { /* left boundary */ 66 R[0] = X[0]; 67 xs++;xm--; 68 } 69 if (xs+xm == M) { /* right boundary */ 70 R[xs+xm-1] = X[xs+xm-1] - 1.0; 71 xm--; 72 } 73 d = 1.0/(user->h*user->h); 74 for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i]; 75 76 CHKERRQ(DMDAVecRestoreArray(da,xl,&X)); 77 CHKERRQ(DMDAVecRestoreArray(da,r,&R)); 78 CHKERRQ(DMDAVecRestoreArray(da,user->F,&F)); 79 CHKERRQ(DMRestoreLocalVector(da,&xl)); 80 PetscFunctionReturn(0); 81 } 82 83 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 84 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 85 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>; 86 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>; 87 88 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx) 89 { 90 ApplicationCtx *user = (ApplicationCtx*) ctx; 91 DM da = user->da; 92 PetscScalar d; 93 PetscInt M; 94 Vec xl; 95 PetscScalarKokkosOffsetView R; 96 ConstPetscScalarKokkosOffsetView X,F; 97 98 PetscFunctionBeginUser; 99 CHKERRQ(DMGetLocalVector(da,&xl)); 100 CHKERRQ(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 101 d = 1.0/(user->h*user->h); 102 CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 103 CHKERRQ(DMDAVecGetKokkosOffsetView(da,xl,&X)); /* read only */ 104 CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(da,r,&R)); /* write only */ 105 CHKERRQ(DMDAVecGetKokkosOffsetView(da,user->F,&F)); /* read only */ 106 Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) { 107 if (i == 0) R(0) = X(0); /* left boundary */ 108 else if (i == M-1) R(i) = X(i) - 1.0; /* right boundary */ 109 else R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */ 110 }); 111 CHKERRQ(DMDAVecRestoreKokkosOffsetView(da,xl,&X)); 112 CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R)); 113 CHKERRQ(DMDAVecRestoreKokkosOffsetView(da,user->F,&F)); 114 CHKERRQ(DMRestoreLocalVector(da,&xl)); 115 PetscFunctionReturn(0); 116 } 117 118 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx) 119 { 120 ApplicationCtx *user = (ApplicationCtx*) ctx; 121 DM da = user->da; 122 Vec rk; 123 PetscReal norm=0; 124 125 PetscFunctionBeginUser; 126 CHKERRQ(DMGetGlobalVector(da,&rk)); 127 CHKERRQ(CpuFunction(snes,x,r,ctx)); 128 CHKERRQ(KokkosFunction(snes,x,rk,ctx)); 129 CHKERRQ(VecAXPY(rk,-1.0,r)); 130 CHKERRQ(VecNorm(rk,NORM_2,&norm)); 131 CHKERRQ(DMRestoreGlobalVector(da,&rk)); 132 PetscCheckFalse(norm > 1e-6,PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g",norm); 133 PetscFunctionReturn(0); 134 } 135 /* ------------------------------------------------------------------- */ 136 /* 137 FormJacobian - Evaluates Jacobian matrix. 138 139 Input Parameters: 140 . snes - the SNES context 141 . x - input vector 142 . dummy - optional user-defined context (not used here) 143 144 Output Parameters: 145 . jac - Jacobian matrix 146 . B - optionally different preconditioning matrix 147 . flag - flag indicating matrix structure 148 */ 149 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 150 { 151 ApplicationCtx *user = (ApplicationCtx*) ctx; 152 PetscScalar *xx,d,A[3]; 153 PetscInt i,j[3],M,xs,xm; 154 DM da = user->da; 155 156 PetscFunctionBeginUser; 157 /* 158 Get pointer to vector data 159 */ 160 CHKERRQ(DMDAVecGetArrayRead(da,x,&xx)); 161 CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 162 163 /* 164 Get range of locally owned matrix 165 */ 166 CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 167 168 /* 169 Determine starting and ending local indices for interior grid points. 170 Set Jacobian entries for boundary points. 171 */ 172 173 if (xs == 0) { /* left boundary */ 174 i = 0; A[0] = 1.0; 175 176 CHKERRQ(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 177 xs++;xm--; 178 } 179 if (xs+xm == M) { /* right boundary */ 180 i = M-1; 181 A[0] = 1.0; 182 CHKERRQ(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 183 xm--; 184 } 185 186 /* 187 Interior grid points 188 - Note that in this case we set all elements for a particular 189 row at once. 190 */ 191 d = 1.0/(user->h*user->h); 192 for (i=xs; i<xs+xm; i++) { 193 j[0] = i - 1; j[1] = i; j[2] = i + 1; 194 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 195 CHKERRQ(MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES)); 196 } 197 198 /* 199 Assemble matrix, using the 2-step process: 200 MatAssemblyBegin(), MatAssemblyEnd(). 201 By placing code between these two statements, computations can be 202 done while messages are in transition. 203 204 Also, restore vector. 205 */ 206 207 CHKERRQ(MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY)); 208 CHKERRQ(DMDAVecRestoreArrayRead(da,x,&xx)); 209 CHKERRQ(MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY)); 210 211 PetscFunctionReturn(0); 212 } 213 214 int main(int argc,char **argv) 215 { 216 SNES snes; /* SNES context */ 217 Mat J; /* Jacobian matrix */ 218 ApplicationCtx ctx; /* user-defined context */ 219 Vec x,r,U,F; /* vectors */ 220 PetscScalar none = -1.0; 221 PetscInt its,N = 5,maxit,maxf; 222 PetscReal abstol,rtol,stol,norm; 223 PetscBool viewinitial = PETSC_FALSE; 224 225 CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help)); 226 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL)); 227 ctx.h = 1.0/(N-1); 228 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL)); 229 230 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 231 Create nonlinear solver context 232 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 233 CHKERRQ(SNESCreate(PETSC_COMM_WORLD,&snes)); 234 235 /* 236 Create distributed array (DMDA) to manage parallel grid and vectors 237 */ 238 CHKERRQ(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da)); 239 CHKERRQ(DMSetFromOptions(ctx.da)); 240 CHKERRQ(DMSetUp(ctx.da)); 241 242 /* 243 Extract global and local vectors from DMDA; then duplicate for remaining 244 vectors that are the same types 245 */ 246 CHKERRQ(DMCreateGlobalVector(ctx.da,&x)); 247 CHKERRQ(PetscObjectSetName((PetscObject)x,"Approximate Solution")); 248 CHKERRQ(VecDuplicate(x,&r)); 249 CHKERRQ(VecDuplicate(x,&F)); ctx.F = F; 250 CHKERRQ(PetscObjectSetName((PetscObject)F,"Forcing function")); 251 CHKERRQ(VecDuplicate(x,&U)); 252 CHKERRQ(PetscObjectSetName((PetscObject)U,"Exact Solution")); 253 254 /* 255 Set function evaluation routine and vector. Whenever the nonlinear 256 solver needs to compute the nonlinear function, it will call this 257 routine. 258 - Note that the final routine argument is the user-defined 259 context that provides application-specific data for the 260 function evaluation routine. 261 262 At the beginning, one can use a stub function that checks the Kokkos version 263 against the CPU version to quickly expose errors. 264 CHKERRQ(SNESSetFunction(snes,r,StubFunction,&ctx)); 265 */ 266 CHKERRQ(SNESSetFunction(snes,r,KokkosFunction,&ctx)); 267 268 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 269 Create matrix data structure; set Jacobian evaluation routine 270 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 271 CHKERRQ(DMCreateMatrix(ctx.da,&J)); 272 273 /* 274 Set Jacobian matrix data structure and default Jacobian evaluation 275 routine. Whenever the nonlinear solver needs to compute the 276 Jacobian matrix, it will call this routine. 277 - Note that the final routine argument is the user-defined 278 context that provides application-specific data for the 279 Jacobian evaluation routine. 280 */ 281 CHKERRQ(SNESSetJacobian(snes,J,J,FormJacobian,&ctx)); 282 CHKERRQ(SNESSetFromOptions(snes)); 283 CHKERRQ(SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf)); 284 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf)); 285 286 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 287 Initialize application: 288 Store forcing function of PDE and exact solution 289 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 290 { 291 PetscScalarKokkosOffsetView FF,UU; 292 CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF)); 293 CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU)); 294 Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) { 295 PetscReal xp = i*ctx.h; 296 FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 297 UU(i) = xp*xp*xp; 298 }); 299 CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF)); 300 CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU)); 301 } 302 303 if (viewinitial) { 304 CHKERRQ(VecView(U,NULL)); 305 CHKERRQ(VecView(F,NULL)); 306 } 307 308 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 309 Evaluate initial guess; then solve nonlinear system 310 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 311 312 /* 313 Note: The user should initialize the vector, x, with the initial guess 314 for the nonlinear solver prior to calling SNESSolve(). In particular, 315 to employ an initial guess of zero, the user should explicitly set 316 this vector to zero by calling VecSet(). 317 */ 318 CHKERRQ(FormInitialGuess(x)); 319 CHKERRQ(SNESSolve(snes,NULL,x)); 320 CHKERRQ(SNESGetIterationNumber(snes,&its)); 321 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its)); 322 323 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 324 Check solution and clean up 325 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 326 /* 327 Check the error 328 */ 329 CHKERRQ(VecAXPY(x,none,U)); 330 CHKERRQ(VecNorm(x,NORM_2,&norm)); 331 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its)); 332 333 /* 334 Free work space. All PETSc objects should be destroyed when they 335 are no longer needed. 336 */ 337 CHKERRQ(VecDestroy(&x)); 338 CHKERRQ(VecDestroy(&r)); 339 CHKERRQ(VecDestroy(&U)); 340 CHKERRQ(VecDestroy(&F)); 341 CHKERRQ(MatDestroy(&J)); 342 CHKERRQ(SNESDestroy(&snes)); 343 CHKERRQ(DMDestroy(&ctx.da)); 344 CHKERRQ(PetscFinalize()); 345 return 0; 346 } 347 348 /*TEST 349 350 build: 351 requires: kokkos_kernels 352 353 test: 354 requires: kokkos_kernels !complex !single 355 nsize: 2 356 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 357 output_file: output/ex3k_1.out 358 359 TEST*/ 360