diff --git a/README.md b/README.md index 43f6548..e21c7cd 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen ############################################################################### # ODE solvers, callbacks etc. -ode = semidiscretize_gpu(semi, (0.0, 1.0)) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, (0.0, 1.0)) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/advection_basic_1d.jl b/examples/advection_basic_1d.jl index 2a31644..6de0fda 100644 --- a/examples/advection_basic_1d.jl +++ b/examples/advection_basic_1d.jl @@ -28,7 +28,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize_gpu(semi, (0.0, 1.0)) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, (0.0, 1.0)) # from TrixiCUDA.jl # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/examples/advection_basic_2d.jl b/examples/advection_basic_2d.jl index 5e7b098..e9b6b0f 100644 --- a/examples/advection_basic_2d.jl +++ b/examples/advection_basic_2d.jl @@ -28,7 +28,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize_gpu(semi, (0.0, 1.0)) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, (0.0, 1.0)) # from TrixiCUDA.jl # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/examples/advection_basic_3d.jl b/examples/advection_basic_3d.jl index e716835..c0962f9 100644 --- a/examples/advection_basic_3d.jl +++ b/examples/advection_basic_3d.jl @@ -28,7 +28,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergen # ODE solvers, callbacks etc. # Create ODE problem with time span from 0.0 to 1.0 -ode = semidiscretize_gpu(semi, (0.0, 1.0)) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, (0.0, 1.0)) # from TrixiCUDA.jl # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup # and resets the timers diff --git a/examples/advection_mortar_2d.jl b/examples/advection_mortar_2d.jl index 4c17a51..e18e8a3 100644 --- a/examples/advection_mortar_2d.jl +++ b/examples/advection_mortar_2d.jl @@ -27,7 +27,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 1.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/advection_mortar_3d.jl b/examples/advection_mortar_3d.jl index 2c7c71f..1de239e 100644 --- a/examples/advection_mortar_3d.jl +++ b/examples/advection_mortar_3d.jl @@ -30,7 +30,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. 
tspan = (0.0, 5.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_ec_1d.jl b/examples/euler_ec_1d.jl index 5a32374..b00045d 100644 --- a/examples/euler_ec_1d.jl +++ b/examples/euler_ec_1d.jl @@ -25,7 +25,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 0.4) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_ec_2d.jl b/examples/euler_ec_2d.jl index 1dab39e..b90b48c 100644 --- a/examples/euler_ec_2d.jl +++ b/examples/euler_ec_2d.jl @@ -27,7 +27,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 0.4) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_ec_3d.jl b/examples/euler_ec_3d.jl index 70c118e..9894557 100644 --- a/examples/euler_ec_3d.jl +++ b/examples/euler_ec_3d.jl @@ -26,7 +26,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 0.4) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_shockcapturing_1d.jl b/examples/euler_shockcapturing_1d.jl index 03db761..bf7fb1b 100644 --- a/examples/euler_shockcapturing_1d.jl +++ b/examples/euler_shockcapturing_1d.jl @@ -35,7 +35,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 1.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_shockcapturing_2d.jl b/examples/euler_shockcapturing_2d.jl index 2eae8ed..8da8c79 100644 --- a/examples/euler_shockcapturing_2d.jl +++ b/examples/euler_shockcapturing_2d.jl @@ -35,7 +35,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 1.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_shockcapturing_3d.jl b/examples/euler_shockcapturing_3d.jl index 457105e..fef1328 100644 --- a/examples/euler_shockcapturing_3d.jl +++ b/examples/euler_shockcapturing_3d.jl @@ -37,7 +37,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 0.4) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_source_terms_1d.jl b/examples/euler_source_terms_1d.jl index 8389c7a..e4ef214 100644 --- a/examples/euler_source_terms_1d.jl +++ b/examples/euler_source_terms_1d.jl @@ -27,7 +27,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. 
tspan = (0.0, 2.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_source_terms_2d.jl b/examples/euler_source_terms_2d.jl index e28153c..fe02312 100644 --- a/examples/euler_source_terms_2d.jl +++ b/examples/euler_source_terms_2d.jl @@ -24,7 +24,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 2.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/euler_source_terms_3d.jl b/examples/euler_source_terms_3d.jl index b6a403c..c3fac7b 100644 --- a/examples/euler_source_terms_3d.jl +++ b/examples/euler_source_terms_3d.jl @@ -26,7 +26,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 5.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/eulermulti_ec_1d.jl b/examples/eulermulti_ec_1d.jl index 03fd861..498692e 100644 --- a/examples/eulermulti_ec_1d.jl +++ b/examples/eulermulti_ec_1d.jl @@ -26,7 +26,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 0.4) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/eulermulti_ec_2d.jl b/examples/eulermulti_ec_2d.jl index 0db5c01..55b4e03 100644 --- a/examples/eulermulti_ec_2d.jl +++ b/examples/eulermulti_ec_2d.jl @@ -26,7 +26,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solvers, callbacks etc. tspan = (0.0, 0.4) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/hypdiff_nonperiodic_1d.jl b/examples/hypdiff_nonperiodic_1d.jl index b4b1b58..b418676 100644 --- a/examples/hypdiff_nonperiodic_1d.jl +++ b/examples/hypdiff_nonperiodic_1d.jl @@ -29,7 +29,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 5.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/hypdiff_nonperiodic_2d.jl b/examples/hypdiff_nonperiodic_2d.jl index 622e5e2..a47f44c 100644 --- a/examples/hypdiff_nonperiodic_2d.jl +++ b/examples/hypdiff_nonperiodic_2d.jl @@ -32,7 +32,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 5.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/hypdiff_nonperiodic_3d.jl b/examples/hypdiff_nonperiodic_3d.jl index 03afb06..f19e364 100644 --- a/examples/hypdiff_nonperiodic_3d.jl +++ b/examples/hypdiff_nonperiodic_3d.jl @@ -33,7 +33,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. 
tspan = (0.0, 5.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/shallowwater_dirichlet_1d.jl b/examples/shallowwater_dirichlet_1d.jl index 609287a..bd78b0b 100644 --- a/examples/shallowwater_dirichlet_1d.jl +++ b/examples/shallowwater_dirichlet_1d.jl @@ -39,7 +39,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 1.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/shallowwater_dirichlet_2d.jl b/examples/shallowwater_dirichlet_2d.jl index baedfdf..f79c478 100644 --- a/examples/shallowwater_dirichlet_2d.jl +++ b/examples/shallowwater_dirichlet_2d.jl @@ -39,7 +39,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 1.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/shallowwater_ec_1d.jl b/examples/shallowwater_ec_1d.jl index c06d6fb..6fba164 100644 --- a/examples/shallowwater_ec_1d.jl +++ b/examples/shallowwater_ec_1d.jl @@ -65,7 +65,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solver tspan = (0.0, 2.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl ############################################################################### # Callbacks diff --git a/examples/shallowwater_ec_2d.jl b/examples/shallowwater_ec_2d.jl index fa60e2a..3116031 100644 --- a/examples/shallowwater_ec_2d.jl +++ b/examples/shallowwater_ec_2d.jl @@ -37,7 +37,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) # ODE solver tspan = (0.0, 2.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl ############################################################################### # Workaround to set a discontinuous bottom topography and initial condition for debugging and testing. diff --git a/examples/shallowwater_source_terms_1d.jl b/examples/shallowwater_source_terms_1d.jl index fc1ba40..b00faca 100644 --- a/examples/shallowwater_source_terms_1d.jl +++ b/examples/shallowwater_source_terms_1d.jl @@ -36,7 +36,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. tspan = (0.0, 1.0) -ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl +ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl summary_callback = SummaryCallback() diff --git a/examples/shallowwater_source_terms_2d.jl b/examples/shallowwater_source_terms_2d.jl index 272c5cd..78c24bd 100644 --- a/examples/shallowwater_source_terms_2d.jl +++ b/examples/shallowwater_source_terms_2d.jl @@ -36,7 +36,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. 
 tspan = (0.0, 1.0)
-ode = semidiscretize_gpu(semi, tspan) # from TrixiCUDA.jl
+ode = semidiscretizeGPU(semi, tspan) # from TrixiCUDA.jl
 
 summary_callback = SummaryCallback()
diff --git a/src/TrixiCUDA.jl b/src/TrixiCUDA.jl
index 7d551e7..57c87ef 100644
--- a/src/TrixiCUDA.jl
+++ b/src/TrixiCUDA.jl
@@ -3,16 +3,20 @@ module TrixiCUDA
 # Include other packages that are used in TrixiCUDA.jl
 # using Reexport: @reexport
 
+using CUDA
 using CUDA: @cuda, CuArray, HostKernel, threadIdx, blockIdx, blockDim, similar, launch_configuration
 
-using Trixi: AbstractEquations, TreeMesh, DGSEM,
+using Trixi: AbstractEquations, True, False,
+             TreeMesh, DGSEM,
              BoundaryConditionPeriodic, SemidiscretizationHyperbolic,
              VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG,
-             flux, ntuple, nvariables,
-             True, False,
+             LobattoLegendreMortarL2,
+             flux, ntuple, nvariables, nnodes, nelements, nmortars,
+             local_leaf_cells, init_elements, init_interfaces, init_boundaries, init_mortars,
              wrap_array, compute_coefficients, have_nonconservative_terms, boundary_condition_periodic,
+             digest_boundary_conditions, check_periodicity_mesh_boundary_conditions,
              set_log_type!, set_sqrt_type!
 
 import Trixi: get_node_vars, get_node_coords, get_surface_node_vars
@@ -25,12 +29,15 @@ using StaticArrays: SVector
 
 # Include other source files
 include("auxiliary/auxiliary.jl")
+include("semidiscretization/semidiscretization.jl")
 include("solvers/solvers.jl")
 
+# Switch to Base.log and Base.sqrt - still needs a fix to avoid the printed output
 set_log_type!("log_Base")
 set_sqrt_type!("sqrt_Base")
 
 # Export the public APIs
-export semidiscretize_gpu
+export SemidiscretizationHyperbolicGPU
+export semidiscretizeGPU
 
 end
diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index 65c446f..0bad5a9 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -1,2 +1,2 @@
 include("configurators.jl")
-include("helpers.jl")
+include("stable.jl")
diff --git a/src/auxiliary/helpers.jl b/src/auxiliary/stable.jl
similarity index 74%
rename from src/auxiliary/helpers.jl
rename to src/auxiliary/stable.jl
index c099e1a..cdf1c39 100644
--- a/src/auxiliary/helpers.jl
+++ b/src/auxiliary/stable.jl
@@ -1,18 +1,23 @@
-# Some helper functions and function extensions from Trixi.jl.
+# Some helper functions and function extensions that are invoked by GPU
+# kernels, mainly to ensure that they and the functions they call remain
+# type-stable on the GPU.
 
-# Ref: `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
+# See also `get_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl;
+# the `solver::DG` argument is dropped since the `DG` type is not stable on the GPU
 @inline function get_node_vars(u, equations, indices...)
     return SVector(ntuple(@inline(v->u[v, indices...]), Val(nvariables(equations))))
 end
 
-# Ref: `get_node_coords(x, equations, solver::DG, indices...)` in Trixi.jl
+# See also `get_node_coords(x, equations, solver::DG, indices...)` in Trixi.jl;
+# the `solver::DG` argument is dropped since the `DG` type is not stable on the GPU
 @inline function get_node_coords(x, equations, indices...)
     return SVector(ntuple(@inline(idx->x[idx, indices...]), Val(ndims(equations))))
 end
 
-# Ref: `get_surface_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl
+# See also `get_surface_node_vars(u, equations, solver::DG, indices...)` in Trixi.jl;
+# the `solver::DG` argument is dropped since the `DG` type is not stable on the GPU
 @inline function get_surface_node_vars(u, equations, indices...)
     u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(nvariables(equations))))
diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl
new file mode 100644
index 0000000..11391c3
--- /dev/null
+++ b/src/semidiscretization/semidiscretization.jl
@@ -0,0 +1 @@
+include("semidiscretization_hyperbolic.jl")
diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl
new file mode 100644
index 0000000..a924b43
--- /dev/null
+++ b/src/semidiscretization/semidiscretization_hyperbolic.jl
@@ -0,0 +1,27 @@
+# See also `SemidiscretizationHyperbolic` in Trixi.jl; here the cache is created for the GPU.
+
+function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver;
+                                         source_terms = nothing,
+                                         boundary_conditions = boundary_condition_periodic,
+                                         # `RealT` is used as real type for node locations etc.
+                                         # while `uEltype` is used as element type of solutions etc.
+                                         RealT = real(solver), uEltype = RealT,
+                                         initial_cache = NamedTuple())
+    @assert ndims(mesh) == ndims(equations)
+
+    cache = (; create_cache_gpu(mesh, equations, solver, RealT, uEltype)...,
+             initial_cache...)
+    _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver,
+                                                      cache)
+
+    check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions)
+
+    SemidiscretizationHyperbolic{typeof(mesh), typeof(equations),
+                                 typeof(initial_condition),
+                                 typeof(_boundary_conditions), typeof(source_terms),
+                                 typeof(solver), typeof(cache)}(mesh, equations,
+                                                                initial_condition,
+                                                                _boundary_conditions,
+                                                                source_terms, solver,
+                                                                cache)
+end
diff --git a/src/solvers/cache.jl b/src/solvers/cache.jl
new file mode 100644
index 0000000..01fcd69
--- /dev/null
+++ b/src/solvers/cache.jl
@@ -0,0 +1,179 @@
+# Rewrite of `create_cache` in Trixi.jl that additionally allocates the
+# specialized work arrays directly on the GPU.
+
+# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently
+# experimental in Trixi.jl and it is not implemented here.
+
+# Create cache for the weak form volume integral on a general tree mesh
+function create_cache_gpu(mesh, equations,
+                          volume_integral::VolumeIntegralWeakForm, dg::DGSEM,
+                          uEltype, cache)
+    NamedTuple()
+end
+
+# Create cache specialized for 1D tree mesh
+function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltype)
+    # Get cells for which an element needs to be created (i.e. all leaf cells)
+    leaf_cell_ids = local_leaf_cells(mesh.tree)
+
+    elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype)
+
+    interfaces = init_interfaces(leaf_cell_ids, mesh, elements)
+
+    boundaries = init_boundaries(leaf_cell_ids, mesh, elements)
+
+    cache = (; elements, interfaces, boundaries)
+
+    # Add specialized parts of the cache required to compute the volume integral etc.
+    cache = (; cache...,
+             create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...)
+ + return cache +end + +function create_cache_gpu(mesh::TreeMesh{1}, equations, + volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + uEltype, cache) + NamedTuple() +end + +function create_cache_gpu(mesh::TreeMesh{1}, equations, + volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + uEltype, cache) + fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements)) + fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements)) + + cache = create_cache_gpu(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype, cache) + + # Remove `element_ids_dg` and `element_ids_dgfv` here + return (; cache..., fstar1_L, fstar1_R) +end + +# Create cache specialized for 2D tree mesh +function create_cache_gpu(mesh::TreeMesh{2}, equations, + dg::DGSEM, RealT, uEltype) + # Get cells for which an element needs to be created (i.e. all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) + + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + + cache = (; elements, interfaces, boundaries, mortars) + + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...) + cache = (; cache..., create_cache_gpu(mesh, equations, dg.mortar, uEltype, cache)...) + + return cache +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + uEltype, cache) + NamedTuple() +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + uEltype, cache) + fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nelements(cache.elements)) + fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nelements(cache.elements)) + fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nelements(cache.elements)) + fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nelements(cache.elements)) + + cache = create_cache_gpu(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype, cache) + + return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R) +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, uEltype, cache) + fstar_upper = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nmortars(cache.mortars)) + fstar_lower = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nmortars(cache.mortars)) + + (; fstar_upper, fstar_lower) +end + +# Create cache specialized for 3D tree mesh +function create_cache_gpu(mesh::TreeMesh{3}, equations, + dg::DGSEM, RealT, uEltype) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) + + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + + cache = (; elements, interfaces, boundaries, mortars) + + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...) + cache = (; cache..., create_cache_gpu(mesh, equations, dg.mortar, uEltype, cache)...) + + return cache +end + +function create_cache_gpu(mesh::TreeMesh{3}, equations, + volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + uEltype, cache) + NamedTuple() +end + +function create_cache_gpu(mesh::TreeMesh{3}, equations, + volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + uEltype, cache) + fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nnodes(dg), nelements(cache.elements)) + fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nnodes(dg), nelements(cache.elements)) + fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nnodes(dg), nelements(cache.elements)) + fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nnodes(dg), nelements(cache.elements)) + fstar3_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg), + nnodes(dg) + 1, nelements(cache.elements)) + fstar3_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg), + nnodes(dg) + 1, nelements(cache.elements)) + + cache = create_cache_gpu(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype, cache) + + return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R) +end + +function create_cache_gpu(mesh::TreeMesh{3}, equations, + mortar_l2::LobattoLegendreMortarL2, uEltype, cache) + fstar_upper_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2), nmortars(cache.mortars)) + fstar_upper_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2), nmortars(cache.mortars)) + fstar_lower_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2), nmortars(cache.mortars)) + fstar_lower_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2), nmortars(cache.mortars)) + + # Temporary arrays can also be created here + (; fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right) +end diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index 2fbc329..ebbc075 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -604,6 +604,9 @@ end # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) # and run them on the device (i.e., GPU). +# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently experimental +# in Trixi.jl and it is not implemented here. 
+ # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) @@ -697,7 +700,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 - atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl + atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl element_ids_dg = zero(CuArray{Int64}(undef, length(alpha))) element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha))) @@ -716,8 +719,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3)) @@ -766,7 +769,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 - atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl + atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl element_ids_dg = zero(CuArray{Int64}(undef, length(alpha))) element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha))) @@ -786,8 +789,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: noncons_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3)) @@ -1090,7 +1093,7 @@ end # Put everything together into a single function. -# Ref: `rhs!` function in Trixi.jl +# See also `rhs!` function in Trixi.jl function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, boundary_conditions, source_terms::Source, dg::DGSEM, cache) where {Source} du, u = copy_to_device!(du_cpu, u_cpu) diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 966cf1c..bb3eb90 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -1003,6 +1003,9 @@ end # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) # and run them on the device (i.e., GPU). +# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently experimental +# in Trixi.jl and it is not implemented here. 
+ # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) @@ -1119,7 +1122,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 - atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl + atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl element_ids_dg = zero(CuArray{Int64}(undef, length(alpha))) element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha))) @@ -1141,10 +1144,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: size(u, 4)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4)) @@ -1202,7 +1205,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 - atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl + atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl element_ids_dg = zero(CuArray{Int64}(undef, length(alpha))) element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha))) @@ -1228,10 +1231,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: size(u, 4)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4)) @@ -1591,8 +1594,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) - fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) - fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_upper = cache.fstar_upper + fstar_lower = cache.fstar_lower size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) @@ -1642,8 +1645,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) - fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) - 
fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_upper = cache.fstar_upper + fstar_lower = cache.fstar_lower size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) @@ -1729,7 +1732,7 @@ end # Put everything together into a single function. -# Ref: `rhs!` function in Trixi.jl +# See also `rhs!` function in Trixi.jl function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{2}, equations, boundary_conditions, source_terms::Source, dg::DGSEM, cache) where {Source} du, u = copy_to_device!(du_cpu, u_cpu) diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index cc2853e..501f894 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -1311,6 +1311,9 @@ end # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) # and run them on the device (i.e., GPU). +# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently experimental +# in Trixi.jl and it is not implemented here. + # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) @@ -1445,7 +1448,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 - atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl + atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl element_ids_dg = zero(CuArray{Int64}(undef, length(alpha))) element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha))) @@ -1469,18 +1472,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: size(u, 2), size(u, 5)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2), - size(u, 5))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2), - size(u, 5))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2), - size(u, 5))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2), - size(u, 5))) - fstar3_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1, - size(u, 5))) - fstar3_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1, - size(u, 5))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R + fstar3_L = cache.fstar3_L + fstar3_R = cache.fstar3_R size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5)) @@ -1543,7 +1540,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 - atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl + atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl element_ids_dg = zero(CuArray{Int64}(undef, length(alpha))) element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha))) @@ -1573,18 +1570,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: size(u, 2), size(u, 5)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2), - 
size(u, 5))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 2), - size(u, 5))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2), - size(u, 5))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 2), - size(u, 5))) - fstar3_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1, - size(u, 5))) - fstar3_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2) + 1, - size(u, 5))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R + fstar3_L = cache.fstar3_L + fstar3_R = cache.fstar3_R size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5)) @@ -2004,14 +1995,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero - fstar_upper_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) - fstar_upper_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) - fstar_lower_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) - fstar_lower_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) + fstar_upper_left = cache.fstar_upper_left + fstar_upper_right = cache.fstar_upper_right + fstar_lower_left = cache.fstar_lower_left + fstar_lower_right = cache.fstar_lower_right size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4), length(orientations)) @@ -2096,14 +2083,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero - fstar_upper_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) - fstar_upper_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) - fstar_lower_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) - fstar_lower_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), - size(u_upper_left, 4), length(orientations)) + fstar_upper_left = cache.fstar_upper_left + fstar_upper_right = cache.fstar_upper_right + fstar_lower_left = cache.fstar_lower_left + fstar_lower_right = cache.fstar_lower_right size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4), length(orientations)) @@ -2222,7 +2205,7 @@ end # Put everything together into a single function. 
-# Ref: `rhs!` function in Trixi.jl +# See also `rhs!` function in Trixi.jl function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{3}, equations, boundary_conditions, source_terms::Source, dg::DGSEM, cache) where {Source} du, u = copy_to_device!(du_cpu, u_cpu) diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index 26db714..46474bd 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -1,9 +1,10 @@ +include("cache.jl") include("common.jl") include("dg_1d.jl") include("dg_2d.jl") include("dg_3d.jl") -# Ref: `rhs!` function in Trixi.jl +# See also `rhs!` function in Trixi.jl function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi @@ -15,8 +16,8 @@ function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) return nothing end -# Ref: `semidiscretize` function in Trixi.jl -function semidiscretize_gpu(semi::SemidiscretizationHyperbolic, tspan) +# See also `semidiscretize` function in Trixi.jl +function semidiscretizeGPU(semi::SemidiscretizationHyperbolic, tspan) u0_ode = compute_coefficients(first(tspan), semi) iip = true diff --git a/test/test_script.jl b/test/test_script.jl index 4bf5eac..377d1fb 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -1,12 +1,13 @@ include("test_trixicuda.jl") -equations = IdealGlmMhdEquations1D(1.4) +equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_weak_blast_wave -surface_flux = (flux_hindenlang_gassner, flux_nonconservative_powell) -volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell) -polydeg = 4 +surface_flux = flux_ranocha # OBS! Using a non-dissipative flux is only sensible to test EC, +# but not for real shock simulations +volume_flux = flux_ranocha +polydeg = 3 basis = LobattoLegendreBasis(polydeg) indicator_sc = IndicatorHennemannGassner(equations, basis, alpha_max = 0.5, @@ -16,28 +17,27 @@ indicator_sc = IndicatorHennemannGassner(equations, basis, volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; volume_flux_dg = volume_flux, volume_flux_fv = surface_flux) +solver = DGSEM(basis, surface_flux, volume_integral) -solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux, - volume_integral = volume_integral) - -coordinates_min = -2.0 -coordinates_max = 2.0 +coordinates_min = (-2.0, -2.0, -2.0) +coordinates_max = (2.0, 2.0, 2.0) mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 3, - n_cells_max = 10_000) + n_cells_max = 100_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) +semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver) -tspan = (0.0, 1.0) +tspan = (0.0, 5.0) # Get CPU data (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi # Get GPU data -equations_gpu = deepcopy(equations) -mesh_gpu, solver_gpu, cache_gpu = deepcopy(mesh), deepcopy(solver), deepcopy(cache) -boundary_conditions_gpu, source_terms_gpu = deepcopy(boundary_conditions), - deepcopy(source_terms) +mesh_gpu, solver_gpu, cache_gpu = semi_gpu.mesh, semi_gpu.solver, semi_gpu.cache +equations_gpu, source_terms_gpu = semi_gpu.equations, semi_gpu.source_terms +initial_condition_gpu, boundary_conditions_gpu = semi_gpu.initial_condition, + semi_gpu.boundary_conditions # Set initial time t = t_gpu = 0.0 @@ -63,63 +63,63 @@ Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equati equations, solver.volume_integral, 
solver, cache) @test_approx du_gpu ≈ du -# # Test `cuda_prolong2interfaces!` -# TrixiCUDA.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu) -# Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver) -# @test_approx cache_gpu.interfaces.u ≈ cache.interfaces.u - -# # Test `cuda_interface_flux!` -# TrixiCUDA.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), -# equations_gpu, solver_gpu, cache_gpu) -# Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh, -# Trixi.have_nonconservative_terms(equations), equations, -# solver.surface_integral, solver, cache) -# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# # Test `cuda_prolong2boundaries!` -# TrixiCUDA.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, -# cache_gpu) -# Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) -# @test_approx cache_gpu.boundaries.u ≈ cache.boundaries.u - -# # Test `cuda_boundary_flux!` -# TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, -# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, -# solver_gpu, cache_gpu) -# Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, -# solver.surface_integral, solver) -# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# # Test `cuda_prolong2mortars!` -# TrixiCUDA.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), -# solver_gpu, cache_gpu) -# Trixi.prolong2mortars!(cache, u, mesh, equations, -# solver.mortar, solver.surface_integral, solver) -# @test_approx cache_gpu.mortars.u_upper_left ≈ cache.mortars.u_upper_left -# @test_approx cache_gpu.mortars.u_upper_right ≈ cache.mortars.u_upper_right -# @test_approx cache_gpu.mortars.u_lower_left ≈ cache.mortars.u_lower_left -# @test_approx cache_gpu.mortars.u_lower_right ≈ cache.mortars.u_lower_right - -# # Test `cuda_mortar_flux!` -# TrixiCUDA.cuda_mortar_flux!(mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), -# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, -# solver_gpu, cache_gpu) -# Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, -# Trixi.have_nonconservative_terms(equations), equations, -# solver.mortar, solver.surface_integral, solver, cache) -# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# # Test `cuda_surface_integral!` -# TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) -# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) -# @test_approx du_gpu ≈ du - -# # Test `cuda_jacobian!` -# TrixiCUDA.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) -# Trixi.apply_jacobian!(du, mesh, equations, solver, cache) -# @test_approx du_gpu ≈ du - -# # Test `cuda_sources!` -# TrixiCUDA.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) -# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) -# @test_approx du_gpu ≈ du +# Test `cuda_prolong2interfaces!` +TrixiCUDA.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu) +Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver) +@test_approx cache_gpu.interfaces.u ≈ cache.interfaces.u + +# Test `cuda_interface_flux!` +TrixiCUDA.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), + 
equations_gpu, solver_gpu, cache_gpu) +Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.surface_integral, solver, cache) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + +# Test `cuda_prolong2boundaries!` +TrixiCUDA.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, + cache_gpu) +Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) +@test_approx cache_gpu.boundaries.u ≈ cache.boundaries.u + +# Test `cuda_boundary_flux!` +TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + solver.surface_integral, solver) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + +# Test `cuda_prolong2mortars!` +TrixiCUDA.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), + solver_gpu, cache_gpu) +Trixi.prolong2mortars!(cache, u, mesh, equations, + solver.mortar, solver.surface_integral, solver) +@test_approx cache_gpu.mortars.u_upper_left ≈ cache.mortars.u_upper_left +@test_approx cache_gpu.mortars.u_upper_right ≈ cache.mortars.u_upper_right +@test_approx cache_gpu.mortars.u_lower_left ≈ cache.mortars.u_lower_left +@test_approx cache_gpu.mortars.u_lower_right ≈ cache.mortars.u_lower_right + +# Test `cuda_mortar_flux!` +TrixiCUDA.cuda_mortar_flux!(mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + +# Test `cuda_surface_integral!` +TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) +Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) +@test_approx du_gpu ≈ du + +# Test `cuda_jacobian!` +TrixiCUDA.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) +Trixi.apply_jacobian!(du, mesh, equations, solver, cache) +@test_approx du_gpu ≈ du + +# Test `cuda_sources!` +TrixiCUDA.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) +Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) +@test_approx du_gpu ≈ du
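
Note on the renamed entry point: every example above switches from `semidiscretize_gpu` to `semidiscretizeGPU`, and the module now exports `semidiscretizeGPU` alongside the new `SemidiscretizationHyperbolicGPU`. A minimal end-to-end sketch of the updated API (the 1D advection setup and solver settings here are illustrative, not taken from this patch):

using Trixi, TrixiCUDA
using OrdinaryDiffEq

# Standard Trixi.jl setup on the CPU side
equations = LinearScalarAdvectionEquation1D(1.0)
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
mesh = TreeMesh(-1.0, 1.0, initial_refinement_level = 4, n_cells_max = 30_000)
semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_convergence_test, solver)

# Renamed API: builds the ODEProblem whose RHS runs the CUDA kernels
ode = semidiscretizeGPU(semi, (0.0, 1.0)) # from TrixiCUDA.jl

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
            dt = 0.01, save_everystep = false)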
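On the `helpers.jl` -> `stable.jl` rename: the getters drop the `solver::DG` argument because the `SVector` length comes from `Val(nvariables(equations))`, i.e. from the equations type, so no `DG` object ever has to be passed to the device. A hedged sketch of how such a getter might be used inside a kernel, assuming the TrixiCUDA module context (`get_node_vars`, `nvariables`, and the CUDA index intrinsics in scope); the kernel itself is illustrative only, and `flux_fn` stands in for any Trixi flux function:

# Illustrative CUDA kernel computing pointwise fluxes in 1D; it relies only on
# `u`, the equations, and a flux function - no `DG` object on the device
function flux_kernel!(flux_arr, u, equations, flux_fn)
    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
    if j <= size(u, 2) && k <= size(u, 3)
        u_node = get_node_vars(u, equations, j, k) # SVector of static length
        flux_node = flux_fn(u_node, 1, equations)  # orientation 1 in 1D
        for v in 1:nvariables(equations)
            @inbounds flux_arr[v, j, k] = flux_node[v]
        end
    end
    return nothing
end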
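`SemidiscretizationHyperbolicGPU` mirrors the `SemidiscretizationHyperbolic` constructor in Trixi.jl but swaps `create_cache` for `create_cache_gpu`, so the shock-capturing and mortar work arrays are allocated once as `CuArray`s. A sketch of the intended usage alongside the CPU object, following `test/test_script.jl`:

# Same arguments as the Trixi.jl constructor; only the cache differs
semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)

# With `VolumeIntegralShockCapturingHG`, the GPU cache carries the preallocated
# `fstar` buffers (names as introduced in src/solvers/cache.jl above)
semi_gpu.cache.fstar1_L isa CuArray # true for tree meshes, by construction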
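The dg_1d.jl/dg_2d.jl/dg_3d.jl hunks all follow one pattern: the per-call device allocations for the `fstar` arrays are replaced by buffers created once in `create_cache_gpu`. Condensed from the 1D shock-capturing hunks (note the buffers are zero-initialized only at creation, so the kernels are presumably expected to overwrite them fully on each RHS evaluation):

# Before: fresh device allocation on every volume-integral call
fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))

# After: allocated once in `create_cache_gpu` ...
fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1,
                      nelements(cache.elements))
# ... and simply fetched from the cache inside `cuda_volume_integral!`
fstar1_L = cache.fstar1_L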
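Finally, `test/test_script.jl` un-comments the kernel-by-kernel comparison and now draws the GPU data from `semi_gpu` instead of `deepcopy`ing the CPU objects. Each block follows the same pattern: run the TrixiCUDA kernel and the reference Trixi.jl routine on the same state, then compare the results (`@test_approx` is the approximate-equality helper assumed to be defined via test_trixicuda.jl):

# Sketch of one comparison block, mirroring the volume-integral test above
TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
                                Trixi.have_nonconservative_terms(equations_gpu),
                                equations_gpu, solver_gpu.volume_integral,
                                solver_gpu, cache_gpu)
Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
                            equations, solver.volume_integral, solver, cache)
@test_approx du_gpu ≈ du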