
Commit

Merge pull request #4 from dylan-asmar/dev

POMDPTools update + original tx function option added
dylan-asmar authored Sep 26, 2022
2 parents 7be7f4e + 4561f51 commit bb19b81
Showing 14 changed files with 169 additions and 120 deletions.
35 changes: 0 additions & 35 deletions .github/workflows/BuildStatus.yml

This file was deleted.

55 changes: 55 additions & 0 deletions .github/workflows/CI.yml
@@ -0,0 +1,55 @@
name: CI
on:
  push:
    branches:
      - main
    tags: '*'
  pull_request:
concurrency:
  # Skip intermediate builds: always.
  # Cancel intermediate builds: only if it is a pull request build.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
  test:
    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        version:
          - "1"
        os:
          - ubuntu-latest
          - macOS-latest
          - windows-latest
        arch:
          - x64
          # - aarch64
    steps:
      - uses: actions/checkout@v2
      - uses: julia-actions/setup-julia@v1
        with:
          version: ${{ matrix.version }}
          arch: ${{ matrix.arch }}
      - uses: julia-actions/cache@v1
      - uses: julia-actions/julia-buildpkg@v1
      - uses: julia-actions/julia-runtest@v1
      - uses: julia-actions/julia-processcoverage@v1
      - uses: codecov/codecov-action@v3
        with:
          files: lcov.info
  docs:
    name: Documentation
    permissions: write-all
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: julia-actions/setup-julia@v1
        with:
          version: '1'
      - uses: julia-actions/julia-buildpkg@v1
      - uses: julia-actions/julia-docdeploy@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
27 changes: 0 additions & 27 deletions .github/workflows/RunTests.yml

This file was deleted.

25 changes: 0 additions & 25 deletions .github/workflows/documentation.yml

This file was deleted.

3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
 Manifest.toml
 .DS_Store
-docs/build/
+docs/build/
+.vscode
10 changes: 4 additions & 6 deletions Project.toml
@@ -1,18 +1,16 @@
 name = "TagPOMDPProblem"
 uuid = "8a653263-a1cc-4cf9-849f-f530f6ffc800"
-version = "0.1.0"
+version = "0.1.1"

 [deps]
-BeliefUpdaters = "8bb6e9a1-7d73-552c-a44a-e5dc5634aac4"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-POMDPModelTools = "08074719-1b2a-587c-a292-00f91cc44415"
+POMDPTools = "7588e00f-9cae-40de-98dc-e0c70c48cdd7"
 POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

 [compat]
 julia = "1.6"
 POMDPs = "0.9"
-BeliefUpdaters = "0.2.1"
-POMDPModelTools = "0.3.9"
-Plots = "1.23"
+Plots = "1.23"
+POMDPTools = "0.1"
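
This release swaps the separate POMDPModelTools and BeliefUpdaters dependencies for the consolidated POMDPTools package. A minimal sketch of picking up the new version, assuming the package name shown above is available from the General registry (the JuliaRegistrator comment below indicates registration was requested):

```julia
using Pkg

# TagPOMDPProblem v0.1.1 depends on POMDPTools instead of the
# older POMDPModelTools and BeliefUpdaters packages.
Pkg.add("TagPOMDPProblem")

using TagPOMDPProblem
using POMDPTools
```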
2 changes: 1 addition & 1 deletion README.md
@@ -30,7 +30,7 @@ The goal of the agent is to tag the opponent by performing the tag action while

 - **Actions**: The agent can move in the four cardinal directions or perform the tag action. When performing the `tag` action, the robot does not move. The target moves during `tag` if the robot and target are not at the same location.

-- **Transition model**: The movement of the agent is deterministic based on its selected action. The opponent moves stochastically according to a fixed policy away from the agent. The opponent moves away from the agent `move_away_probability` of the time and stays in the same cell otherwise. The implementation of the opponent’s movement policy varies slightly from the original paper allowing more movement away from the agent, thus making the scenario slightly more challenging. This implementation redistributes the probabilities of actions that result in hitting a wall to other actions that result in moving away. See the [transitions.jl](https://github.com/dylan-asmar/TagPOMDPProblem.jl/blob/b0100ddb39b27990a70668187d6f1de8acb50f1e/src/transition.jl#L11) for details.
+- **Transition model**: The movement of the agent is deterministic based on its selected action. The opponent moves stochastically according to a fixed policy away from the agent. The opponent moves away from the agent `move_away_probability` of the time and stays in the same cell otherwise. The implementation of the opponent’s movement policy varies slightly from the original paper, allowing more movement away from the agent and thus making the scenario slightly more challenging. This implementation redistributes the probabilities of actions that result in hitting a wall to other actions that result in moving away. See [transition.jl](https://github.com/dylan-asmar/TagPOMDPProblem.jl/blob/b0100ddb39b27990a70668187d6f1de8acb50f1e/src/transition.jl#L11) for details. The transition function from the original implementation can be used by passing `orig_transition_fcn = true`.

 - **Observation model**: The agent’s position is fully observable but the opponent’s position is unobserved unless both actors are in the same cell. The number of observations is one more than the number of grid squares (e.g. 30 observations for the default problem).
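A short usage sketch of the new keyword, based on the constructor and `TagState` layout shown in `src/tag_types.jl` below; the grid positions and the action index here are illustrative:

```julia
using POMDPs
using TagPOMDPProblem

# Default problem uses the modified opponent transition model
pomdp_mod = TagPOMDP()

# Opt into the transition function from the original paper's implementation
pomdp_orig = TagPOMDP(orig_transition_fcn = true)

# Compare opponent transition distributions from an illustrative state
s = TagState((1, 1), (3, 2), false)  # (robot position, target position, tagged?)
d_mod  = transition(pomdp_mod, s, 1)   # actions are integers; 1 is one of the moves
d_orig = transition(pomdp_orig, s, 1)
```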
2 changes: 1 addition & 1 deletion docs/src/index.md
@@ -2,7 +2,7 @@

 Tag POMDP problem using [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl). Original problem was presented in Pineau, Joelle et al. “Point-based value iteration: An anytime algorithm for POMDPs.” IJCAI (2003) ([online here](https://www.ijcai.org/Proceedings/03/Papers/147.pdf)).

-The goal of the agent is to tag the opponent by performing the tag action while in the same square as the opponent. The agent can move in the four cardinal directions or perform the tag action. The movement of the agent is deterministic based on its selected action. A reward of `step_penalty` is imposed for each motion action and the tag action results in a `tag_reward` for a successful tag and `tag_penalty` otherwise. The agent’s position is fully observable but the opponent’s position is unobserved unless both actors are in the same cell. The opponent moves stochastically according to a fixed policy away from the agent. The opponent moves away from the agent `move_away_probability` of the time and stays in the same cell otherwise. The implementation of the opponent’s movement policy varies slightly from the original paper allowing more movement away from the agent, thus making the scenario slightly more challenging. This implementation redistributes the probabilities of actions that result in hitting a wall to other actions that result in moving away.
+The goal of the agent is to tag the opponent by performing the tag action while in the same square as the opponent. The agent can move in the four cardinal directions or perform the tag action. The movement of the agent is deterministic based on its selected action. A reward of `step_penalty` is imposed for each motion action and the tag action results in a `tag_reward` for a successful tag and `tag_penalty` otherwise. The agent’s position is fully observable but the opponent’s position is unobserved unless both actors are in the same cell. The opponent moves stochastically according to a fixed policy away from the agent. The opponent moves away from the agent `move_away_probability` of the time and stays in the same cell otherwise. The implementation of the opponent’s movement policy varies slightly from the original paper, allowing more movement away from the agent and thus making the scenario slightly more challenging. This implementation redistributes the probabilities of actions that result in hitting a wall to other actions that result in moving away. The original transition function is available by passing `orig_transition_fcn = true` when constructing the problem.

 ```@docs
 TagPOMDP()
3 changes: 1 addition & 2 deletions src/TagPOMDPProblem.jl
@@ -2,8 +2,7 @@ module TagPOMDPProblem

 using LinearAlgebra
 using POMDPs
-using POMDPModelTools
-using BeliefUpdaters
+using POMDPTools
 using Plots
 using SparseArrays

28 changes: 17 additions & 11 deletions src/tag_types.jl
@@ -97,13 +97,14 @@ end
 POMDP type for the Tag POMDP.
 # Fields
-- `tag_grid::TagGrid`:
-- `tag_reward::Float64`:
-- `tag_penalty::Float64`:
-- `step_penalty::Float64`:
-- `terminal_state::TagState`:
-- `discount_factor::Float64`:
-- `move_away_probability::Float64`:
+- `tag_grid::TagGrid`
+- `tag_reward::Float64`
+- `tag_penalty::Float64`
+- `step_penalty::Float64`
+- `terminal_state::TagState`
+- `discount_factor::Float64`
+- `move_away_probability::Float64`
+- `orig_transition_fcn::Bool`
 """
 struct TagPOMDP <: POMDP{TagState, Int, Int}
     tag_grid::TagGrid
@@ -113,20 +114,21 @@ struct TagPOMDP <: POMDP{TagState, Int, Int}
     terminal_state::TagState
     discount_factor::Float64
     move_away_probability::Float64
+    orig_transition_fcn::Bool
 end

 """
     TagPOMDP(; kwargs...)
 Returns a `TagPOMDP <: POMDP{TagState, Int, Int}`.
 Default values are from the original paper:
 Pineau, Joelle et al. “Point-based value iteration: An anytime algorithm for POMDPs.” IJCAI (2003).
 The main difference in this implementation is the use of only 1 terminal state
 and an opponent transition function that aims to keep the probability of moving away at the
 specified value if there is a valid action (versus allowing the action and thus increasing
-the probability of remaining in place).
+the probability of remaining in place). To use the transition function from the original
+implementation, pass `orig_transition_fcn = true`.
 # Keywords
 - `tag_grid::TagGrid`: Grid details, default = `TagGrid()`
@@ -135,6 +137,7 @@ the probability of remaining in place).
 - `step_penalty::Float64`: Reward for each movement action, default = -1.0
 - `discount_factor::Float64`: Discount factor, default = 0.95
 - `move_away_probability::Float64`: Probability associated with the opponent strategy. This probability is the chance it moves away, default = 0.8
+- `orig_transition_fcn::Bool`: Boolean to use the transition function from the original paper implementation, default = false
 """
 function TagPOMDP(;
     tag_grid::TagGrid = TagGrid(),
@@ -143,9 +146,12 @@ function TagPOMDP(;
     step_penalty::Float64 = -1.0,
     discount_factor::Float64 = 0.95,
     move_away_probability::Float64 = 0.8,
+    orig_transition_fcn::Bool = false,
 )
-    return TagPOMDP(tag_grid, tag_reward, tag_penalty, step_penalty,
-        TagState((0,0), (0,0), true), discount_factor, move_away_probability)
+    return TagPOMDP(
+        tag_grid, tag_reward, tag_penalty, step_penalty, TagState((0,0), (0,0), true),
+        discount_factor, move_away_probability, orig_transition_fcn
+    )
 end

 Base.length(pomdp::TagPOMDP) = length(pomdp.tag_grid.full_grid_lin_indices) + 1
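A hedged construction example using only the keyword arguments documented above; the `tag_reward` and `tag_penalty` magnitudes are illustrative rather than confirmed package defaults:

```julia
using TagPOMDPProblem

pomdp = TagPOMDP(
    tag_reward = 10.0,            # reward for a successful tag (illustrative value)
    tag_penalty = -10.0,          # penalty for a failed tag (illustrative value)
    step_penalty = -1.0,          # default movement penalty
    discount_factor = 0.95,       # default discount
    move_away_probability = 0.8,  # default opponent move-away probability
    orig_transition_fcn = true,   # use the original paper's transition model
)
```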
91 changes: 85 additions & 6 deletions src/transition.jl
@@ -1,12 +1,7 @@
 """
     POMDPs.transition(pomdp::TagPOMDP, s::TagState, a::Int)
-Transition function for the TagPOMDP. This transition is similar to the original paper but
-differs with how it redistributes the probabilities of actions where the opponent would hit
-a wall and stay in place. The original implementation redistributed those probabilities to
-the stay in place state. This implementation keeps the probability of moving away from the
-agent at the defined threshold if there is a valid movement option (away and not into a
-wall). The movement of the agent is deterministic in the direction of the action.
+Transition function for the TagPOMDP. This transition mimics the original paper.
 """
 function POMDPs.transition(pomdp::TagPOMDP, s::TagState, a::Int)
     if isterminal(pomdp, s)
@@ -20,6 +15,22 @@ function POMDPs.transition(pomdp::TagPOMDP, s::TagState, a::Int)
         end
     end

+    if pomdp.orig_transition_fcn
+        return orig_transition(pomdp, s, a)
+    end
+    return modified_transition(pomdp, s, a)
+end
+
+"""
+Modified transition function for the TagPOMDP. This transition is similar to the original
+paper but differs in how it redistributes the probabilities of actions where the
+opponent would hit a wall and stay in place. The original implementation redistributed
+those probabilities to the stay-in-place state. This implementation keeps the
+probability of moving away from the agent at the defined threshold if there is a valid
+movement option (away and not into a wall). The movement of the agent is deterministic
+in the direction of the action.
+"""
+function modified_transition(pomdp::TagPOMDP, s::TagState, a::Int)
     r_pos_x, r_pos_y = s.r_pos
     t_pos_x, t_pos_y = s.t_pos
     grid = pomdp.tag_grid
@@ -59,6 +70,74 @@ function POMDPs.transition(pomdp::TagPOMDP, s::TagState, a::Int)
     return SparseCat(states, t_probs)
 end

+"""
+Transition function for the TagPOMDP that mimics the original paper. The
+implementation is structured to be closely aligned with the modified transition
+function.
+"""
+function orig_transition(pomdp::TagPOMDP, s::TagState, a::Int)
+    r_pos_x, r_pos_y = s.r_pos
+    t_pos_x, t_pos_y = s.t_pos
+    grid = pomdp.tag_grid
+    t_move_pos_options = Vector{Tuple{Int, Int}}()
+
+    # Counters added to modify transition probability to align with original implementation
+    cnt_wall_hits_ns = 0
+    cnt_ns_options = 0
+    cnt_wall_hits_ew = 0
+    cnt_ew_options = 0
+
+    # Look for viable moves for the target to move "away" from the robot
+    for card_d_i in X_DIRS
+        if ACTION_INEQ[card_d_i](t_pos_x, r_pos_x)
+            cnt_ew_options += 1
+            d_i = ACTION_DIRS[ACTIONS_DICT[card_d_i]]
+            if !hit_wall(grid, s.t_pos, d_i)
+                push!(t_move_pos_options, move_direction(grid, s.t_pos, d_i))
+            else
+                cnt_wall_hits_ew += 1
+            end
+        end
+    end
+    for card_d_i in Y_DIRS
+        if ACTION_INEQ[card_d_i](t_pos_y, r_pos_y)
+            cnt_ns_options += 1
+            d_i = ACTION_DIRS[ACTIONS_DICT[card_d_i]]
+            if !hit_wall(grid, s.t_pos, d_i)
+                push!(t_move_pos_options, move_direction(grid, s.t_pos, d_i))
+            else
+                cnt_wall_hits_ns += 1
+            end
+        end
+    end
+
+    # Split the move_away_probability across E-W and N-S movements. If a move-away
+    # direction results in hitting a wall, that probability is allocated to the
+    # "stay in place" transition
+    ns_moves = cnt_ns_options - cnt_wall_hits_ns
+    ew_moves = cnt_ew_options - cnt_wall_hits_ew
+
+    ns_prob = pomdp.move_away_probability / 2 / cnt_ns_options
+    ew_prob = pomdp.move_away_probability / 2 / cnt_ew_options
+
+    # Create the transition probability array
+    t_probs = ones(length(t_move_pos_options) + 1)
+    t_probs[1:ew_moves] .= ew_prob
+    t_probs[ew_moves+1:ew_moves+ns_moves] .= ns_prob
+
+    push!(t_move_pos_options, s.t_pos)
+    t_probs[end] = 1.0 - sum(t_probs[1:end-1])
+
+    # Robot position is deterministic
+    r_pos′ = move_direction(pomdp.tag_grid, s.r_pos, ACTION_DIRS[a])
+
+    states = Vector{TagState}(undef, length(t_move_pos_options))
+    for (ii, t_pos′) in enumerate(t_move_pos_options)
+        states[ii] = TagState(r_pos′, t_pos′, false)
+    end
+    return SparseCat(states, t_probs)
+end
+
 function move_direction(grid::TagGrid, p::Tuple{Int, Int}, d::Tuple{Int, Int})
     if hit_wall(grid, p, d)
         return p
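The probability bookkeeping in `orig_transition` splits `move_away_probability` evenly between the E-W and N-S axes, then across the candidate moves on each axis, with wall-blocked mass falling through to the stay-in-place outcome via the final `1.0 - sum(...)`. A standalone sketch of just that arithmetic, detached from the grid and state types:

```julia
# Sketch of the probability split used in `orig_transition` above.
# Wall-blocked move-away mass implicitly falls to "stay in place"
# because the last probability is 1 minus the sum of the move probabilities.
function away_probs(move_away_probability, ew_options, ew_wall_hits,
                    ns_options, ns_wall_hits)
    ew_prob = ew_options > 0 ? move_away_probability / 2 / ew_options : 0.0
    ns_prob = ns_options > 0 ? move_away_probability / 2 / ns_options : 0.0
    probs = vcat(fill(ew_prob, ew_options - ew_wall_hits),
                 fill(ns_prob, ns_options - ns_wall_hits))
    stay = 1.0 - sum(probs)  # remainder, including any wall-blocked mass
    return probs, stay
end

# One E-W escape direction blocked by a wall, two N-S directions open:
probs, stay = away_probs(0.8, 1, 1, 2, 0)  # probs == [0.2, 0.2], stay == 0.6
```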
2 changes: 1 addition & 1 deletion src/visualization.jl
@@ -1,5 +1,5 @@

-function POMDPModelTools.render(pomdp::TagPOMDP, step; pre_act_text::String="")
+function POMDPTools.render(pomdp::TagPOMDP, step; pre_act_text::String="")

     plt = nothing
     plotted_robot = false
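A hedged rendering sketch: `stepthrough` and `RandomPolicy` come from POMDPTools, and passing the simulation step straight to `render` follows the common POMDPTools step convention; treat the exact step fields as an assumption rather than documented API for this package.

```julia
using POMDPs, POMDPTools, TagPOMDPProblem

pomdp = TagPOMDP()
policy = RandomPolicy(pomdp)  # simple baseline policy from POMDPTools

for step in stepthrough(pomdp, policy, "s,a,r"; max_steps = 5)
    plt = render(pomdp, step)  # render moved from POMDPModelTools to POMDPTools
    display(plt)
end
```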

2 comments on commit bb19b81

@dylan-asmar (Member Author)
@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/68974

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.1.1 -m "<description of version>" bb19b818f9119b656d8bd97610ca27bb22aab8f8
git push origin v0.1.1
