forked from johnjim0816/joyrl-offline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
trainer.py
34 lines (34 loc) · 1.55 KB
/
trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class Trainer:
    """Roll out single training or evaluation episodes of an agent in an env.

    The environment is expected to expose ``reset()`` and a ``step(action)``
    that returns the 5-tuple
    ``(next_state, reward, terminated, truncated, info)``.
    The agent is expected to expose ``sample_action(state)``,
    ``predict_action(state)`` and ``update()``.
    """

    def __init__(self) -> None:
        pass

    def train_one_episode(self, env, agent, cfg):
        """Run one training episode and update the agent after every step.

        Actions are chosen with ``agent.sample_action`` and ``agent.update()``
        is called once per environment step.

        Args:
            env: Environment to interact with.
            agent: Agent being trained.
            cfg: Configuration object; only ``cfg.max_steps`` is read.

        Returns:
            A tuple ``(agent, res)`` where ``res`` is a dict with the keys
            ``'ep_reward'`` (sum of rewards) and ``'ep_step'`` (steps taken).
        """
        return self._run_episode(env, agent, cfg, training=True)

    def test_one_episode(self, env, agent, cfg):
        """Run one evaluation episode without updating the agent.

        Actions are chosen with ``agent.predict_action``; ``agent.update()``
        is never called.

        Args:
            env: Environment to interact with.
            agent: Agent being evaluated.
            cfg: Configuration object; only ``cfg.max_steps`` is read.

        Returns:
            A tuple ``(agent, res)`` where ``res`` is a dict with the keys
            ``'ep_reward'`` (sum of rewards) and ``'ep_step'`` (steps taken).
        """
        return self._run_episode(env, agent, cfg, training=False)

    def _run_episode(self, env, agent, cfg, *, training):
        """Shared rollout loop behind the train and test entry points."""
        ep_reward = 0  # cumulative reward of this episode
        ep_step = 0  # number of environment steps taken
        # NOTE(review): the value returned by reset() is used directly as the
        # state. Some Gym/Gymnasium versions return (observation, info) from
        # reset() - confirm against the env wrapper used by callers.
        state = env.reset()
        for _ in range(cfg.max_steps):
            ep_step += 1
            if training:
                action = agent.sample_action(state)
            else:
                action = agent.predict_action(state)
            next_state, reward, terminated, truncated, _info = env.step(action)
            if training:
                # The transition is deliberately not pushed to agent.memory
                # here (that call was disabled in the original code);
                # presumably the agent learns from its own data source -
                # confirm with the agent implementation.
                agent.update()
            state = next_state
            ep_reward += reward
            # Stop on either end-of-episode signal: stepping an environment
            # after it reported truncation would run past the episode end.
            if terminated or truncated:
                break
        res = {'ep_reward': ep_reward, 'ep_step': ep_step}
        return agent, res