point_circle.py
96 lines (85 loc) · 3.19 KB
import math

import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env

# The following imports are not shown in the original file but are required by
# the methods below; they are assumed to come from rllab and pyGLFW.
from rllab.core.serializable import Serializable
from rllab.envs.base import Step
from rllab.misc.overrides import overrides
import glfw


class PointEnv(mujoco_env.MujocoEnv, utils.EzPickle):

    # --- gym-style MujocoEnv interface --------------------------------------

    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)

    def step(self, a):
        reward = 1.0
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
        done = not notdone
        return ob, reward, done, {}

    def reset_model(self):
        # Perturb the initial state slightly so episodes do not all start
        # from exactly the same configuration.
        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
        qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        # Observation: concatenated joint positions and velocities.
        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

    def viewer_setup(self):
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = self.model.stat.extent

    # --- rllab-style point/circle interface ----------------------------------
    # Note: the __init__ and step defined below redefine the gym-style ones
    # above, so these are the definitions that take effect for this class.

    def __init__(self,
                 size=40,
                 align_mode=True,
                 reward_dir=[0., 0.],
                 target_dist=5.,
                 *args, **kwargs):
        self.size = size                # half-width of the square arena
        self.align_mode = align_mode    # True: reward motion along reward_dir
        self.reward_dir = reward_dir
        self.target_dist = target_dist  # target radius for the circle task
        super(PointEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals())

    def get_current_obs(self):
        return np.concatenate([
            self.model.data.qpos.flatten(),
            self.model.data.qvel.flat,
            self.get_body_com("torso").flat,
        ])

    def step(self, action):
        qpos = np.copy(self.model.data.qpos)
        # action[1] turns the point robot; action[0] drives it forward.
        qpos[2, 0] += action[1]
        ori = qpos[2, 0]
        # compute increment in each direction
        dx = math.cos(ori) * action[0]
        dy = math.sin(ori) * action[0]
        # ensure that the robot is within reasonable range
        qpos[0, 0] = np.clip(qpos[0, 0] + dx, -self.size, self.size)
        qpos[1, 0] = np.clip(qpos[1, 0] + dy, -self.size, self.size)
        self.model.data.qpos = qpos
        self.model.forward()
        next_obs = self.get_current_obs()
        if self.align_mode:
            # Reward displacement along the fixed reward direction.
            reward = max(self.reward_dir[0] * dx + self.reward_dir[1] * dy, 0)
        else:
            # Circle task: reward counter-clockwise motion around the origin,
            # discounted by the distance from the target radius.
            x, y = qpos[0, 0], qpos[1, 0]
            reward = -y * dx + x * dy
            reward /= (1 + np.abs(np.sqrt(x ** 2 + y ** 2) - self.target_dist))
        return Step(next_obs, reward, False)

    def get_xy(self):
        qpos = self.model.data.qpos
        return qpos[0, 0], qpos[1, 0]

    def set_xy(self, xy):
        qpos = np.copy(self.model.data.qpos)
        qpos[0, 0] = xy[0]
        qpos[1, 0] = xy[1]
        self.model.data.qpos = qpos
        self.model.forward()

    @overrides
    def action_from_key(self, key):
        # Map arrow keys to turn/forward actions for interactive control.
        lb, ub = self.action_bounds
        if key == glfw.KEY_LEFT:
            return np.array([0, ub[0] * 0.3])
        elif key == glfw.KEY_RIGHT:
            return np.array([0, lb[0] * 0.3])
        elif key == glfw.KEY_UP:
            return np.array([ub[1], 0])
        elif key == glfw.KEY_DOWN:
            return np.array([lb[1], 0])
        else:
            return np.array([0, 0])
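A minimal usage sketch, not part of the file above: it assumes the class can be constructed with a compatible MuJoCo model, that the import path `point_circle` is valid for this file, and that the rllab-style `Step` return value unpacks as (observation, reward, done, info).

import numpy as np
from point_circle import PointEnv  # hypothetical import path for this file

# Circle-task configuration: reward counter-clockwise motion near radius 5.
env = PointEnv(size=40, align_mode=False, target_dist=5.)
obs = env.get_current_obs()
for _ in range(100):
    action = np.array([1.0, 0.05])   # [forward speed, turning rate]
    step = env.step(action)          # rllab-style Step tuple
    obs, reward = step.observation, step.reward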