-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathgym_torcs.py
290 lines (237 loc) · 11.1 KB
/
gym_torcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import gym
from gym import spaces
import numpy as np
# from os import path
import snakeoil3_gym as snakeoil3
import numpy as np
import copy
import collections as col
import os
import time
stopProb = 0.1
class TorcsEnv:
terminal_judge_start = 100 # If after 100 timestep still no progress, terminated
termination_limit_progress = 5 # [km/h], episode terminates if car is running slower than this limit
default_speed = 50
initial_reset = True
def __init__(self, vision=False, throttle=False, gear_change=False):
self.vision = vision
self.throttle = throttle
self.gear_change = gear_change
self.initial_run = True
##print("launch torcs")
os.system('pkill torcs')
time.sleep(0.5)
if self.vision is True:
os.system('torcs -nofuel -nodamage -nolaptime -vision &')
else:
os.system('torcs -nofuel -nolaptime &')
time.sleep(0.5)
os.system('sh autostart.sh')
time.sleep(0.5)
"""
# Modify here if you use multiple tracks in the environment
self.client = snakeoil3.Client(p=3101, vision=self.vision) # Open new UDP in vtorcs
self.client.MAX_STEPS = np.inf
client = self.client
client.get_servers_input() # Get the initial input from torcs
obs = client.S.d # Get the current full-observation from torcs
"""
if throttle is False:
self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
else:
self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,))
if vision is False:
high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf])
low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf])
self.observation_space = spaces.Box(low=low, high=high)
else:
high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
self.observation_space = spaces.Box(low=low, high=high)
def step(self, u):
#print("Step")
# convert thisAction to the actual torcs actionstr
client = self.client
this_action = self.agent_to_torcs(u)
# Apply Action
action_torcs = client.R.d
# Steering
action_torcs['steer'] = this_action['steer'] # in [-1, 1]
# Simple Autnmatic Throttle Control by Snakeoil
if self.throttle is False:
target_speed = self.default_speed
if client.S.d['speedX'] < target_speed - (client.R.d['steer']*50):
client.R.d['accel'] += .01
else:
client.R.d['accel'] -= .01
if client.R.d['accel'] > 0.2:
client.R.d['accel'] = 0.2
if client.S.d['speedX'] < 10:
client.R.d['accel'] += 1/(client.S.d['speedX']+.1)
# Traction Control System
if ((client.S.d['wheelSpinVel'][2]+client.S.d['wheelSpinVel'][3]) -
(client.S.d['wheelSpinVel'][0]+client.S.d['wheelSpinVel'][1]) > 5):
action_torcs['accel'] -= .2
else:
action_torcs['accel'] = this_action['accel']
action_torcs['brake'] = this_action['brake']
# Automatic Gear Change by Snakeoil
if self.gear_change is True:
action_torcs['gear'] = this_action['gear']
else:
# Automatic Gear Change by Snakeoil is possible
action_torcs['gear'] = 1
if self.throttle:
if client.S.d['speedX'] > 50:
action_torcs['gear'] = 2
if client.S.d['speedX'] > 80:
action_torcs['gear'] = 3
if client.S.d['speedX'] > 110:
action_torcs['gear'] = 4
if client.S.d['speedX'] > 140:
action_torcs['gear'] = 5
if client.S.d['speedX'] > 170:
action_torcs['gear'] = 6
# Save the privious full-obs from torcs for the reward calculation
obs_pre = copy.deepcopy(client.S.d)
# One-Step Dynamics Update #################################
# Apply the Agent's action into torcs
client.respond_to_server()
# Get the response of TORCS
client.get_servers_input()
# Get the current full-observation from torcs
obs = client.S.d
# Make an obsevation from a raw observation vector from TORCS
self.observation = self.make_observaton(obs)
# Reward setting Here #######################################
# direction-dependent positive reward
track = np.array(obs['track'])
trackPos = np.array(obs['trackPos'])
sp = np.array(obs['speedX'])
spy = np.array(obs['speedY'])
damage = np.array(obs['damage'])
rpm = np.array(obs['rpm'])
progress = sp*np.cos(1.0*obs['angle']) - np.abs(1.0*sp*np.sin(obs['angle']))\
- 2 * sp*np.abs(obs['trackPos']*np.sin(obs['angle']))\
-spy*np.cos(obs['angle'])
reward = progress
# collision detection
if obs['damage'] - obs_pre['damage'] > 0:
reward = -200
# Termination judgement #########################
episode_terminate = False
if (abs(track.any()) > 1 or abs(trackPos) > 1): # Episode is terminated if the car is out of track
reward = -50
if np.random.rand() < stopProb:
episode_terminate = True
client.R.d['meta'] = True
if self.terminal_judge_start < self.time_step: # Episode terminates if the progress of agent is small
if progress < self.termination_limit_progress:
print("No progress")
episode_terminate = True
client.R.d['meta'] = True
if np.cos(obs['angle']) < 0: # Episode is terminated if the agent runs backward
episode_terminate = True
client.R.d['meta'] = True
if client.R.d['meta'] is True: # Send a reset signal
self.initial_run = False
client.respond_to_server()
self.time_step += 1
return self.get_obs(), reward, client.R.d['meta'], {}
def reset(self, relaunch=False):
#print("Reset")
self.time_step = 0
if self.initial_reset is not True:
self.client.R.d['meta'] = True
self.client.respond_to_server()
## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
if relaunch is True:
self.reset_torcs()
print("### TORCS is RELAUNCHED ###")
# Modify here if you use multiple tracks in the environment
self.client = snakeoil3.Client(p=3101, vision=self.vision) # Open new UDP in vtorcs
self.client.MAX_STEPS = np.inf
client = self.client
client.get_servers_input() # Get the initial input from torcs
obs = client.S.d # Get the current full-observation from torcs
self.observation = self.make_observaton(obs)
self.last_u = None
self.initial_reset = False
return self.get_obs()
def end(self):
os.system('pkill torcs')
def get_obs(self):
return self.observation
def reset_torcs(self):
#print("relaunch torcs")
os.system('pkill torcs')
time.sleep(0.5)
if self.vision is True:
os.system('torcs -nofuel -nodamage -nolaptime -vision &')
else:
os.system('torcs -nofuel -nolaptime &')
time.sleep(0.5)
os.system('sh autostart.sh')
time.sleep(0.5)
def agent_to_torcs(self, u):
torcs_action = {'steer': u[0]}
if self.throttle is True: # throttle action is enabled
torcs_action.update({'accel': u[1]})
torcs_action.update({'brake': u[2]})
if self.gear_change is True: # gear change action is enabled
torcs_action.update({'gear': int(u[3])})
return torcs_action
def obs_vision_to_image_rgb(self, obs_image_vec):
image_vec = obs_image_vec
r = image_vec[0:len(image_vec):3]
g = image_vec[1:len(image_vec):3]
b = image_vec[2:len(image_vec):3]
sz = (64, 64)
r = np.array(r).reshape(sz)
g = np.array(g).reshape(sz)
b = np.array(b).reshape(sz)
return np.array([r, g, b], dtype=np.uint8)
def make_observaton(self, raw_obs):
if self.vision is False:
names = ['focus',
'speedX', 'speedY', 'speedZ', 'angle', 'damage',
'opponents',
'rpm',
'track',
'trackPos',
'wheelSpinVel']
Observation = col.namedtuple('Observaion', names)
return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200.,
speedX=np.array(raw_obs['speedX'], dtype=np.float32)/300.0,
speedY=np.array(raw_obs['speedY'], dtype=np.float32)/300.0,
speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/300.0,
angle=np.array(raw_obs['angle'], dtype=np.float32)/3.1416,
damage=np.array(raw_obs['damage'], dtype=np.float32),
opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200.,
rpm=np.array(raw_obs['rpm'], dtype=np.float32)/10000,
track=np.array(raw_obs['track'], dtype=np.float32)/200.,
trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1.,
wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32))
else:
names = ['focus',
'speedX', 'speedY', 'speedZ', 'angle',
'opponents',
'rpm',
'track',
'trackPos',
'wheelSpinVel',
'img']
Observation = col.namedtuple('Observaion', names)
# Get RGB from observation
image_rgb = self.obs_vision_to_image_rgb(raw_obs[names[8]])
return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200.,
speedX=np.array(raw_obs['speedX'], dtype=np.float32)/self.default_speed,
speedY=np.array(raw_obs['speedY'], dtype=np.float32)/self.default_speed,
speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/self.default_speed,
opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200.,
rpm=np.array(raw_obs['rpm'], dtype=np.float32),
track=np.array(raw_obs['track'], dtype=np.float32)/200.,
trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1.,
wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32),
img=image_rgb)