Skip to content

Commit

Permalink
Improvements for Reinforcement Learning (#78)
Browse files Browse the repository at this point in the history
* Drop Python 3.6 support

* Move limits to config, enable hand brake, normalize reward
  • Loading branch information
araffin authored May 30, 2022
1 parent 6d9496e commit 4ea6704
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 41 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.7, 3.8, 3.9]

steps:
- uses: actions/checkout@v2
Expand Down
6 changes: 5 additions & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
History
=======

1.2.0 (WIP)
1.3.0 (WIP)
------------------
* Dropped Python 3.6 support, pinned Gym to version 0.21
* Moved steer limits and throttle limits to config dict
* Normalized reward and switched to squared error for CTE
* Enabled hand brake in ``send_control()`` and at reset time
* Added type hints to most core methods
* Added ``send_lidar_config()`` method to configure LIDAR
* Added car roll, pitch, and yaw angles
Expand Down
4 changes: 1 addition & 3 deletions gym_donkeycar/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-

"""Top-level package for OpenAI Gym Environments for Donkey Car."""
import os

Expand All @@ -21,7 +19,7 @@

# Read version from file
version_file = os.path.join(os.path.dirname(__file__), "version.txt")
with open(version_file, "r") as file_handler:
with open(version_file) as file_handler:
__version__ = file_handler.read().strip()

__author__ = """Tawn Kramer"""
Expand Down
2 changes: 1 addition & 1 deletion gym_donkeycar/core/fps.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import time


class FPSTimer(object):
class FPSTimer:
"""
Every N on_frame events, give the average iterations per interval.
"""
Expand Down
2 changes: 1 addition & 1 deletion gym_donkeycar/core/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from gym_donkeycar.core.client import SDClient


class IMesgHandler(object):
class IMesgHandler:
def on_connect(self, client: SDClient) -> None:
pass

Expand Down
54 changes: 33 additions & 21 deletions gym_donkeycar/envs/donkey_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,24 @@


def supply_defaults(conf: Dict[str, Any]) -> None:
"""
Update the config dictionary
with defaults when values are missing.
:param conf: The user defined config dict,
passed to the environment constructor.
"""
defaults = [
("start_delay", 5.0),
("max_cte", 5.0),
("max_cte", 8.0),
("frame_skip", 1),
("cam_resolution", (120, 160, 3)),
("log_level", logging.INFO),
("host", "localhost"),
("port", 9091),
("steer_limit", 1.0),
("throttle_min", 0.0),
("throttle_max", 1.0),
]

for key, val in defaults:
Expand All @@ -46,10 +56,6 @@ class DonkeyEnv(gym.Env):
metadata = {"render.modes": ["human", "rgb_array"]}

ACTION_NAMES: List[str] = ["steer", "throttle"]
STEER_LIMIT_LEFT: float = -1.0
STEER_LIMIT_RIGHT: float = 1.0
THROTTLE_MIN: float = 0.0
THROTTLE_MAX: float = 1.0
VAL_PER_PIXEL: int = 255

def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None):
Expand All @@ -66,7 +72,7 @@ def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None):
supply_defaults(conf)

# set logging level
logging.basicConfig(level=conf["log_level"]) # pytype: disable=key-error
logging.basicConfig(level=conf["log_level"])

logger.debug("DEBUG ON")
logger.debug(conf)
Expand All @@ -84,10 +90,12 @@ def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None):
# start simulation com
self.viewer = DonkeyUnitySimContoller(conf=conf)

# Note: for some RL algorithms, it would be better to normalize the action space to [-1, 1]
# and then rescale to proper limits
# steering and throttle
self.action_space = spaces.Box(
low=np.array([self.STEER_LIMIT_LEFT, self.THROTTLE_MIN]),
high=np.array([self.STEER_LIMIT_RIGHT, self.THROTTLE_MAX]),
low=np.array([-float(conf["steer_limit"]), float(conf["throttle_min"])]),
high=np.array([float(conf["steer_limit"]), float(conf["throttle_max"])]),
dtype=np.float32,
)

Expand All @@ -98,7 +106,7 @@ def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None):
self.seed()

# Frame Skipping
self.frame_skip = conf["frame_skip"] # pytype: disable=key-error
self.frame_skip = conf["frame_skip"]

# wait until the car is loaded in the scene
self.viewer.wait_until_loaded()
Expand Down Expand Up @@ -129,9 +137,13 @@ def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict[str, A
return observation, reward, done, info

def reset(self) -> np.ndarray:
# Activate hand brake, so the car does not move
self.viewer.handler.send_control(0, 0, 1.0)
time.sleep(0.1)
self.viewer.reset()
self.viewer.handler.send_control(0, 0, 1.0)
time.sleep(0.1)
observation, reward, done, info = self.viewer.observe()
time.sleep(1)
return observation

def render(self, mode: str = "human", close: bool = False) -> Optional[np.ndarray]:
Expand All @@ -149,54 +161,54 @@ def is_game_over(self) -> bool:

class GeneratedRoadsEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(GeneratedRoadsEnv, self).__init__(level="generated_road", *args, **kwargs)
super().__init__(level="generated_road", *args, **kwargs)


class WarehouseEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(WarehouseEnv, self).__init__(level="warehouse", *args, **kwargs)
super().__init__(level="warehouse", *args, **kwargs)


class AvcSparkfunEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(AvcSparkfunEnv, self).__init__(level="sparkfun_avc", *args, **kwargs)
super().__init__(level="sparkfun_avc", *args, **kwargs)


class GeneratedTrackEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(GeneratedTrackEnv, self).__init__(level="generated_track", *args, **kwargs)
super().__init__(level="generated_track", *args, **kwargs)


class MountainTrackEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(MountainTrackEnv, self).__init__(level="mountain_track", *args, **kwargs)
super().__init__(level="mountain_track", *args, **kwargs)


class RoboRacingLeagueTrackEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(RoboRacingLeagueTrackEnv, self).__init__(level="roboracingleague_1", *args, **kwargs)
super().__init__(level="roboracingleague_1", *args, **kwargs)


class WaveshareEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(WaveshareEnv, self).__init__(level="waveshare", *args, **kwargs)
super().__init__(level="waveshare", *args, **kwargs)


class MiniMonacoEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(MiniMonacoEnv, self).__init__(level="mini_monaco", *args, **kwargs)
super().__init__(level="mini_monaco", *args, **kwargs)


class WarrenTrackEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(WarrenTrackEnv, self).__init__(level="warren", *args, **kwargs)
super().__init__(level="warren", *args, **kwargs)


class ThunderhillTrackEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(ThunderhillTrackEnv, self).__init__(level="thunderhill", *args, **kwargs)
super().__init__(level="thunderhill", *args, **kwargs)


class CircuitLaunchEnv(DonkeyEnv):
def __init__(self, *args, **kwargs):
super(CircuitLaunchEnv, self).__init__(level="circuit_launch", *args, **kwargs)
super().__init__(level="circuit_launch", *args, **kwargs)
2 changes: 1 addition & 1 deletion gym_donkeycar/envs/donkey_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess


class DonkeyUnityProcess(object):
class DonkeyUnityProcess:
def __init__(self):
self.proc1 = None

Expand Down
36 changes: 29 additions & 7 deletions gym_donkeycar/envs/donkey_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import base64
import logging
import math
import os
import time
import types
from io import BytesIO
Expand Down Expand Up @@ -50,9 +51,11 @@ def set_episode_over_fn(self, ep_over_fn: Callable) -> None:
self.handler.set_episode_over_fn(ep_over_fn)

def wait_until_loaded(self) -> None:
time.sleep(0.1)
while not self.handler.loaded:
logger.warning("waiting for sim to start..")
time.sleep(3.0)
time.sleep(1.0)
logger.info("sim started!")

def reset(self) -> None:
self.handler.reset()
Expand Down Expand Up @@ -440,17 +443,22 @@ def set_reward_fn(self, reward_fn: Callable[[], float]):
logger.debug("custom reward fn set.")

def calc_reward(self, done: bool) -> float:
# Normalization factor, real max speed is around 30
# but only attained on a long straight line
max_speed = 10

if done:
return -1.0

if self.cte > self.max_cte:
return -1.0

# Collision
if self.hit != "none":
return -2.0

# going fast close to the center of lane yeilds best reward
return (1.0 - (math.fabs(self.cte) / self.max_cte)) * self.speed
# going fast close to the center of lane yields best reward
return (1.0 - (self.cte / self.max_cte) ** 2) * (self.speed / max_speed)

# ------ Socket interface ----------- #

Expand Down Expand Up @@ -560,13 +568,19 @@ def determine_episode_over(self):
logger.debug("disqualified")
self.over = True

# Disable reset
if os.environ.get("RACE") == "True":
self.over = False

def on_scene_selection_ready(self, message: Dict[str, Any]) -> None:
logger.debug("SceneSelectionReady")
self.send_get_scene_names()

def on_car_loaded(self, message: Dict[str, Any]) -> None:
logger.debug("car loaded")
self.loaded = True
# Enable hand brake, so the car doesn't move
self.send_control(0, 0, 1.0)
self.on_need_car_config({})

def on_recv_scene_names(self, message: Dict[str, Any]) -> None:
Expand All @@ -579,14 +593,22 @@ def on_recv_scene_names(self, message: Dict[str, Any]) -> None:
else:
raise ValueError(f"Scene name {self.SceneToLoad} not in scene list {names}")

def send_control(self, steer: float, throttle: float) -> None:
def send_control(self, steer: float, throttle: float, brake: float = 0.0) -> None:
"""
Send command to simulator.
:param steer: desired steering
:param throttle: desired throttle
:param brake: whether or not to activate the hand brake
(can be a continuous value)
"""
if not self.loaded:
return
msg = {
"msg_type": "control",
"steering": steer.__str__(),
"throttle": throttle.__str__(),
"brake": "0.0",
"steering": str(steer),
"throttle": str(throttle),
"brake": str(brake),
}
self.queue_message(msg)

Expand Down
2 changes: 1 addition & 1 deletion gym_donkeycar/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.0
1.3.0
10 changes: 6 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

from setuptools import find_packages, setup

with open(os.path.join("gym_donkeycar", "version.txt"), "r") as file_handler:
with open(os.path.join("gym_donkeycar", "version.txt")) as file_handler:
__version__ = file_handler.read().strip()

description = "OpenAI Gym Environments for Donkey Car"
Expand All @@ -16,20 +15,23 @@
with open("HISTORY.rst") as history_file:
history = history_file.read()

requirements = ["gym", "numpy", "pillow"]
# gym 0.23 introduces breaking changes
requirements = ["gym==0.21", "numpy", "pillow"]


setup(
name="gym_donkeycar",
author="Tawn Kramer",
author_email="tawnkramer@gmail.com",
python_requires=">=3.7",
classifiers=[
"Development Status :: 2 - Pre-Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
],
description=description,
install_requires=requirements,
Expand Down

0 comments on commit 4ea6704

Please sign in to comment.