Skip to content

Commit

Permalink
Merge pull request #146 from dee0512/master
Browse files Browse the repository at this point in the history
Breakout RL experiments
  • Loading branch information
djsaunde authored Oct 28, 2018
2 parents 2cc97a8 + a3942d8 commit da730bc
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 11 deletions.
1 change: 0 additions & 1 deletion bindsnet/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ def __init__(self, network: Network, environment: Environment, encoding: Callabl
for l in self.network.layers:
self.network.add_monitor(Monitor(self.network.layers[l], 's', self.plot_interval * self.time),
name=f'{l}_spikes')

if 'v' in self.network.layers[l].__dict__:
self.network.add_monitor(Monitor(self.network.layers[l], 'v', self.plot_interval * self.time),
name=f'{l}_voltages')
Expand Down
5 changes: 3 additions & 2 deletions bindsnet/pipeline/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ def select_softmax(pipeline: Pipeline, **kwargs) -> int:
assert pipeline.network.layers[output].n == pipeline.env.action_space.n, \
'Output layer size not equal to size of action space.'

assert hasattr(pipeline, 'spike_record'), 'Pipeline has not attribute named: spike_record.'

# Sum of previous iterations' spikes (Not yet implemented)
spikes = pipeline.network.layers[output].s
spikes = torch.sum(pipeline.spike_record[output], dim=1)
_sum = torch.sum(torch.exp(spikes.float()))

# Choose action based on readout neuron spiking
if _sum == 0:
action = np.random.choice(pipeline.env.action_space.n)
else:
Expand Down
16 changes: 8 additions & 8 deletions examples/breakout/breakout.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,23 @@
out = LIFNodes(n=4, refrac=0, traces=True)

# Connections between layers.
inpt_middle = Connection(source=inpt, target=middle, wmax=1e-2)
middle_out = Connection(source=middle, target=out, wmax=1e-1, nu=2e-2)
inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
middle_out = Connection(source=middle, target=out, wmin=0, wmax=1)

# Add all layers and connections to the network.
network.add_layer(inpt, name='X')
network.add_layer(middle, name='Y')
network.add_layer(out, name='Z')
network.add_connection(inpt_middle, source='X', target='Y')
network.add_connection(middle_out, source='Y', target='Z')
network.add_layer(inpt, name='Input Layer')
network.add_layer(middle, name='Hidden Layer')
network.add_layer(out, name='Output Layer')
network.add_connection(inpt_middle, source='Input Layer', target='Hidden Layer')
network.add_connection(middle_out, source='Hidden Layer', target='Output Layer')

# Load Breakout environment.
environment = GymEnvironment('BreakoutDeterministic-v4')
environment.reset()

# Build pipeline from specified components.
pipeline = Pipeline(network, environment, encoding=bernoulli,
action_function=select_softmax, output='Z',
action_function=select_softmax, output='Output Layer',
time=100, history_length=1, delta=1,
plot_interval=1, render_interval=1)

Expand Down
68 changes: 68 additions & 0 deletions examples/breakout/breakout_stdp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import torch

from bindsnet.network import Network
from bindsnet.pipeline import Pipeline
from bindsnet.learning import MSTDP
from bindsnet.encoding import bernoulli
from bindsnet.network.topology import Connection
from bindsnet.environment import GymEnvironment
from bindsnet.network.nodes import Input, LIFNodes
from bindsnet.pipeline.action import select_softmax

# Number of episodes to run in each phase of the experiment.
N_TRAIN_EPISODES = 100
N_TEST_EPISODES = 100


def _run_episodes(pipeline: Pipeline, n_episodes: int) -> None:
    """Run ``n_episodes`` full episodes and print each episode's total reward.

    Each episode resets the pipeline, then steps it until ``pipeline.done``
    is truthy, accumulating ``pipeline.reward`` after every step.

    :param pipeline: Pipeline to drive; must expose ``reset_()``, ``step()``,
        ``reward`` and ``done``.
    :param n_episodes: Number of episodes to run.
    """
    for i in range(n_episodes):
        pipeline.reset_()

        # Initialize episode reward.
        reward = 0
        while True:
            pipeline.step()
            reward += pipeline.reward
            if pipeline.done:
                break

        # Passed as a separate print argument so the reward is rendered with
        # str(), whatever type the pipeline reports it as.
        print("Episode " + str(i) + " reward:", reward)


# Build network.
network = Network(dt=1.0)

# Layers of neurons: 80 x 80 input, 100 hidden LIF neurons, 4 output LIF neurons.
inpt = Input(n=80 * 80, shape=[80, 80], traces=True)
middle = LIFNodes(n=100, traces=True)
out = LIFNodes(n=4, refrac=0, traces=True)

# Connections between layers. Only the hidden -> output connection is given
# an update rule (MSTDP); the input -> hidden connection specifies none.
inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
middle_out = Connection(
    source=middle,
    target=out,
    wmin=0,
    wmax=1,
    update_rule=MSTDP,
    nu=1e-1,
    norm=0.5 * middle.n,
)

# Add all layers and connections to the network.
network.add_layer(inpt, name='Input Layer')
network.add_layer(middle, name='Hidden Layer')
network.add_layer(out, name='Output Layer')
network.add_connection(inpt_middle, source='Input Layer', target='Hidden Layer')
network.add_connection(middle_out, source='Hidden Layer', target='Output Layer')

# Load Breakout environment.
environment = GymEnvironment('BreakoutDeterministic-v4')
environment.reset()

# Build pipeline from specified components.
pipeline = Pipeline(network, environment, encoding=bernoulli,
                    action_function=select_softmax, output='Output Layer',
                    time=100, history_length=1, delta=1,
                    plot_interval=1, render_interval=1)

# Train agent.
print("Training: ")
_run_episodes(pipeline, N_TRAIN_EPISODES)

# Stop MSTDP: turn learning off on the network before evaluating.
pipeline.network.learning = False

# Evaluate the agent with learning disabled.
print("Testing: ")
_run_episodes(pipeline, N_TEST_EPISODES)

0 comments on commit da730bc

Please sign in to comment.