Module pacai.ui.crawler.gui
Expand source code
import math
import time
import threading
import tkinter
import traceback
from pacai.student.qlearningAgents import QLearningAgent
from pacai.core.environment import Environment
class CrawlingRobotEnvironment(Environment):
"""
A GUI display for crawler.
"""
def __init__(self, crawlingRobot):
self.crawlingRobot = crawlingRobot
# The state is of the form (armAngle, handAngle)
# where the angles are bucket numbers, not actual
# degree measurements
self.state = None
self.nArmStates = 9
self.nHandStates = 13
# create a list of arm buckets and hand buckets to
# discretize the state space
minArmAngle, maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles()
minHandAngle, maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles()
armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates - 1)
handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates - 1)
self.armBuckets = [minArmAngle + (armIncrement * i) for i in range(self.nArmStates)]
self.handBuckets = [minHandAngle + (handIncrement * i) for i in range(self.nHandStates)]
# Reset
self.reset()
def getCurrentState(self):
"""
Return the current state of the crawling robot.
"""
return self.state
def getPossibleActions(self, state):
"""
Returns possible actions for the states in the current state.
"""
actions = list()
currArmBucket, currHandBucket = state
if currArmBucket > 0:
actions.append('arm-down')
if currArmBucket < self.nArmStates - 1:
actions.append('arm-up')
if currHandBucket > 0:
actions.append('hand-down')
if currHandBucket < self.nHandStates - 1:
actions.append('hand-up')
return actions
def doAction(self, action):
"""
Perform the action and update
the current state of the Environment
and return the reward for the
current state, the next state
and the taken action.
Returns:
nextState, reward
"""
nextState, reward = None, None
oldX, oldY = self.crawlingRobot.getRobotPosition()
armBucket, handBucket = self.state
armAngle, handAngle = self.crawlingRobot.getAngles()
if action == 'arm-up':
newArmAngle = self.armBuckets[armBucket + 1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket + 1, handBucket)
if action == 'arm-down':
newArmAngle = self.armBuckets[armBucket - 1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket - 1, handBucket)
if action == 'hand-up':
newHandAngle = self.handBuckets[handBucket + 1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket, handBucket + 1)
if action == 'hand-down':
newHandAngle = self.handBuckets[handBucket - 1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket, handBucket - 1)
newX, newY = self.crawlingRobot.getRobotPosition()
# a simple reward function
reward = newX - oldX
self.state = nextState
return nextState, reward
def reset(self):
"""
Resets the Environment to the initial state
"""
# Initialize the state to be the middle
# value for each parameter e.g. if there are 13 and 19
# buckets for the arm and hand parameters, then the intial
# state should be (6, 9)
# Also call self.crawlingRobot.setAngles()
# to the initial arm and hand angle
armState = int(self.nArmStates / 2)
handState = int(self.nHandStates / 2)
self.state = armState, handState
self.crawlingRobot.setAngles(self.armBuckets[armState], self.handBuckets[handState])
self.crawlingRobot.positions = [20, self.crawlingRobot.getRobotPosition()[0]]
class CrawlingRobot(object):
def setAngles(self, armAngle, handAngle):
"""
set the robot's arm and hand angles
to the passed in values
"""
self.armAngle = armAngle
self.handAngle = handAngle
def getAngles(self):
"""
returns the pair of (armAngle, handAngle)
"""
return self.armAngle, self.handAngle
def getRobotPosition(self):
"""
returns the (x, y) coordinates
of the lower-left point of the robot
"""
return self.robotPos
def moveArm(self, newArmAngle):
"""
move the robot arm to 'newArmAngle'
"""
if newArmAngle > self.maxArmAngle:
raise Exception('Crawling Robot: Arm Raised too high. Careful!')
if newArmAngle < self.minArmAngle:
raise Exception('Crawling Robot: Arm Raised too low. Careful!')
disp = self.displacement(self.armAngle, self.handAngle, newArmAngle, self.handAngle)
curXPos = self.robotPos[0]
self.robotPos = (curXPos + disp, self.robotPos[1])
self.armAngle = newArmAngle
# Position and Velocity Sign Post
self.positions.append(self.getRobotPosition()[0])
if len(self.positions) > 100:
self.positions.pop(0)
# self.angleSums.pop(0)
def moveHand(self, newHandAngle):
"""
move the robot hand to 'newArmAngle'
"""
if newHandAngle > self.maxHandAngle:
raise Exception('Crawling Robot: Hand Raised too high. Careful!')
if newHandAngle < self.minHandAngle:
raise Exception('Crawling Robot: Hand Raised too low. Careful!')
disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle)
curXPos = self.robotPos[0]
self.robotPos = (curXPos + disp, self.robotPos[1])
self.handAngle = newHandAngle
# Position and Velocity Sign Post
self.positions.append(self.getRobotPosition()[0])
if len(self.positions) > 100:
self.positions.pop(0)
# self.angleSums.pop(0)
def getMinAndMaxArmAngles(self):
"""
get the lower- and upper- bound
for the arm angles returns (min, max) pair
"""
return self.minArmAngle, self.maxArmAngle
def getMinAndMaxHandAngles(self):
"""
get the lower- and upper- bound
for the hand angles returns (min, max) pair
"""
return self.minHandAngle, self.maxHandAngle
def getRotationAngle(self):
"""
get the current angle the
robot body is rotated off the ground
"""
armCos, armSin = self.__getCosAndSin(self.armAngle)
handCos, handSin = self.__getCosAndSin(self.handAngle)
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
if y < 0:
return math.atan(-y / x)
return 0.0
# You shouldn't need methods below here
def __getCosAndSin(self, angle):
return math.cos(angle), math.sin(angle)
def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree):
oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree)
armCos, armSin = self.__getCosAndSin(armDegree)
oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree)
handCos, handSin = self.__getCosAndSin(handDegree)
xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth
yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight
x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
y = self.armLength * armSin + self.handLength * handSin + self.robotHeight
if y < 0:
if yOld <= 0:
return math.sqrt(xOld * xOld + yOld * yOld) - math.sqrt(x * x + y * y)
return (xOld - yOld * (x - xOld) / (y - yOld)) - math.sqrt(x * x + y * y)
else:
if yOld >= 0:
return 0.0
return -(x - y * (xOld - x) / (yOld - y)) + math.sqrt(xOld * xOld + yOld * yOld)
raise Exception('Never Should See This!')
def draw(self, stepCount, stepDelay):
x1, y1 = self.getRobotPosition()
x1 = x1 % self.totWidth
# Check Lower Still on the ground
if y1 != self.groundY:
raise Exception('Flying Robot!!')
rotationAngle = self.getRotationAngle()
cosRot, sinRot = self.__getCosAndSin(rotationAngle)
x2 = x1 + self.robotWidth * cosRot
y2 = y1 - self.robotWidth * sinRot
x3 = x1 - self.robotHeight * sinRot
y3 = y1 - self.robotHeight * cosRot
x4 = x3 + cosRot * self.robotWidth
y4 = y3 - sinRot * self.robotWidth
self.canvas.coords(self.robotBody, x1, y1, x2, y2, x4, y4, x3, y3)
armCos, armSin = self.__getCosAndSin(rotationAngle + self.armAngle)
xArm = x4 + self.armLength * armCos
yArm = y4 - self.armLength * armSin
self.canvas.coords(self.robotArm, x4, y4, xArm, yArm)
handCos, handSin = self.__getCosAndSin(self.handAngle + rotationAngle)
xHand = xArm + self.handLength * handCos
yHand = yArm - self.handLength * handSin
self.canvas.coords(self.robotHand, xArm, yArm, xHand, yHand)
# Position and Velocity Sign Post
# time = len(self.positions) + 0.5 * sum(self.angleSums)
# velocity = (self.positions[-1]-self.positions[0]) / time
# if len(self.positions) == 1: return
steps = (stepCount - self.lastStep)
if (steps == 0):
return
# pos = self.positions[-1]
# velocity = (pos - self.lastPos) / steps
# g = .9 ** (10 * stepDelay)
# g = .99 ** steps
# self.velAvg = g * self.velAvg + (1 - g) * velocity
# g = .999 ** steps
# self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity
pos = self.positions[-1]
velocity = pos - self.positions[-2]
vel2 = (pos - self.positions[0]) / len(self.positions)
self.velAvg = .9 * self.velAvg + .1 * vel2
velMsg = '100-step Avg Velocity: %.2f' % self.velAvg
# velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2
velocityMsg = 'Velocity: %.2f' % velocity
positionMsg = 'Position: %2.f' % pos
stepMsg = 'Step: %d' % stepCount
if (self.vel_msg is not None):
self.canvas.delete(self.vel_msg)
self.canvas.delete(self.pos_msg)
self.canvas.delete(self.step_msg)
self.canvas.delete(self.velavg_msg)
# self.canvas.delete(self.velavg2_msg)
# self.velavg2_msg = self.canvas.create_text(850, 190, text=velMsg2)
self.velavg_msg = self.canvas.create_text(650, 190, text=velMsg)
self.vel_msg = self.canvas.create_text(450, 190, text=velocityMsg)
self.pos_msg = self.canvas.create_text(250, 190, text=positionMsg)
self.step_msg = self.canvas.create_text(50, 190, text=stepMsg)
# self.lastPos = pos
self.lastStep = stepCount
# self.lastVel = velocity
def __init__(self, canvas):
# Canvas
self.canvas = canvas
self.velAvg = 0
# self.velAvg2 = 0
# self.lastPos = 0
self.lastStep = 0
# self.lastVel = 0
# Arm and Hand Degrees
self.armAngle = self.oldArmDegree = 0.0
self.handAngle = self.oldHandDegree = -math.pi / 6
self.maxArmAngle = math.pi / 6
self.minArmAngle = -math.pi / 6
self.maxHandAngle = 0
self.minHandAngle = -(5.0 / 6.0) * math.pi
# Draw Ground
self.totWidth = canvas.winfo_reqwidth()
self.totHeight = canvas.winfo_reqheight()
self.groundHeight = 40
self.groundY = self.totHeight - self.groundHeight
self.ground = canvas.create_rectangle(0, self.groundY, self.totWidth, self.totHeight,
fill = 'blue')
# Robot Body
self.robotWidth = 80
self.robotHeight = 40
self.robotPos = (20, self.groundY)
self.robotBody = canvas.create_polygon(0, 0, 0, 0, 0, 0, 0, 0, fill='green')
# Robot Arm
self.armLength = 60
self.robotArm = canvas.create_line(0, 0, 0, 0, fill='orange', width=5)
# Robot Hand
self.handLength = 40
self.robotHand = canvas.create_line(0, 0, 0, 0, fill='red', width=3)
self.positions = [0, 0]
# self.angleSums = [0, 0]
self.vel_msg = None
self.velavg_msg = None
self.pos_msg = None
self.step_msg = None
class Application(object):
def __init__(self, win, max_steps):
self.ep = 0
self.ga = 2
self.al = 2
self.stepCount = 0
self.max_steps = max_steps
self.exit_status = 0
# Init Gui
self.__initGUI(win)
self.robot = CrawlingRobot(self.canvas)
self.robotEnvironment = CrawlingRobotEnvironment(self.robot)
# Init Agent
actionFn = lambda state: self.robotEnvironment.getPossibleActions(state)
self.learner = QLearningAgent(0, actionFn=actionFn)
self.learner.setEpsilon(self.epsilon)
self.learner.setLearningRate(self.alpha)
self.learner.setDiscount(self.gamma)
# Start GUI
self.running = True
self.stopped = False
self.stepsToSkip = 0
self.thread = threading.Thread(target = self._run_wrapper)
self.thread.start()
def sigmoid(self, x):
return 1.0 / (1.0 + 2.0 ** (-x))
def incrementSpeed(self, inc):
self.tickTime *= inc
# self.epsilon = min(1.0, self.epsilon)
# self.epsilon = max(0.0, self.epsilon)
# self.learner.setSpeed(self.epsilon)
self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)
def incrementEpsilon(self, inc):
self.ep += inc
self.epsilon = self.sigmoid(self.ep)
self.learner.setEpsilon(self.epsilon)
self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)
def incrementGamma(self, inc):
self.ga += inc
self.gamma = self.sigmoid(self.ga)
self.learner.setDiscount(self.gamma)
self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)
def incrementAlpha(self, inc):
self.al += inc
self.alpha = self.sigmoid(self.al)
self.learner.setLearningRate(self.alpha)
self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)
def __initGUI(self, win):
# Window
self.win = win
# Initialize Frame
win.grid()
self.dec = -0.5
self.inc = 0.5
self.tickTime = 0.05
# Epsilon Button + Label
self.setupSpeedButtonAndLabel(win)
self.setupEpsilonButtonAndLabel(win)
# Gamma Button + Label
self.setUpGammaButtonAndLabel(win)
# Alpha Button + Label
self.setupAlphaButtonAndLabel(win)
# Exit Button
# self.exit_button = tkinter.Button(win, text='Quit', command=self.exit)
# self.exit_button.grid(row=0, column=9)
# Simulation Buttons
# self.setupSimulationButtons(win)
# Canvas
self.canvas = tkinter.Canvas(root, height=200, width=1000)
self.canvas.grid(row=2, columnspan=10)
def setupAlphaButtonAndLabel(self, win):
self.alpha_minus = tkinter.Button(win, text="-",
command=(lambda: self.incrementAlpha(self.dec)))
self.alpha_minus.grid(row=1, column=3, padx=10)
self.alpha = self.sigmoid(self.al)
self.alpha_label = tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
self.alpha_label.grid(row=1, column=4)
self.alpha_plus = tkinter.Button(win, text="+",
command=(lambda: self.incrementAlpha(self.inc)))
self.alpha_plus.grid(row=1, column=5, padx=10)
def setUpGammaButtonAndLabel(self, win):
self.gamma_minus = tkinter.Button(win, text="-",
command=(lambda: self.incrementGamma(self.dec)))
self.gamma_minus.grid(row=1, column=0, padx=10)
self.gamma = self.sigmoid(self.ga)
self.gamma_label = tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
self.gamma_label.grid(row=1, column=1)
self.gamma_plus = tkinter.Button(win, text="+",
command=(lambda: self.incrementGamma(self.inc)))
self.gamma_plus.grid(row=1, column=2, padx=10)
def setupEpsilonButtonAndLabel(self, win):
self.epsilon_minus = tkinter.Button(win, text="-",
command=(lambda: self.incrementEpsilon(self.dec)))
self.epsilon_minus.grid(row=0, column=3)
self.epsilon = self.sigmoid(self.ep)
self.epsilon_label = tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
self.epsilon_label.grid(row=0, column=4)
self.epsilon_plus = tkinter.Button(win, text="+",
command=(lambda: self.incrementEpsilon(self.inc)))
self.epsilon_plus.grid(row=0, column=5)
def setupSpeedButtonAndLabel(self, win):
self.speed_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementSpeed(0.5)))
self.speed_minus.grid(row=0, column=0)
self.speed_label = tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
self.speed_label.grid(row=0, column=1)
self.speed_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementSpeed(2)))
self.speed_plus.grid(row=0, column=2)
def skip5kSteps(self):
self.stepsToSkip = 5000
def exit(self):
self.running = False
if (self.thread is not None):
self.thread.join()
self.thread = None
if (self.win is not None):
self.win.destroy()
self.win = None
def step(self):
self.stepCount += 1
state = self.robotEnvironment.getCurrentState()
actions = self.robotEnvironment.getPossibleActions(state)
if len(actions) == 0.0:
self.robotEnvironment.reset()
state = self.robotEnvironment.getCurrentState()
actions = self.robotEnvironment.getPossibleActions(state)
print('Reset!')
action = self.learner.getAction(state)
if (action is None):
raise Exception('None action returned: Code Not Complete')
nextState, reward = self.robotEnvironment.doAction(action)
self.learner.observeTransition(state, action, nextState, reward)
# Run on a different thread.
def _run_wrapper(self):
try:
self.run()
except Exception:
self.exit_status = 10
traceback.print_exc()
self.win.quit()
self.running = False
self.stopped = True
self.win.quit()
# Run on a different thread.
def run(self):
self.stepCount = 0
self.learner.startEpisode()
while True:
minSleep = 0.01
tm = max(minSleep, self.tickTime)
time.sleep(tm)
self.stepsToSkip = int(tm / self.tickTime) - 1
if not self.running:
break
for i in range(self.stepsToSkip):
self.step()
self.stepsToSkip = 0
self.step()
if (self.max_steps is not None and self.stepCount >= self.max_steps):
break
self.learner.stopEpisode()
def start(self):
self.win.mainloop()
def run(max_steps = None):
global root
root = tkinter.Tk(baseName = 'crawler')
root.title('Crawler GUI')
root.resizable(0, 0)
app = Application(root, max_steps = max_steps)
def update_gui():
if (app.running):
app.robot.draw(app.stepCount, app.tickTime)
root.after(10, update_gui)
update_gui()
root.protocol('WM_DELETE_WINDOW', app.exit)
app.start()
app.exit()
return app.exit_status
Functions
def run(max_steps=None)
-
Expand source code
def run(max_steps = None): global root root = tkinter.Tk(baseName = 'crawler') root.title('Crawler GUI') root.resizable(0, 0) app = Application(root, max_steps = max_steps) def update_gui(): if (app.running): app.robot.draw(app.stepCount, app.tickTime) root.after(10, update_gui) update_gui() root.protocol('WM_DELETE_WINDOW', app.exit) app.start() app.exit() return app.exit_status
Classes
class Application (win, max_steps)
-
Expand source code
class Application(object): def __init__(self, win, max_steps): self.ep = 0 self.ga = 2 self.al = 2 self.stepCount = 0 self.max_steps = max_steps self.exit_status = 0 # Init Gui self.__initGUI(win) self.robot = CrawlingRobot(self.canvas) self.robotEnvironment = CrawlingRobotEnvironment(self.robot) # Init Agent actionFn = lambda state: self.robotEnvironment.getPossibleActions(state) self.learner = QLearningAgent(0, actionFn=actionFn) self.learner.setEpsilon(self.epsilon) self.learner.setLearningRate(self.alpha) self.learner.setDiscount(self.gamma) # Start GUI self.running = True self.stopped = False self.stepsToSkip = 0 self.thread = threading.Thread(target = self._run_wrapper) self.thread.start() def sigmoid(self, x): return 1.0 / (1.0 + 2.0 ** (-x)) def incrementSpeed(self, inc): self.tickTime *= inc # self.epsilon = min(1.0, self.epsilon) # self.epsilon = max(0.0, self.epsilon) # self.learner.setSpeed(self.epsilon) self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime) def incrementEpsilon(self, inc): self.ep += inc self.epsilon = self.sigmoid(self.ep) self.learner.setEpsilon(self.epsilon) self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon) def incrementGamma(self, inc): self.ga += inc self.gamma = self.sigmoid(self.ga) self.learner.setDiscount(self.gamma) self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma) def incrementAlpha(self, inc): self.al += inc self.alpha = self.sigmoid(self.al) self.learner.setLearningRate(self.alpha) self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha) def __initGUI(self, win): # Window self.win = win # Initialize Frame win.grid() self.dec = -0.5 self.inc = 0.5 self.tickTime = 0.05 # Epsilon Button + Label self.setupSpeedButtonAndLabel(win) self.setupEpsilonButtonAndLabel(win) # Gamma Button + Label self.setUpGammaButtonAndLabel(win) # Alpha Button + Label self.setupAlphaButtonAndLabel(win) # Exit Button # self.exit_button = tkinter.Button(win, text='Quit', command=self.exit) # self.exit_button.grid(row=0, column=9) # Simulation Buttons # self.setupSimulationButtons(win) # Canvas self.canvas = tkinter.Canvas(root, height=200, width=1000) self.canvas.grid(row=2, columnspan=10) def setupAlphaButtonAndLabel(self, win): self.alpha_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementAlpha(self.dec))) self.alpha_minus.grid(row=1, column=3, padx=10) self.alpha = self.sigmoid(self.al) self.alpha_label = tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha)) self.alpha_label.grid(row=1, column=4) self.alpha_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementAlpha(self.inc))) self.alpha_plus.grid(row=1, column=5, padx=10) def setUpGammaButtonAndLabel(self, win): self.gamma_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementGamma(self.dec))) self.gamma_minus.grid(row=1, column=0, padx=10) self.gamma = self.sigmoid(self.ga) self.gamma_label = tkinter.Label(win, text='Discount: %.3f' % (self.gamma)) self.gamma_label.grid(row=1, column=1) self.gamma_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementGamma(self.inc))) self.gamma_plus.grid(row=1, column=2, padx=10) def setupEpsilonButtonAndLabel(self, win): self.epsilon_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementEpsilon(self.dec))) self.epsilon_minus.grid(row=0, column=3) self.epsilon = self.sigmoid(self.ep) self.epsilon_label = tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon)) self.epsilon_label.grid(row=0, column=4) self.epsilon_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementEpsilon(self.inc))) self.epsilon_plus.grid(row=0, column=5) def setupSpeedButtonAndLabel(self, win): self.speed_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementSpeed(0.5))) self.speed_minus.grid(row=0, column=0) self.speed_label = tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime)) self.speed_label.grid(row=0, column=1) self.speed_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementSpeed(2))) self.speed_plus.grid(row=0, column=2) def skip5kSteps(self): self.stepsToSkip = 5000 def exit(self): self.running = False if (self.thread is not None): self.thread.join() self.thread = None if (self.win is not None): self.win.destroy() self.win = None def step(self): self.stepCount += 1 state = self.robotEnvironment.getCurrentState() actions = self.robotEnvironment.getPossibleActions(state) if len(actions) == 0.0: self.robotEnvironment.reset() state = self.robotEnvironment.getCurrentState() actions = self.robotEnvironment.getPossibleActions(state) print('Reset!') action = self.learner.getAction(state) if (action is None): raise Exception('None action returned: Code Not Complete') nextState, reward = self.robotEnvironment.doAction(action) self.learner.observeTransition(state, action, nextState, reward) # Run on a different thread. def _run_wrapper(self): try: self.run() except Exception: self.exit_status = 10 traceback.print_exc() self.win.quit() self.running = False self.stopped = True self.win.quit() # Run on a different thread. def run(self): self.stepCount = 0 self.learner.startEpisode() while True: minSleep = 0.01 tm = max(minSleep, self.tickTime) time.sleep(tm) self.stepsToSkip = int(tm / self.tickTime) - 1 if not self.running: break for i in range(self.stepsToSkip): self.step() self.stepsToSkip = 0 self.step() if (self.max_steps is not None and self.stepCount >= self.max_steps): break self.learner.stopEpisode() def start(self): self.win.mainloop()
Methods
def exit(self)
-
Expand source code
def exit(self): self.running = False if (self.thread is not None): self.thread.join() self.thread = None if (self.win is not None): self.win.destroy() self.win = None
def incrementAlpha(self, inc)
-
Expand source code
def incrementAlpha(self, inc): self.al += inc self.alpha = self.sigmoid(self.al) self.learner.setLearningRate(self.alpha) self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)
def incrementEpsilon(self, inc)
-
Expand source code
def incrementEpsilon(self, inc): self.ep += inc self.epsilon = self.sigmoid(self.ep) self.learner.setEpsilon(self.epsilon) self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)
def incrementGamma(self, inc)
-
Expand source code
def incrementGamma(self, inc): self.ga += inc self.gamma = self.sigmoid(self.ga) self.learner.setDiscount(self.gamma) self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)
def incrementSpeed(self, inc)
-
Expand source code
def incrementSpeed(self, inc): self.tickTime *= inc # self.epsilon = min(1.0, self.epsilon) # self.epsilon = max(0.0, self.epsilon) # self.learner.setSpeed(self.epsilon) self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)
def run(self)
-
Expand source code
def run(self): self.stepCount = 0 self.learner.startEpisode() while True: minSleep = 0.01 tm = max(minSleep, self.tickTime) time.sleep(tm) self.stepsToSkip = int(tm / self.tickTime) - 1 if not self.running: break for i in range(self.stepsToSkip): self.step() self.stepsToSkip = 0 self.step() if (self.max_steps is not None and self.stepCount >= self.max_steps): break self.learner.stopEpisode()
def setUpGammaButtonAndLabel(self, win)
-
Expand source code
def setUpGammaButtonAndLabel(self, win): self.gamma_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementGamma(self.dec))) self.gamma_minus.grid(row=1, column=0, padx=10) self.gamma = self.sigmoid(self.ga) self.gamma_label = tkinter.Label(win, text='Discount: %.3f' % (self.gamma)) self.gamma_label.grid(row=1, column=1) self.gamma_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementGamma(self.inc))) self.gamma_plus.grid(row=1, column=2, padx=10)
def setupAlphaButtonAndLabel(self, win)
-
Expand source code
def setupAlphaButtonAndLabel(self, win): self.alpha_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementAlpha(self.dec))) self.alpha_minus.grid(row=1, column=3, padx=10) self.alpha = self.sigmoid(self.al) self.alpha_label = tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha)) self.alpha_label.grid(row=1, column=4) self.alpha_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementAlpha(self.inc))) self.alpha_plus.grid(row=1, column=5, padx=10)
def setupEpsilonButtonAndLabel(self, win)
-
Expand source code
def setupEpsilonButtonAndLabel(self, win): self.epsilon_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementEpsilon(self.dec))) self.epsilon_minus.grid(row=0, column=3) self.epsilon = self.sigmoid(self.ep) self.epsilon_label = tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon)) self.epsilon_label.grid(row=0, column=4) self.epsilon_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementEpsilon(self.inc))) self.epsilon_plus.grid(row=0, column=5)
def setupSpeedButtonAndLabel(self, win)
-
Expand source code
def setupSpeedButtonAndLabel(self, win): self.speed_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementSpeed(0.5))) self.speed_minus.grid(row=0, column=0) self.speed_label = tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime)) self.speed_label.grid(row=0, column=1) self.speed_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementSpeed(2))) self.speed_plus.grid(row=0, column=2)
def sigmoid(self, x)
-
Expand source code
def sigmoid(self, x): return 1.0 / (1.0 + 2.0 ** (-x))
def skip5kSteps(self)
-
Expand source code
def skip5kSteps(self): self.stepsToSkip = 5000
def start(self)
-
Expand source code
def start(self): self.win.mainloop()
def step(self)
-
Expand source code
def step(self): self.stepCount += 1 state = self.robotEnvironment.getCurrentState() actions = self.robotEnvironment.getPossibleActions(state) if len(actions) == 0.0: self.robotEnvironment.reset() state = self.robotEnvironment.getCurrentState() actions = self.robotEnvironment.getPossibleActions(state) print('Reset!') action = self.learner.getAction(state) if (action is None): raise Exception('None action returned: Code Not Complete') nextState, reward = self.robotEnvironment.doAction(action) self.learner.observeTransition(state, action, nextState, reward)
class CrawlingRobot (canvas)
-
Expand source code
class CrawlingRobot(object): def setAngles(self, armAngle, handAngle): """ set the robot's arm and hand angles to the passed in values """ self.armAngle = armAngle self.handAngle = handAngle def getAngles(self): """ returns the pair of (armAngle, handAngle) """ return self.armAngle, self.handAngle def getRobotPosition(self): """ returns the (x, y) coordinates of the lower-left point of the robot """ return self.robotPos def moveArm(self, newArmAngle): """ move the robot arm to 'newArmAngle' """ if newArmAngle > self.maxArmAngle: raise Exception('Crawling Robot: Arm Raised too high. Careful!') if newArmAngle < self.minArmAngle: raise Exception('Crawling Robot: Arm Raised too low. Careful!') disp = self.displacement(self.armAngle, self.handAngle, newArmAngle, self.handAngle) curXPos = self.robotPos[0] self.robotPos = (curXPos + disp, self.robotPos[1]) self.armAngle = newArmAngle # Position and Velocity Sign Post self.positions.append(self.getRobotPosition()[0]) if len(self.positions) > 100: self.positions.pop(0) # self.angleSums.pop(0) def moveHand(self, newHandAngle): """ move the robot hand to 'newArmAngle' """ if newHandAngle > self.maxHandAngle: raise Exception('Crawling Robot: Hand Raised too high. Careful!') if newHandAngle < self.minHandAngle: raise Exception('Crawling Robot: Hand Raised too low. Careful!') disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle) curXPos = self.robotPos[0] self.robotPos = (curXPos + disp, self.robotPos[1]) self.handAngle = newHandAngle # Position and Velocity Sign Post self.positions.append(self.getRobotPosition()[0]) if len(self.positions) > 100: self.positions.pop(0) # self.angleSums.pop(0) def getMinAndMaxArmAngles(self): """ get the lower- and upper- bound for the arm angles returns (min, max) pair """ return self.minArmAngle, self.maxArmAngle def getMinAndMaxHandAngles(self): """ get the lower- and upper- bound for the hand angles returns (min, max) pair """ return self.minHandAngle, self.maxHandAngle def getRotationAngle(self): """ get the current angle the robot body is rotated off the ground """ armCos, armSin = self.__getCosAndSin(self.armAngle) handCos, handSin = self.__getCosAndSin(self.handAngle) x = self.armLength * armCos + self.handLength * handCos + self.robotWidth y = self.armLength * armSin + self.handLength * handSin + self.robotHeight if y < 0: return math.atan(-y / x) return 0.0 # You shouldn't need methods below here def __getCosAndSin(self, angle): return math.cos(angle), math.sin(angle) def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree): oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree) armCos, armSin = self.__getCosAndSin(armDegree) oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree) handCos, handSin = self.__getCosAndSin(handDegree) xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight x = self.armLength * armCos + self.handLength * handCos + self.robotWidth y = self.armLength * armSin + self.handLength * handSin + self.robotHeight if y < 0: if yOld <= 0: return math.sqrt(xOld * xOld + yOld * yOld) - math.sqrt(x * x + y * y) return (xOld - yOld * (x - xOld) / (y - yOld)) - math.sqrt(x * x + y * y) else: if yOld >= 0: return 0.0 return -(x - y * (xOld - x) / (yOld - y)) + math.sqrt(xOld * xOld + yOld * yOld) raise Exception('Never Should See This!') def draw(self, stepCount, stepDelay): x1, y1 = self.getRobotPosition() x1 = x1 % self.totWidth # Check Lower Still on the ground if y1 != self.groundY: raise Exception('Flying Robot!!') rotationAngle = self.getRotationAngle() cosRot, sinRot = self.__getCosAndSin(rotationAngle) x2 = x1 + self.robotWidth * cosRot y2 = y1 - self.robotWidth * sinRot x3 = x1 - self.robotHeight * sinRot y3 = y1 - self.robotHeight * cosRot x4 = x3 + cosRot * self.robotWidth y4 = y3 - sinRot * self.robotWidth self.canvas.coords(self.robotBody, x1, y1, x2, y2, x4, y4, x3, y3) armCos, armSin = self.__getCosAndSin(rotationAngle + self.armAngle) xArm = x4 + self.armLength * armCos yArm = y4 - self.armLength * armSin self.canvas.coords(self.robotArm, x4, y4, xArm, yArm) handCos, handSin = self.__getCosAndSin(self.handAngle + rotationAngle) xHand = xArm + self.handLength * handCos yHand = yArm - self.handLength * handSin self.canvas.coords(self.robotHand, xArm, yArm, xHand, yHand) # Position and Velocity Sign Post # time = len(self.positions) + 0.5 * sum(self.angleSums) # velocity = (self.positions[-1]-self.positions[0]) / time # if len(self.positions) == 1: return steps = (stepCount - self.lastStep) if (steps == 0): return # pos = self.positions[-1] # velocity = (pos - self.lastPos) / steps # g = .9 ** (10 * stepDelay) # g = .99 ** steps # self.velAvg = g * self.velAvg + (1 - g) * velocity # g = .999 ** steps # self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity pos = self.positions[-1] velocity = pos - self.positions[-2] vel2 = (pos - self.positions[0]) / len(self.positions) self.velAvg = .9 * self.velAvg + .1 * vel2 velMsg = '100-step Avg Velocity: %.2f' % self.velAvg # velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2 velocityMsg = 'Velocity: %.2f' % velocity positionMsg = 'Position: %2.f' % pos stepMsg = 'Step: %d' % stepCount if (self.vel_msg is not None): self.canvas.delete(self.vel_msg) self.canvas.delete(self.pos_msg) self.canvas.delete(self.step_msg) self.canvas.delete(self.velavg_msg) # self.canvas.delete(self.velavg2_msg) # self.velavg2_msg = self.canvas.create_text(850, 190, text=velMsg2) self.velavg_msg = self.canvas.create_text(650, 190, text=velMsg) self.vel_msg = self.canvas.create_text(450, 190, text=velocityMsg) self.pos_msg = self.canvas.create_text(250, 190, text=positionMsg) self.step_msg = self.canvas.create_text(50, 190, text=stepMsg) # self.lastPos = pos self.lastStep = stepCount # self.lastVel = velocity def __init__(self, canvas): # Canvas self.canvas = canvas self.velAvg = 0 # self.velAvg2 = 0 # self.lastPos = 0 self.lastStep = 0 # self.lastVel = 0 # Arm and Hand Degrees self.armAngle = self.oldArmDegree = 0.0 self.handAngle = self.oldHandDegree = -math.pi / 6 self.maxArmAngle = math.pi / 6 self.minArmAngle = -math.pi / 6 self.maxHandAngle = 0 self.minHandAngle = -(5.0 / 6.0) * math.pi # Draw Ground self.totWidth = canvas.winfo_reqwidth() self.totHeight = canvas.winfo_reqheight() self.groundHeight = 40 self.groundY = self.totHeight - self.groundHeight self.ground = canvas.create_rectangle(0, self.groundY, self.totWidth, self.totHeight, fill = 'blue') # Robot Body self.robotWidth = 80 self.robotHeight = 40 self.robotPos = (20, self.groundY) self.robotBody = canvas.create_polygon(0, 0, 0, 0, 0, 0, 0, 0, fill='green') # Robot Arm self.armLength = 60 self.robotArm = canvas.create_line(0, 0, 0, 0, fill='orange', width=5) # Robot Hand self.handLength = 40 self.robotHand = canvas.create_line(0, 0, 0, 0, fill='red', width=3) self.positions = [0, 0] # self.angleSums = [0, 0] self.vel_msg = None self.velavg_msg = None self.pos_msg = None self.step_msg = None
Methods
def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree)
-
Expand source code
def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree): oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree) armCos, armSin = self.__getCosAndSin(armDegree) oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree) handCos, handSin = self.__getCosAndSin(handDegree) xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight x = self.armLength * armCos + self.handLength * handCos + self.robotWidth y = self.armLength * armSin + self.handLength * handSin + self.robotHeight if y < 0: if yOld <= 0: return math.sqrt(xOld * xOld + yOld * yOld) - math.sqrt(x * x + y * y) return (xOld - yOld * (x - xOld) / (y - yOld)) - math.sqrt(x * x + y * y) else: if yOld >= 0: return 0.0 return -(x - y * (xOld - x) / (yOld - y)) + math.sqrt(xOld * xOld + yOld * yOld) raise Exception('Never Should See This!')
def draw(self, stepCount, stepDelay)
-
Expand source code
def draw(self, stepCount, stepDelay): x1, y1 = self.getRobotPosition() x1 = x1 % self.totWidth # Check Lower Still on the ground if y1 != self.groundY: raise Exception('Flying Robot!!') rotationAngle = self.getRotationAngle() cosRot, sinRot = self.__getCosAndSin(rotationAngle) x2 = x1 + self.robotWidth * cosRot y2 = y1 - self.robotWidth * sinRot x3 = x1 - self.robotHeight * sinRot y3 = y1 - self.robotHeight * cosRot x4 = x3 + cosRot * self.robotWidth y4 = y3 - sinRot * self.robotWidth self.canvas.coords(self.robotBody, x1, y1, x2, y2, x4, y4, x3, y3) armCos, armSin = self.__getCosAndSin(rotationAngle + self.armAngle) xArm = x4 + self.armLength * armCos yArm = y4 - self.armLength * armSin self.canvas.coords(self.robotArm, x4, y4, xArm, yArm) handCos, handSin = self.__getCosAndSin(self.handAngle + rotationAngle) xHand = xArm + self.handLength * handCos yHand = yArm - self.handLength * handSin self.canvas.coords(self.robotHand, xArm, yArm, xHand, yHand) # Position and Velocity Sign Post # time = len(self.positions) + 0.5 * sum(self.angleSums) # velocity = (self.positions[-1]-self.positions[0]) / time # if len(self.positions) == 1: return steps = (stepCount - self.lastStep) if (steps == 0): return # pos = self.positions[-1] # velocity = (pos - self.lastPos) / steps # g = .9 ** (10 * stepDelay) # g = .99 ** steps # self.velAvg = g * self.velAvg + (1 - g) * velocity # g = .999 ** steps # self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity pos = self.positions[-1] velocity = pos - self.positions[-2] vel2 = (pos - self.positions[0]) / len(self.positions) self.velAvg = .9 * self.velAvg + .1 * vel2 velMsg = '100-step Avg Velocity: %.2f' % self.velAvg # velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2 velocityMsg = 'Velocity: %.2f' % velocity positionMsg = 'Position: %2.f' % pos stepMsg = 'Step: %d' % stepCount if (self.vel_msg is not None): self.canvas.delete(self.vel_msg) self.canvas.delete(self.pos_msg) self.canvas.delete(self.step_msg) self.canvas.delete(self.velavg_msg) # self.canvas.delete(self.velavg2_msg) # self.velavg2_msg = self.canvas.create_text(850, 190, text=velMsg2) self.velavg_msg = self.canvas.create_text(650, 190, text=velMsg) self.vel_msg = self.canvas.create_text(450, 190, text=velocityMsg) self.pos_msg = self.canvas.create_text(250, 190, text=positionMsg) self.step_msg = self.canvas.create_text(50, 190, text=stepMsg) # self.lastPos = pos self.lastStep = stepCount # self.lastVel = velocity
def getAngles(self)
-
returns the pair of (armAngle, handAngle)
Expand source code
def getAngles(self): """ returns the pair of (armAngle, handAngle) """ return self.armAngle, self.handAngle
def getMinAndMaxArmAngles(self)
-
get the lower- and upper- bound for the arm angles returns (min, max) pair
Expand source code
def getMinAndMaxArmAngles(self): """ get the lower- and upper- bound for the arm angles returns (min, max) pair """ return self.minArmAngle, self.maxArmAngle
def getMinAndMaxHandAngles(self)
-
get the lower- and upper- bound for the hand angles returns (min, max) pair
Expand source code
def getMinAndMaxHandAngles(self): """ get the lower- and upper- bound for the hand angles returns (min, max) pair """ return self.minHandAngle, self.maxHandAngle
def getRobotPosition(self)
-
returns the (x, y) coordinates of the lower-left point of the robot
Expand source code
def getRobotPosition(self): """ returns the (x, y) coordinates of the lower-left point of the robot """ return self.robotPos
def getRotationAngle(self)
-
get the current angle the robot body is rotated off the ground
Expand source code
def getRotationAngle(self): """ get the current angle the robot body is rotated off the ground """ armCos, armSin = self.__getCosAndSin(self.armAngle) handCos, handSin = self.__getCosAndSin(self.handAngle) x = self.armLength * armCos + self.handLength * handCos + self.robotWidth y = self.armLength * armSin + self.handLength * handSin + self.robotHeight if y < 0: return math.atan(-y / x) return 0.0
def moveArm(self, newArmAngle)
-
move the robot arm to 'newArmAngle'
Expand source code
def moveArm(self, newArmAngle): """ move the robot arm to 'newArmAngle' """ if newArmAngle > self.maxArmAngle: raise Exception('Crawling Robot: Arm Raised too high. Careful!') if newArmAngle < self.minArmAngle: raise Exception('Crawling Robot: Arm Raised too low. Careful!') disp = self.displacement(self.armAngle, self.handAngle, newArmAngle, self.handAngle) curXPos = self.robotPos[0] self.robotPos = (curXPos + disp, self.robotPos[1]) self.armAngle = newArmAngle # Position and Velocity Sign Post self.positions.append(self.getRobotPosition()[0]) if len(self.positions) > 100: self.positions.pop(0) # self.angleSums.pop(0)
def moveHand(self, newHandAngle)
-
move the robot hand to 'newArmAngle'
Expand source code
def moveHand(self, newHandAngle): """ move the robot hand to 'newArmAngle' """ if newHandAngle > self.maxHandAngle: raise Exception('Crawling Robot: Hand Raised too high. Careful!') if newHandAngle < self.minHandAngle: raise Exception('Crawling Robot: Hand Raised too low. Careful!') disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle) curXPos = self.robotPos[0] self.robotPos = (curXPos + disp, self.robotPos[1]) self.handAngle = newHandAngle # Position and Velocity Sign Post self.positions.append(self.getRobotPosition()[0]) if len(self.positions) > 100: self.positions.pop(0) # self.angleSums.pop(0)
def setAngles(self, armAngle, handAngle)
-
set the robot's arm and hand angles to the passed in values
Expand source code
def setAngles(self, armAngle, handAngle): """ set the robot's arm and hand angles to the passed in values """ self.armAngle = armAngle self.handAngle = handAngle
class CrawlingRobotEnvironment (crawlingRobot)
-
A GUI display for crawler.
Expand source code
class CrawlingRobotEnvironment(Environment): """ A GUI display for crawler. """ def __init__(self, crawlingRobot): self.crawlingRobot = crawlingRobot # The state is of the form (armAngle, handAngle) # where the angles are bucket numbers, not actual # degree measurements self.state = None self.nArmStates = 9 self.nHandStates = 13 # create a list of arm buckets and hand buckets to # discretize the state space minArmAngle, maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles() minHandAngle, maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles() armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates - 1) handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates - 1) self.armBuckets = [minArmAngle + (armIncrement * i) for i in range(self.nArmStates)] self.handBuckets = [minHandAngle + (handIncrement * i) for i in range(self.nHandStates)] # Reset self.reset() def getCurrentState(self): """ Return the current state of the crawling robot. """ return self.state def getPossibleActions(self, state): """ Returns possible actions for the states in the current state. """ actions = list() currArmBucket, currHandBucket = state if currArmBucket > 0: actions.append('arm-down') if currArmBucket < self.nArmStates - 1: actions.append('arm-up') if currHandBucket > 0: actions.append('hand-down') if currHandBucket < self.nHandStates - 1: actions.append('hand-up') return actions def doAction(self, action): """ Perform the action and update the current state of the Environment and return the reward for the current state, the next state and the taken action. Returns: nextState, reward """ nextState, reward = None, None oldX, oldY = self.crawlingRobot.getRobotPosition() armBucket, handBucket = self.state armAngle, handAngle = self.crawlingRobot.getAngles() if action == 'arm-up': newArmAngle = self.armBuckets[armBucket + 1] self.crawlingRobot.moveArm(newArmAngle) nextState = (armBucket + 1, handBucket) if action == 'arm-down': newArmAngle = self.armBuckets[armBucket - 1] self.crawlingRobot.moveArm(newArmAngle) nextState = (armBucket - 1, handBucket) if action == 'hand-up': newHandAngle = self.handBuckets[handBucket + 1] self.crawlingRobot.moveHand(newHandAngle) nextState = (armBucket, handBucket + 1) if action == 'hand-down': newHandAngle = self.handBuckets[handBucket - 1] self.crawlingRobot.moveHand(newHandAngle) nextState = (armBucket, handBucket - 1) newX, newY = self.crawlingRobot.getRobotPosition() # a simple reward function reward = newX - oldX self.state = nextState return nextState, reward def reset(self): """ Resets the Environment to the initial state """ # Initialize the state to be the middle # value for each parameter e.g. if there are 13 and 19 # buckets for the arm and hand parameters, then the intial # state should be (6, 9) # Also call self.crawlingRobot.setAngles() # to the initial arm and hand angle armState = int(self.nArmStates / 2) handState = int(self.nHandStates / 2) self.state = armState, handState self.crawlingRobot.setAngles(self.armBuckets[armState], self.handBuckets[handState]) self.crawlingRobot.positions = [20, self.crawlingRobot.getRobotPosition()[0]]
Ancestors
- Environment
- abc.ABC
Methods
def doAction(self, action)
-
Perform the action and update the current state of the Environment and return the reward for the current state, the next state and the taken action.
Returns
nextState
,reward
Expand source code
def doAction(self, action): """ Perform the action and update the current state of the Environment and return the reward for the current state, the next state and the taken action. Returns: nextState, reward """ nextState, reward = None, None oldX, oldY = self.crawlingRobot.getRobotPosition() armBucket, handBucket = self.state armAngle, handAngle = self.crawlingRobot.getAngles() if action == 'arm-up': newArmAngle = self.armBuckets[armBucket + 1] self.crawlingRobot.moveArm(newArmAngle) nextState = (armBucket + 1, handBucket) if action == 'arm-down': newArmAngle = self.armBuckets[armBucket - 1] self.crawlingRobot.moveArm(newArmAngle) nextState = (armBucket - 1, handBucket) if action == 'hand-up': newHandAngle = self.handBuckets[handBucket + 1] self.crawlingRobot.moveHand(newHandAngle) nextState = (armBucket, handBucket + 1) if action == 'hand-down': newHandAngle = self.handBuckets[handBucket - 1] self.crawlingRobot.moveHand(newHandAngle) nextState = (armBucket, handBucket - 1) newX, newY = self.crawlingRobot.getRobotPosition() # a simple reward function reward = newX - oldX self.state = nextState return nextState, reward
def getCurrentState(self)
-
Return the current state of the crawling robot.
Expand source code
def getCurrentState(self): """ Return the current state of the crawling robot. """ return self.state
def getPossibleActions(self, state)
-
Returns possible actions for the states in the current state.
Expand source code
def getPossibleActions(self, state): """ Returns possible actions for the states in the current state. """ actions = list() currArmBucket, currHandBucket = state if currArmBucket > 0: actions.append('arm-down') if currArmBucket < self.nArmStates - 1: actions.append('arm-up') if currHandBucket > 0: actions.append('hand-down') if currHandBucket < self.nHandStates - 1: actions.append('hand-up') return actions
def isTerminal(self)
-
Inherited from:
Environment
.isTerminal
Has the enviornment entered a terminal state? This means there are no successors.
def reset(self)
-
Resets the Environment to the initial state
Expand source code
def reset(self): """ Resets the Environment to the initial state """ # Initialize the state to be the middle # value for each parameter e.g. if there are 13 and 19 # buckets for the arm and hand parameters, then the intial # state should be (6, 9) # Also call self.crawlingRobot.setAngles() # to the initial arm and hand angle armState = int(self.nArmStates / 2) handState = int(self.nHandStates / 2) self.state = armState, handState self.crawlingRobot.setAngles(self.armBuckets[armState], self.handBuckets[handState]) self.crawlingRobot.positions = [20, self.crawlingRobot.getRobotPosition()[0]]