Module pacai.agents.learning.value

Expand source code
import abc

from pacai.agents.base import BaseAgent

class ValueEstimationAgent(BaseAgent):
    """
    Abstract base for agents that score (state, action) pairs with Q-values.

    State values and the greedy policy are both derived from those Q-values:
    ```
    V(state) = max_{action in actions} Q(state, action)
    policy(state) = arg_max_{action in actions} Q(state, action)
    ```
    `getQValue`, `getValue`, and `getPolicy` are declared abstract here.
    """

    def __init__(self, index, alpha = 1.0, epsilon = 0.05,
            gamma = 0.8, numTraining = 10, **kwargs):
        """
        Record the learning hyperparameters on the agent.

        Args:
            index: Forwarded unchanged to `BaseAgent.__init__`.
            alpha: The learning rate (stored as a float).
            epsilon: The exploration rate (stored as a float).
            gamma: The discount factor (stored as a float in `self.discountRate`).
            numTraining: The number of training episodes (stored as an int).
            **kwargs: Forwarded unchanged to `BaseAgent.__init__`.
        """

        super().__init__(index, **kwargs)

        # Coerce each hyperparameter to its numeric type.
        self.alpha = float(alpha)
        self.epsilon = float(epsilon)
        # Note the rename: the `gamma` argument lives on as `discountRate`.
        self.discountRate = float(gamma)
        self.numTraining = int(numTraining)

    @abc.abstractmethod
    def getQValue(self, state, action):
        """
        Should return Q(state, action).
        """

    @abc.abstractmethod
    def getValue(self, state):
        """
        Should return the value of the state under its best action:
        ```
        V(state) = max_{action in actions} Q(state, action)
        ```
        """

    @abc.abstractmethod
    def getPolicy(self, state):
        """
        Should return the best action to take in the state:
        ```
        policy(state) = arg_max_{action in actions} Q(state, action)
        ```
        Because the agent might want to explore,
        the result might not coincide with `ValueEstimationAgent.getAction`.
        If many actions achieve the maximal Q-value,
        it doesn't matter which is selected.
        """

Classes

class ValueEstimationAgent (index, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining=10, **kwargs)

An abstract agent which assigns Q-values to (state, action) pairs. The best values and policies are estimated by:

V(state) = max_{action in actions} Q(state, action)
policy(state) = arg_max_{action in actions} Q(state, action)

Args

alpha
The learning rate.
epsilon
The exploration rate.
gamma
The discount factor.
numTraining
The number of training episodes.
Expand source code
class ValueEstimationAgent(BaseAgent):
    """
    Abstract base for agents that score (state, action) pairs with Q-values.

    State values and the greedy policy are both derived from those Q-values:
    ```
    V(state) = max_{action in actions} Q(state, action)
    policy(state) = arg_max_{action in actions} Q(state, action)
    ```
    `getQValue`, `getValue`, and `getPolicy` are declared abstract here.
    """

    def __init__(self, index, alpha = 1.0, epsilon = 0.05,
            gamma = 0.8, numTraining = 10, **kwargs):
        """
        Record the learning hyperparameters on the agent.

        Args:
            index: Forwarded unchanged to `BaseAgent.__init__`.
            alpha: The learning rate (stored as a float).
            epsilon: The exploration rate (stored as a float).
            gamma: The discount factor (stored as a float in `self.discountRate`).
            numTraining: The number of training episodes (stored as an int).
            **kwargs: Forwarded unchanged to `BaseAgent.__init__`.
        """

        super().__init__(index, **kwargs)

        # Coerce each hyperparameter to its numeric type.
        self.alpha = float(alpha)
        self.epsilon = float(epsilon)
        # Note the rename: the `gamma` argument lives on as `discountRate`.
        self.discountRate = float(gamma)
        self.numTraining = int(numTraining)

    @abc.abstractmethod
    def getQValue(self, state, action):
        """
        Should return Q(state, action).
        """

    @abc.abstractmethod
    def getValue(self, state):
        """
        Should return the value of the state under its best action:
        ```
        V(state) = max_{action in actions} Q(state, action)
        ```
        """

    @abc.abstractmethod
    def getPolicy(self, state):
        """
        Should return the best action to take in the state:
        ```
        policy(state) = arg_max_{action in actions} Q(state, action)
        ```
        Because the agent might want to explore,
        the result might not coincide with `ValueEstimationAgent.getAction`.
        If many actions achieve the maximal Q-value,
        it doesn't matter which is selected.
        """

Ancestors

Subclasses

Static methods

def loadAgent(name, index, args={})

Inherited from: BaseAgent.loadAgent

Load an agent with the given class name. The name can be fully qualified or just the bare class name. If the bare name is given, the class should …

Methods

def final(self, state)

Inherited from: BaseAgent.final

Inform the agent about the result of a game.

def getAction(self, state)

Inherited from: BaseAgent.getAction

The BaseAgent will receive an AbstractGameState, and must return an action from Directions.

def getPolicy(self, state)

What is the best action to take in the state? Note that because we might want to explore, this might not coincide with BaseAgent.getAction(). Concretely, this is given by:

policy(state) = arg_max_{action in actions} Q(state, action)

If many actions achieve the maximal Q-value, it doesn't matter which is selected.

Expand source code
@abc.abstractmethod
def getPolicy(self, state):
    """
    Should return the best action to take in the state:
    ```
    policy(state) = arg_max_{action in actions} Q(state, action)
    ```
    Because the agent might want to explore,
    the result might not coincide with `ValueEstimationAgent.getAction`.
    If many actions achieve the maximal Q-value,
    it doesn't matter which is selected.
    """
def getQValue(self, state, action)

Should return Q(state,action).

Expand source code
@abc.abstractmethod
def getQValue(self, state, action):
    """
    Should return Q(state, action).
    """
def getValue(self, state)

What is the value of this state under the best action? Concretely, this is given by:

V(state) = max_{action in actions} Q(state, action)
Expand source code
@abc.abstractmethod
def getValue(self, state):
    """
    Should return the value of the state under its best action:
    ```
    V(state) = max_{action in actions} Q(state, action)
    ```
    """
def observationFunction(self, state)

Inherited from: BaseAgent.observationFunction

Make an observation on the state of the game. Called once for each round of the game.

def registerInitialState(self, state)

Inherited from: BaseAgent.registerInitialState

Inspect the starting state.