colosseum.agent.actors.base

import abc
import random
from typing import TYPE_CHECKING

import dm_env
import numpy as np

from colosseum.utils.acme.specs import MDPSpec

if TYPE_CHECKING:
    from colosseum.mdp import ACTION_TYPE


class BaseActor(abc.ABC):
    """
    The `BaseActor` class is the abstract class for the actor component of a reinforcement learning agent, which
    handles the interactions with the MDPs.
    """

    @abc.abstractmethod
    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        """
        self._mdp_spec = mdp_specs
        self._seed = seed

        self._rng = np.random.RandomState(seed)
        self._rng_fast = random.Random(seed)

    @abc.abstractmethod
    def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
        """
        Parameters
        ----------
        ts : dm_env.TimeStep
            The TimeStep for which the agent is required to calculate the next action.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.

        Returns
        -------
        action : ACTION_TYPE
            The action that the agent suggests to take given the observation and the time step.
        """
class BaseActor(abc.ABC):

The BaseActor class is the abstract class for the actor component of a reinforcement learning agent, which handles the interactions with the MDPs.

@abc.abstractmethod
BaseActor(seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec)
Parameters
  • seed (int): The random seed.
  • mdp_specs (MDPSpec): The full specification of the MDP.
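
A concrete actor overrides the abstract constructor and forwards `seed` and `mdp_specs` to `BaseActor.__init__`, which stores them and initializes the two seeded random number generators. A minimal sketch (the class name and the extra `epsilon` parameter are purely illustrative and not part of the module; a full example implementing `select_action` follows the `select_action` documentation below):

    from colosseum.agent.actors.base import BaseActor
    from colosseum.utils.acme.specs import MDPSpec


    class MyActor(BaseActor):
        def __init__(self, seed: int, mdp_specs: MDPSpec, epsilon: float = 0.1):
            # Sets self._seed, self._mdp_spec, self._rng and self._rng_fast.
            super().__init__(seed, mdp_specs)
            self._epsilon = epsilon
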
@abc.abstractmethod
def select_action(self, ts: dm_env.TimeStep, time: int) -> Union[int, float, numpy.ndarray]:
Parameters
  • ts (dm_env.TimeStep): The TimeStep for which the agent is required to calculate the next action.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
  • action (ACTION_TYPE): The action that the agent suggests to take given the observation and the time step.
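
As a concrete illustration, here is a minimal sketch of a subclass that ignores the observation and samples actions uniformly at random. It assumes the action specification in `mdp_specs` is available as `mdp_specs.actions` and behaves like a `dm_env.specs.DiscreteArray`, exposing the number of discrete actions via `num_values`; `RandomActor` is an illustrative name and is not part of colosseum.

    import dm_env

    from colosseum.agent.actors.base import BaseActor
    from colosseum.utils.acme.specs import MDPSpec


    class RandomActor(BaseActor):
        """An actor that samples actions uniformly at random."""

        def __init__(self, seed: int, mdp_specs: MDPSpec):
            super().__init__(seed, mdp_specs)
            # Assumption: the action spec is discrete and exposes `num_values`,
            # as a dm_env.specs.DiscreteArray does.
            self._num_actions = self._mdp_spec.actions.num_values

        def select_action(self, ts: dm_env.TimeStep, time: int) -> int:
            # The observation in `ts` and the time index are ignored here; a learning
            # actor would condition its policy on them (e.g. on the in-episode time
            # in the episodic setting).
            return int(self._rng.randint(self._num_actions))

In an agent/MDP interaction loop, the actor is constructed once with the seed and the MDP specification, and `select_action(ts, time)` is then called at every step with the current `dm_env.TimeStep`.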