colosseum.agent.actors.base
import abc
import random
from typing import TYPE_CHECKING

import dm_env
import numpy as np

from colosseum.utils.acme.specs import MDPSpec

if TYPE_CHECKING:
    from colosseum.mdp import ACTION_TYPE


class BaseActor(abc.ABC):
    """
    The `BaseActor` class is the abstract class for the actor component of a reinforcement learning agent, which
    handles the interactions with the MDPs.
    """

    @abc.abstractmethod
    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        """
        self._mdp_spec = mdp_specs
        self._seed = seed

        self._rng = np.random.RandomState(seed)
        self._rng_fast = random.Random(seed)

    @abc.abstractmethod
    def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
        """
        Parameters
        ----------
        ts : dm_env.TimeStep
            The TimeStep for which the agent is required to calculate the next action.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.

        Returns
        -------
        action : ACTION_TYPE
            The action that the agent suggests to take given the observation and the time step.
        """
class BaseActor(abc.ABC):
The BaseActor class is the abstract base class for the actor component of a reinforcement learning agent; it handles the agent's interactions with the MDPs.
@abc.abstractmethod
def __init__(self, seed: int, mdp_specs: MDPSpec):
Parameters
- seed (int): The random seed.
- mdp_specs (MDPSpec): The full specification of the MDP.
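The constructor seeds two generators with the same seed: self._rng, a NumPy RandomState suited to array-valued sampling, and self._rng_fast, a plain random.Random, which is cheaper for single scalar draws. A minimal sketch of what this shared seeding guarantees, using only the modules the source already imports:

import random

import numpy as np

seed = 42
rng = np.random.RandomState(seed)  # vectorized draws, e.g. rng.normal(size=4)
rng_fast = random.Random(seed)     # faster for single scalar draws

# Seeding makes both streams reproducible: a fresh generator built with the
# same seed yields exactly the same sequence of values.
assert random.Random(seed).random() == rng_fast.random()
assert np.random.RandomState(seed).rand() == rng.rand()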
@abc.abstractmethod
def select_action(self, ts: dm_env.TimeStep, time: int) -> Union[int, float, numpy.ndarray]:
Parameters
- ts (dm_env.TimeStep): The TimeStep for which the agent is required to calculate the next action.
- time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
- action (ACTION_TYPE): The action that the agent suggests to take given the observation and the time step.
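To make the contract concrete, here is a minimal sketch of a subclass with a uniformly random policy. The class name RandomActor is illustrative and not part of the package, and the sketch assumes a finite action space whose spec exposes num_values, as a dm_env.specs.DiscreteArray does; for other action spaces the sampling line would change accordingly.

import dm_env

from colosseum.agent.actors.base import BaseActor
from colosseum.utils.acme.specs import MDPSpec


class RandomActor(BaseActor):
    """Illustrative actor that ignores the observation and samples uniformly."""

    def __init__(self, seed: int, mdp_specs: MDPSpec):
        # BaseActor.__init__ stores the spec and seeds the two generators.
        super().__init__(seed, mdp_specs)
        # Assumption: the action spec is discrete and exposes `num_values`.
        self._num_actions = mdp_specs.actions.num_values

    def select_action(self, ts: dm_env.TimeStep, time: int) -> int:
        # random.Random.randint is inclusive on both endpoints.
        return self._rng_fast.randint(0, self._num_actions - 1)

Because both methods are marked with @abc.abstractmethod, a subclass must override both before it can be instantiated; the usual pattern, as above, is to delegate the bookkeeping to super().__init__ and implement the policy in select_action.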