colosseum.agent.actors.random
from typing import TYPE_CHECKING

import dm_env

from colosseum.agent.actors import BaseActor
from colosseum.utils.acme.specs import DiscreteArray, MDPSpec

if TYPE_CHECKING:
    from colosseum.mdp import ACTION_TYPE


class RandomActor(BaseActor):
    """
    The `RandomActor` component acts uniformly randomly.
    """

    def __init__(self, seed: int, mdp_specs: MDPSpec, cache_size: int = 50_000):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        cache_size : int
            The cache size for the randomly sampled actions.
        """
        super().__init__(seed, mdp_specs)

        # Actions are sampled in batches of `cache_size` to amortize the cost
        # of calling the random number generator on every step.
        self._cached_actions = []
        self._cache_size = cache_size

    def _fill_cache(self):
        """
        Refill the action cache with `cache_size` uniformly sampled actions.

        Raises
        ------
        NotImplementedError
            If the action space of the MDP is not discrete.
        """
        # isinstance (rather than a type-equality check) also accepts
        # subclasses of DiscreteArray.
        if isinstance(self._mdp_spec.actions, DiscreteArray):
            # The batch is stored reversed so that select_action can pop from
            # the end in O(1) while still emitting actions in the same order
            # that pop(0) on the unreversed list would have (pop(0) is O(n)
            # and made each refill cycle quadratic).
            self._cached_actions = self._rng.randint(
                0, self._mdp_spec.actions.num_values, self._cache_size
            ).tolist()[::-1]
        else:
            raise NotImplementedError(
                "The random actor has been implemented only for discrete action spaces."
            )

    def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
        """
        Return a uniformly random action, refilling the cache when empty.

        Parameters
        ----------
        ts : dm_env.TimeStep
            The TimeStep for which the agent is required to calculate the next
            action. Unused: the action is independent of the observation.
        time : int
            The current time of the environment. Unused: the action is
            independent of the time step.

        Returns
        -------
        ACTION_TYPE
            The uniformly sampled action.
        """
        if not self._cached_actions:
            self._fill_cache()
        return self._cached_actions.pop()
13class RandomActor(BaseActor): 14 """ 15 The `RandomActor` component acts uniformly randomly. 16 """ 17 18 def __init__(self, seed: int, mdp_specs: MDPSpec, cache_size=50_000): 19 """ 20 Parameters 21 ---------- 22 seed : int 23 The random seed. 24 mdp_specs : MDPSpec 25 The full specification of the MDP. 26 cache_size : int 27 The cache size for the randomly sampled actions. 28 """ 29 30 super(RandomActor, self).__init__(seed, mdp_specs) 31 32 self._cached_actions = [] 33 self._cache_size = cache_size 34 35 def _fill_cache(self): 36 if type(self._mdp_spec.actions) == DiscreteArray: 37 self._cached_actions = self._rng.randint( 38 0, self._mdp_spec.actions.num_values, self._cache_size 39 ).tolist() 40 else: 41 raise NotImplementedError( 42 "The random actor has been implemented only for discrete action spaces." 43 ) 44 45 def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE": 46 if len(self._cached_actions) == 0: 47 self._fill_cache() 48 return self._cached_actions.pop(0)
The RandomActor
component acts uniformly randomly.
RandomActor( seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, cache_size=50000)
18 def __init__(self, seed: int, mdp_specs: MDPSpec, cache_size=50_000): 19 """ 20 Parameters 21 ---------- 22 seed : int 23 The random seed. 24 mdp_specs : MDPSpec 25 The full specification of the MDP. 26 cache_size : int 27 The cache size for the randomly sampled actions. 28 """ 29 30 super(RandomActor, self).__init__(seed, mdp_specs) 31 32 self._cached_actions = [] 33 self._cache_size = cache_size
Parameters
- seed (int): The random seed.
- mdp_specs (MDPSpec): The full specification of the MDP.
- cache_size (int): The cache size for the randomly sampled actions.
def
select_action( self, ts: dm_env._environment.TimeStep, time: int) -> Union[int, float, numpy.ndarray]:
45 def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE": 46 if len(self._cached_actions) == 0: 47 self._fill_cache() 48 return self._cached_actions.pop(0)
Parameters
- ts (dm_env.TimeStep): The TimeStep for which the agent is required to calculate the next action.
- time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
- action (ACTION_TYPE): The action that the agent suggests to take given the observation and the time step.