colosseum.agent.actors.random
from typing import TYPE_CHECKING

import dm_env

from colosseum.agent.actors import BaseActor
from colosseum.utils.acme.specs import DiscreteArray, MDPSpec

if TYPE_CHECKING:
    from colosseum.mdp import ACTION_TYPE


class RandomActor(BaseActor):
    """
    The `RandomActor` component acts uniformly randomly.
    """

    def __init__(self, seed: int, mdp_specs: MDPSpec, cache_size: int = 50_000):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        cache_size : int
            The cache size for the randomly sampled actions.
        """
        super().__init__(seed, mdp_specs)

        # Actions are sampled in batches of `cache_size` to amortize the cost
        # of calling the random number generator on every step.
        self._cached_actions = []
        self._cache_size = cache_size

    def _fill_cache(self):
        """
        Refill the action cache with `cache_size` uniformly sampled actions.

        Raises
        ------
        NotImplementedError
            If the action space of the MDP is not discrete.
        """
        # isinstance (rather than a type-equality check) also accepts
        # subclasses of DiscreteArray.
        if isinstance(self._mdp_spec.actions, DiscreteArray):
            # The batch is stored reversed so that select_action can pop from
            # the end in O(1) while still emitting actions in the same order
            # that pop(0) on the unreversed list would have (pop(0) is O(n)
            # and made each refill cycle quadratic).
            self._cached_actions = self._rng.randint(
                0, self._mdp_spec.actions.num_values, self._cache_size
            ).tolist()[::-1]
        else:
            raise NotImplementedError(
                "The random actor has been implemented only for discrete action spaces."
            )

    def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
        """
        Return a uniformly random action, refilling the cache when empty.

        Parameters
        ----------
        ts : dm_env.TimeStep
            The TimeStep for which the agent is required to calculate the next
            action. Unused: the action is independent of the observation.
        time : int
            The current time of the environment. Unused: the action is
            independent of the time step.

        Returns
        -------
        ACTION_TYPE
            The uniformly sampled action.
        """
        if not self._cached_actions:
            self._fill_cache()
        return self._cached_actions.pop()
13class RandomActor(BaseActor): 14 """ 15 The `RandomActor` component acts uniformly randomly. 16 """ 17 18 def __init__(self, seed: int, mdp_specs: MDPSpec, cache_size=50_000): 19 """ 20 Parameters 21 ---------- 22 seed : int 23 The random seed. 24 mdp_specs : MDPSpec 25 The full specification of the MDP. 26 cache_size : int 27 The cache size for the randomly sampled actions. 28 """ 29 30 super(RandomActor, self).__init__(seed, mdp_specs) 31 32 self._cached_actions = [] 33 self._cache_size = cache_size 34 35 def _fill_cache(self): 36 if type(self._mdp_spec.actions) == DiscreteArray: 37 self._cached_actions = self._rng.randint( 38 0, self._mdp_spec.actions.num_values, self._cache_size 39 ).tolist() 40 else: 41 raise NotImplementedError( 42 "The random actor has been implemented only for discrete action spaces." 43 ) 44 45 def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE": 46 if len(self._cached_actions) == 0: 47 self._fill_cache() 48 return self._cached_actions.pop(0)
The RandomActor
component acts uniformly randomly.
RandomActor( seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, cache_size=50000)
18 def __init__(self, seed: int, mdp_specs: MDPSpec, cache_size=50_000): 19 """ 20 Parameters 21 ---------- 22 seed : int 23 The random seed. 24 mdp_specs : MDPSpec 25 The full specification of the MDP. 26 cache_size : int 27 The cache size for the randomly sampled actions. 28 """ 29 30 super(RandomActor, self).__init__(seed, mdp_specs) 31 32 self._cached_actions = [] 33 self._cache_size = cache_size
Parameters
- seed (int): The random seed.
- mdp_specs (MDPSpec): The full specification of the MDP.
- cache_size (int): The cache size for the randomly sampled actions.
def
select_action( self, ts: dm_env._environment.TimeStep, time: int) -> Union[int, float, numpy.ndarray]:
45 def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE": 46 if len(self._cached_actions) == 0: 47 self._fill_cache() 48 return self._cached_actions.pop(0)
Parameters
- ts (dm_env.TimeStep): The TimeStep for which the agent is required to calculate the next action.
- time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
- action (ACTION_TYPE): The action that the agent suggests to take given the observation and the time step.