colosseum.agent.agents.random

View Source

  1import abc
  2from typing import Any, Dict, TYPE_CHECKING
  3
  4import numpy as np
  5from ray import tune
  6
  7from colosseum.agent.actors import RandomActor
  8from colosseum.agent.agents.base import BaseAgent
  9from colosseum.utils.acme.specs import DiscreteArray, MDPSpec
 10
 11if TYPE_CHECKING:
 12    from colosseum.emission_maps import EmissionMap
 13
 14
 15class RandomAgent(BaseAgent, abc.ABC):
 16    """
 17    The `RandomAgent` implements a uniformly randomly acting agent.
 18    """
 19
 20    @staticmethod
 21    def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
 22        return True
 23
 24    @staticmethod
 25    def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
 26        return {}
 27
 28    @staticmethod
 29    def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0):
 30        return ""
 31
 32    @property
 33    def current_optimal_stochastic_policy(self) -> np.ndarray:
 34        return self._policy
 35
 36    def __init__(self, seed: int, mdp_specs: MDPSpec):
 37        """
 38        Parameters
 39        ----------
 40        seed : int
 41            The random seed.
 42        mdp_specs : MDPSpec
 43            The full specification of the MDP.
 44        """
 45        super(RandomAgent, self).__init__(
 46            seed,
 47            mdp_specs,
 48            mdp_model=None,
 49            actor=RandomActor(seed, mdp_specs),
 50            optimization_horizon=0,
 51        )
 52
 53        if type(self._mdp_spec.observations) == DiscreteArray:
 54            if type(self._mdp_spec.actions) == DiscreteArray:
 55                n_s = self._mdp_spec.observations.num_values
 56                n_a = self._mdp_spec.actions.num_values
 57
 58                self._policy = (
 59                    np.ones(
 60                        (n_s, n_a)
 61                        if self._time_horizon == np.inf
 62                        else (self._time_horizon, n_s, n_a)
 63                    )
 64                    / n_a
 65                )
 66        else:
 67            raise NotImplementedError(
 68                "The RandomAgent is implemented for discrete MDP only."
 69            )
 70
 71    def episode_end_update(self):
 72        pass
 73
 74    def before_start_interacting(self):
 75        pass
 76
 77
 78class RandomAgentEpisodic(RandomAgent):
 79    @staticmethod
 80    def get_agent_instance_from_parameters(
 81        seed: int,
 82        optimization_horizon: int,
 83        mdp_specs: MDPSpec,
 84        parameters: Dict[str, Any],
 85    ) -> "BaseAgent":
 86        return RandomAgentEpisodic(seed, mdp_specs)
 87
 88    @staticmethod
 89    def is_episodic() -> bool:
 90        return True
 91
 92
 93class RandomAgentContinuous(RandomAgent):
 94    @staticmethod
 95    def get_agent_instance_from_parameters(
 96        seed: int,
 97        optimization_horizon: int,
 98        mdp_specs: MDPSpec,
 99        parameters: Dict[str, Any],
100    ) -> "BaseAgent":
101        return RandomAgentContinuous(seed, mdp_specs)
102
103    @staticmethod
104    def is_episodic() -> bool:
105        return False

class RandomAgent(colosseum.agent.agents.base.BaseAgent, abc.ABC): View Source

class RandomAgent(BaseAgent, abc.ABC):
    """
    The `RandomAgent` implements a uniformly randomly acting agent.

    The policy it exposes assigns probability `1 / n_a` to each of the `n_a` actions.
    """

    @staticmethod
    def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
        """
        Returns
        -------
        bool
            Always True; the given emission map is not inspected.
        """
        return True

    @staticmethod
    def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
        """
        Returns
        -------
        Dict[str, tune.sample.Domain]
            An empty dictionary; no hyperparameter search space is defined here.
        """
        return {}

    @staticmethod
    def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0):
        """
        Returns
        -------
        str
            An empty string; neither `parameters` nor `index` is used.
        """
        return ""

    @property
    def current_optimal_stochastic_policy(self) -> np.ndarray:
        """
        Returns
        -------
        np.ndarray
            The uniform policy array built in the constructor.
        """
        return self._policy

    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.

        Raises
        ------
        NotImplementedError
            If the observation spec or the action spec is not a `DiscreteArray`.
        """
        super().__init__(
            seed,
            mdp_specs,
            mdp_model=None,
            actor=RandomActor(seed, mdp_specs),
            optimization_horizon=0,
        )

        # NOTE(review): `_mdp_spec` and `_time_horizon` are presumably set by
        # `BaseAgent.__init__`, which is not visible here - confirm.
        observations = self._mdp_spec.observations
        actions = self._mdp_spec.actions

        # Both specs must be discrete. Checking the observations only would leave
        # `_policy` undefined (and fail much later) for non-discrete actions.
        if not (
            isinstance(observations, DiscreteArray)
            and isinstance(actions, DiscreteArray)
        ):
            raise NotImplementedError(
                "The RandomAgent is implemented for discrete MDP only."
            )

        n_s = observations.num_values
        n_a = actions.num_values

        # An infinite time horizon yields a (n_s, n_a) policy; otherwise the policy
        # gets a leading axis of length `_time_horizon`.
        if self._time_horizon == np.inf:
            policy_shape = (n_s, n_a)
        else:
            policy_shape = (self._time_horizon, n_s, n_a)

        # Every action receives the same probability 1 / n_a.
        self._policy = np.ones(policy_shape) / n_a

    def episode_end_update(self):
        """
        Does nothing.
        """
        pass

    def before_start_interacting(self):
        """
        Does nothing.
        """
        pass

The RandomAgent implements a uniformly randomly acting agent.

RandomAgent(seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec) View Source

37    def __init__(self, seed: int, mdp_specs: MDPSpec):
38        """
39        Parameters
40        ----------
41        seed : int
42            The random seed.
43        mdp_specs : MDPSpec
44            The full specification of the MDP.
45        """
46        super(RandomAgent, self).__init__(
47            seed,
48            mdp_specs,
49            mdp_model=None,
50            actor=RandomActor(seed, mdp_specs),
51            optimization_horizon=0,
52        )
53
54        if type(self._mdp_spec.observations) == DiscreteArray:
55            if type(self._mdp_spec.actions) == DiscreteArray:
56                n_s = self._mdp_spec.observations.num_values
57                n_a = self._mdp_spec.actions.num_values
58
59                self._policy = (
60                    np.ones(
61                        (n_s, n_a)
62                        if self._time_horizon == np.inf
63                        else (self._time_horizon, n_s, n_a)
64                    )
65                    / n_a
66                )
67        else:
68            raise NotImplementedError(
69                "The RandomAgent is implemented for discrete MDP only."
70            )

Parameters

seed (int): The random seed.
mdp_specs (MDPSpec): The full specification of the MDP.

@staticmethod

def is_emission_map_accepted(emission_map: colosseum.emission_maps.base.EmissionMap) -> bool: View Source

21    @staticmethod
22    def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
23        return True

Returns

bool: True if the agent class accepts the emission map.

@staticmethod

def get_hyperparameters_search_spaces() -> Dict[str, ray.tune.sample.Domain]: View Source

25    @staticmethod
26    def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
27        return {}

Returns

Dict[str, tune.sample.Domain]: A dictionary mapping each hyperparameter name to its corresponding ray.tune sampler.

@staticmethod

def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0): View Source

29    @staticmethod
30    def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0):
31        return ""

Produces a string containing the gin configuration file corresponding to the given parameters.

Parameters

parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
index (int): The index assigned to the gin configuration.

Returns

gin_config (str): The gin configuration file.

current_optimal_stochastic_policy: numpy.ndarray

Returns

np.ndarray: The agent's current estimate of the optimal policy, given its current knowledge, in the form of a distribution over actions.

def episode_end_update(self): View Source

72    def episode_end_update(self):
73        pass

Called when an episode ends. In the infinite-horizon case, this refers to artificial episodes.

def before_start_interacting(self): View Source

75    def before_start_interacting(self):
76        pass

Called before the agent starts interacting with the MDP.

Inherited Members

colosseum.agent.agents.base.BaseAgent: is_episodic; get_agent_instance_from_parameters; is_episode_end; select_action; step_update; agent_logs

class RandomAgentEpisodic(RandomAgent): View Source

class RandomAgentEpisodic(RandomAgent):
    """
    The `RandomAgentEpisodic` is the `RandomAgent` variant that reports itself as episodic.
    """

    @staticmethod
    def get_agent_instance_from_parameters(
        seed: int,
        optimization_horizon: int,
        mdp_specs: MDPSpec,
        parameters: Dict[str, Any],
    ) -> "BaseAgent":
        """
        Parameters
        ----------
        seed : int
            The random seed, forwarded to the constructor.
        optimization_horizon : int
            Not used by this agent.
        mdp_specs : MDPSpec
            The full specification of the MDP, forwarded to the constructor.
        parameters : Dict[str, Any]
            Not used by this agent.

        Returns
        -------
        BaseAgent
            A new `RandomAgentEpisodic` instance.
        """
        agent = RandomAgentEpisodic(seed, mdp_specs)
        return agent

    @staticmethod
    def is_episodic() -> bool:
        """
        Returns
        -------
        bool
            Always True.
        """
        return True

The RandomAgent implements a uniformly randomly acting agent.

@staticmethod

def get_agent_instance_from_parameters( seed: int, optimization_horizon: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, parameters: Dict[str, Any]) -> colosseum.agent.agents.base.BaseAgent: View Source

80    @staticmethod
81    def get_agent_instance_from_parameters(
82        seed: int,
83        optimization_horizon: int,
84        mdp_specs: MDPSpec,
85        parameters: Dict[str, Any],
86    ) -> "BaseAgent":
87        return RandomAgentEpisodic(seed, mdp_specs)

Returns an agent instance for the given MDP specification and agent parameters.

Parameters

seed (int): The random seed.
optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
mdp_specs (MDPSpec): The full specification of the MDP.
parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.

Returns

BaseAgent: The agent instance.

@staticmethod

def is_episodic() -> bool: View Source

89    @staticmethod
90    def is_episodic() -> bool:
91        return True

Returns

bool: True if the agent is suited for the episodic setting.

Inherited Members

RandomAgent: RandomAgent; is_emission_map_accepted; get_hyperparameters_search_spaces; produce_gin_file_from_parameters; current_optimal_stochastic_policy; episode_end_update; before_start_interacting
colosseum.agent.agents.base.BaseAgent: is_episode_end; select_action; step_update; agent_logs

class RandomAgentContinuous(RandomAgent): View Source

 94class RandomAgentContinuous(RandomAgent):
 95    @staticmethod
 96    def get_agent_instance_from_parameters(
 97        seed: int,
 98        optimization_horizon: int,
 99        mdp_specs: MDPSpec,
100        parameters: Dict[str, Any],
101    ) -> "BaseAgent":
102        return RandomAgentContinuous(seed, mdp_specs)
103
104    @staticmethod
105    def is_episodic() -> bool:
106        return False

The RandomAgent implements a uniformly randomly acting agent.

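@staticmethod

def get_agent_instance_from_parameters( seed: int, optimization_horizon: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, parameters: Dict[str, Any]) -> colosseum.agent.agents.base.BaseAgent: View Source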

 95    @staticmethod
 96    def get_agent_instance_from_parameters(
 97        seed: int,
 98        optimization_horizon: int,
 99        mdp_specs: MDPSpec,
100        parameters: Dict[str, Any],
101    ) -> "BaseAgent":
102        return RandomAgentContinuous(seed, mdp_specs)

Returns an agent instance for the given MDP specification and agent parameters.

Parameters

seed (int): The random seed.
optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
mdp_specs (MDPSpec): The full specification of the MDP.
parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.

Returns

BaseAgent: The agent instance.

@staticmethod

def is_episodic() -> bool: View Source

104    @staticmethod
105    def is_episodic() -> bool:
106        return False

Returns

bool: True if the agent is suited for the episodic setting.

Inherited Members

RandomAgent: RandomAgent; is_emission_map_accepted; get_hyperparameters_search_spaces; produce_gin_file_from_parameters; current_optimal_stochastic_policy; episode_end_update; before_start_interacting
colosseum.agent.agents.base.BaseAgent: is_episode_end; select_action; step_update; agent_logs