colosseum.agent.agents.random
"""Baseline agents that pick actions uniformly at random."""

import abc
from typing import Any, Dict, TYPE_CHECKING

import numpy as np
from ray import tune

from colosseum.agent.actors import RandomActor
from colosseum.agent.agents.base import BaseAgent
from colosseum.utils.acme.specs import DiscreteArray, MDPSpec

if TYPE_CHECKING:
    from colosseum.emission_maps import EmissionMap


class RandomAgent(BaseAgent, abc.ABC):
    """
    The `RandomAgent` implements a uniformly randomly acting agent.
    """

    @staticmethod
    def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
        """Return True for every emission map; the argument is not inspected."""
        return True

    @staticmethod
    def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
        """Return an empty search space: the agent exposes no hyperparameters."""
        return {}

    @staticmethod
    def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0):
        """Return an empty gin configuration; both arguments are ignored."""
        return ""

    @property
    def current_optimal_stochastic_policy(self) -> np.ndarray:
        """Return the uniform policy array built by ``__init__``."""
        return self._policy

    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.

        Raises
        ------
        NotImplementedError
            If the observations are discrete but the actions are not.
        """
        super(RandomAgent, self).__init__(
            seed,
            mdp_specs,
            mdp_model=None,
            actor=RandomActor(seed, mdp_specs),
            optimization_horizon=0,
        )

        # NOTE(review): the listing this code was recovered from had lost its
        # indentation; the `else` is assumed to pair with the action-space
        # check (the agent accepts every emission map) - confirm.
        if not isinstance(self._mdp_spec.observations, DiscreteArray):
            # No policy array is built when observations are not discrete.
            return
        if not isinstance(self._mdp_spec.actions, DiscreteArray):
            raise NotImplementedError(
                "The RandomAgent is implemented for discrete MDP only."
            )

        n_s = self._mdp_spec.observations.num_values
        n_a = self._mdp_spec.actions.num_values

        # One (n_s, n_a) table when the horizon is infinite, otherwise one
        # table per time step.
        if self._time_horizon == np.inf:
            policy_shape = (n_s, n_a)
        else:
            policy_shape = (self._time_horizon, n_s, n_a)

        # Every action receives probability 1 / n_a.
        self._policy = np.ones(policy_shape) / n_a

    def episode_end_update(self):
        """Do nothing: there is nothing to update at the end of an episode."""
        pass

    def before_start_interacting(self):
        """Do nothing: no preparation is required before interacting."""
        pass


class RandomAgentEpisodic(RandomAgent):
    """The uniformly random agent for the episodic setting."""

    @staticmethod
    def get_agent_instance_from_parameters(
        seed: int,
        optimization_horizon: int,
        mdp_specs: MDPSpec,
        parameters: Dict[str, Any],
    ) -> "BaseAgent":
        """
        Build a `RandomAgentEpisodic` from the seed and the MDP specification.

        `optimization_horizon` and `parameters` are accepted but not used.
        """
        return RandomAgentEpisodic(seed, mdp_specs)

    @staticmethod
    def is_episodic() -> bool:
        """Return True."""
        return True


class RandomAgentContinuous(RandomAgent):
    """The uniformly random agent for the non-episodic (continuous) setting."""

    @staticmethod
    def get_agent_instance_from_parameters(
        seed: int,
        optimization_horizon: int,
        mdp_specs: MDPSpec,
        parameters: Dict[str, Any],
    ) -> "BaseAgent":
        """
        Build a `RandomAgentContinuous` from the seed and the MDP specification.

        `optimization_horizon` and `parameters` are accepted but not used.
        """
        return RandomAgentContinuous(seed, mdp_specs)

    @staticmethod
    def is_episodic() -> bool:
        """Return False."""
        return False
class RandomAgent(BaseAgent, abc.ABC):
    """
    The `RandomAgent` implements a uniformly randomly acting agent.
    """

    @staticmethod
    def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
        """Return True for every emission map; the argument is not inspected."""
        return True

    @staticmethod
    def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
        """Return an empty search space: the agent exposes no hyperparameters."""
        return {}

    @staticmethod
    def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0):
        """Return an empty gin configuration; both arguments are ignored."""
        return ""

    @property
    def current_optimal_stochastic_policy(self) -> np.ndarray:
        """Return the uniform policy array built by ``__init__``."""
        return self._policy

    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.

        Raises
        ------
        NotImplementedError
            If the observations are discrete but the actions are not.
        """
        super(RandomAgent, self).__init__(
            seed,
            mdp_specs,
            mdp_model=None,
            actor=RandomActor(seed, mdp_specs),
            optimization_horizon=0,
        )

        # NOTE(review): the listing this code was recovered from had lost its
        # indentation; the `else` is assumed to pair with the action-space
        # check (the agent accepts every emission map) - confirm.
        if not isinstance(self._mdp_spec.observations, DiscreteArray):
            # No policy array is built when observations are not discrete.
            return
        if not isinstance(self._mdp_spec.actions, DiscreteArray):
            raise NotImplementedError(
                "The RandomAgent is implemented for discrete MDP only."
            )

        n_s = self._mdp_spec.observations.num_values
        n_a = self._mdp_spec.actions.num_values

        # One (n_s, n_a) table when the horizon is infinite, otherwise one
        # table per time step.
        if self._time_horizon == np.inf:
            policy_shape = (n_s, n_a)
        else:
            policy_shape = (self._time_horizon, n_s, n_a)

        # Every action receives probability 1 / n_a.
        self._policy = np.ones(policy_shape) / n_a

    def episode_end_update(self):
        """Do nothing: there is nothing to update at the end of an episode."""
        pass

    def before_start_interacting(self):
        """Do nothing: no preparation is required before interacting."""
        pass
The `RandomAgent` implements an agent that selects its actions uniformly at random.
RandomAgent(seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec)
def __init__(self, seed: int, mdp_specs: MDPSpec):
    """
    Parameters
    ----------
    seed : int
        The random seed.
    mdp_specs : MDPSpec
        The full specification of the MDP.

    Raises
    ------
    NotImplementedError
        If the observations are discrete but the actions are not.
    """
    super(RandomAgent, self).__init__(
        seed,
        mdp_specs,
        mdp_model=None,
        actor=RandomActor(seed, mdp_specs),
        optimization_horizon=0,
    )

    # NOTE(review): the listing this code was recovered from had lost its
    # indentation; the `else` is assumed to pair with the action-space
    # check - confirm against the module source.
    if not isinstance(self._mdp_spec.observations, DiscreteArray):
        # No policy array is built when observations are not discrete.
        return
    if not isinstance(self._mdp_spec.actions, DiscreteArray):
        raise NotImplementedError(
            "The RandomAgent is implemented for discrete MDP only."
        )

    n_s = self._mdp_spec.observations.num_values
    n_a = self._mdp_spec.actions.num_values

    # One (n_s, n_a) table when the horizon is infinite, otherwise one
    # table per time step.
    if self._time_horizon == np.inf:
        policy_shape = (n_s, n_a)
    else:
        policy_shape = (self._time_horizon, n_s, n_a)

    # Every action receives probability 1 / n_a.
    self._policy = np.ones(policy_shape) / n_a
Parameters
- seed (int): The random seed.
- mdp_specs (MDPSpec): The full specification of the MDP.
@staticmethod
def
is_emission_map_accepted(emission_map: colosseum.emission_maps.base.EmissionMap) -> bool:
21 @staticmethod 22 def is_emission_map_accepted(emission_map: "EmissionMap") -> bool: 23 return True
Returns
- bool: True if the agent class accepts the emission map.
@staticmethod
def
get_hyperparameters_search_spaces() -> Dict[str, ray.tune.sample.Domain]:
@staticmethod
def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
    """
    Return the hyperparameter search spaces of the agent.

    The returned dictionary is empty, i.e. no hyperparameter is exposed.
    """
    return dict()
Returns
- Dict[str, tune.sample.Domain]: A dictionary that maps each hyperparameter name to its corresponding `ray.tune` sampler.
@staticmethod
def
produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0):
29 @staticmethod 30 def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0): 31 return ""
Produces a string containing the gin configuration file that corresponds to the given parameters.
Parameters
- parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
- index (int): The index assigned to the gin configuration.
Returns
- gin_config (str): The gin configuration file.
current_optimal_stochastic_policy: numpy.ndarray
Returns
- np.ndarray: The agent's current estimate of the optimal policy, given its current knowledge, in the form of a distribution over actions.
def
episode_end_update(self):
Called when an episode ends. In the infinite-horizon case, this refers to artificial episodes.
class RandomAgentEpisodic(RandomAgent):
    """The uniformly random agent for the episodic setting."""

    @staticmethod
    def get_agent_instance_from_parameters(
        seed: int,
        optimization_horizon: int,
        mdp_specs: MDPSpec,
        parameters: Dict[str, Any],
    ) -> "BaseAgent":
        """
        Build a `RandomAgentEpisodic` from the seed and the MDP specification.

        `optimization_horizon` and `parameters` are accepted but not used.
        """
        agent = RandomAgentEpisodic(seed, mdp_specs)
        return agent

    @staticmethod
    def is_episodic() -> bool:
        """Return True."""
        return True
The `RandomAgent` implements an agent that selects its actions uniformly at random.
@staticmethod
def
get_agent_instance_from_parameters( seed: int, optimization_horizon: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, parameters: Dict[str, Any]) -> colosseum.agent.agents.base.BaseAgent:
@staticmethod
def get_agent_instance_from_parameters(
    seed: int,
    optimization_horizon: int,
    mdp_specs: MDPSpec,
    parameters: Dict[str, Any],
) -> "BaseAgent":
    """
    Build a `RandomAgentEpisodic` from the seed and the MDP specification.

    `optimization_horizon` and `parameters` are accepted but not used.
    """
    agent = RandomAgentEpisodic(seed, mdp_specs)
    return agent
Returns an agent instance for the given MDP specification and agent parameters.
Parameters
- seed (int): The random seed.
- optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
- mdp_specs (MDPSpec): The full specification of the MDP.
- parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
Returns
- BaseAgent: The agent instance.
@staticmethod
def
is_episodic() -> bool:
Returns
- bool: True if the agent is suited for the episodic setting.
Inherited Members
class RandomAgentContinuous(RandomAgent):
    """The uniformly random agent for the non-episodic (continuous) setting."""

    @staticmethod
    def get_agent_instance_from_parameters(
        seed: int,
        optimization_horizon: int,
        mdp_specs: MDPSpec,
        parameters: Dict[str, Any],
    ) -> "BaseAgent":
        """
        Build a `RandomAgentContinuous` from the seed and the MDP specification.

        `optimization_horizon` and `parameters` are accepted but not used.
        """
        agent = RandomAgentContinuous(seed, mdp_specs)
        return agent

    @staticmethod
    def is_episodic() -> bool:
        """Return False."""
        return False
The `RandomAgent` implements an agent that selects its actions uniformly at random.
@staticmethod
def
get_agent_instance_from_parameters( seed: int, optimization_horizon: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, parameters: Dict[str, Any]) -> colosseum.agent.agents.base.BaseAgent:
@staticmethod
def get_agent_instance_from_parameters(
    seed: int,
    optimization_horizon: int,
    mdp_specs: MDPSpec,
    parameters: Dict[str, Any],
) -> "BaseAgent":
    """
    Build a `RandomAgentContinuous` from the seed and the MDP specification.

    `optimization_horizon` and `parameters` are accepted but not used.
    """
    agent = RandomAgentContinuous(seed, mdp_specs)
    return agent
Returns an agent instance for the given MDP specification and agent parameters.
Parameters
- seed (int): The random seed.
- optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
- mdp_specs (MDPSpec): The full specification of the MDP.
- parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
Returns
- BaseAgent: The agent instance.
@staticmethod
def
is_episodic() -> bool:
Returns
- bool: True if the agent is suited for the episodic setting.