colosseum.agent.agents.base

import abc
import random
from typing import TYPE_CHECKING, Any, Dict, Union

import dm_env
import numpy as np
from ray import tune

from colosseum.emission_maps import EmissionMap
from colosseum.utils.acme.specs import MDPSpec

if TYPE_CHECKING:
    from colosseum.mdp import ACTION_TYPE
    from colosseum.agent.actors import ACTOR_TYPES
    from colosseum.agent.mdp_models import MODEL_TYPES


class BaseAgent(abc.ABC):
    """
    The base class for Colosseum agents.
    """

    @staticmethod
    @abc.abstractmethod
    def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
        """
        Returns
        -------
        bool
            True if the agent class accepts the emission map.
        """

    @staticmethod
    @abc.abstractmethod
    def is_episodic() -> bool:
        """
        Returns
        -------
        bool
            True if the agent is suited for the episodic setting.
        """

    @staticmethod
    @abc.abstractmethod
    def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
        """
        Returns
        -------
        Dict[str, tune.sample.Domain]
            The dictionary with key value pairs corresponding to hyperparameter names and corresponding `ray.tune` samplers.
        """

    @staticmethod
    @abc.abstractmethod
    def produce_gin_file_from_parameters(
        parameters: Dict[str, Any], index: int = 0
    ) -> str:
        """
        produces a string containing the gin config file corresponding to the parameters given in input.

        Parameters
        ----------
        parameters : Dict[str, Any]
            The dictionary containing the parameters of the agent.
        index : int
            The index assigned to the gin configuration.

        Returns
        -------
        gin_config : str
            The gin configuration file.
        """

    @staticmethod
    @abc.abstractmethod
    def get_agent_instance_from_parameters(
        seed: int,
        optimization_horizon: int,
        mdp_specs: MDPSpec,
        parameters: Dict[str, Any],
    ) -> "BaseAgent":
        """
        returns an agent instance for the mdp specification and agent parameters given in input.

        Parameters
        ----------
        seed : int
            The random seed.
        optimization_horizon : int
            The total number of interactions that the agent is expected to have with the MDP.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        parameters : Dict[str, Any]
            The dictionary containing the parameters of the agent.

        Returns
        -------
        BaseAgent
            The agent instance.
        """

    @abc.abstractmethod
    def __init__(
        self,
        seed: int,
        mdp_specs: "MDPSpec",
        mdp_model: Union[None, "MODEL_TYPES"],
        actor: "ACTOR_TYPES",
        optimization_horizon: int,
    ):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        mdp_model : BaseMDPModel
            The component of the agent that contains the knowledge acquired from the interactions with
            the MDP.
        actor : BaseActor
            The component of the agent that provides a mapping from MDP estimates to actions.
        optimization_horizon : int
            The total number of interactions that the agent is expected to have with the MDP.
        """
        self._mdp_spec = mdp_specs
        self._mdp_model = mdp_model
        self._actor = actor
        self._optimization_horizon = optimization_horizon
        self._time_horizon = mdp_specs.time_horizon

        self._rng = np.random.RandomState(seed)
        self._rng_fast = random.Random(seed)

    @property
    @abc.abstractmethod
    def current_optimal_stochastic_policy(self) -> np.ndarray:
        """
        Returns
        -------
        np.ndarray
            The estimates of the best optimal policy given the current knowledge of the agent in the form of
            distribution over actions.
        """

    @abc.abstractmethod
    def episode_end_update(self):
        """
        is called when an episode ends. In the infinite horizon case, we refer to artificial episodes.
        """

    @abc.abstractmethod
    def before_start_interacting(self):
        """
        is called before the agent starts interacting with the MDP.
        """

    def is_episode_end(
        self,
        ts_t: dm_env.TimeStep,
        a_t: "ACTION_TYPE",
        ts_tp1: dm_env.TimeStep,
        time: int,
    ) -> bool:
        """
        checks whether the episode is terminated. By default, this checks whether the current time step exceeds the time
        horizon. In the continuous case, this can be used to define artificial episodes.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.

        Returns
        -------
        bool
            True if the episode terminated at time t+1.
        """
        return ts_tp1.last()

    def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
        """
        Parameters
        ----------
        ts : dm_env.TimeStep
            The TimeStep for which the agent is required to calculate the next action.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.

        Returns
        -------
        action : ACTION_TYPE
            The action that the agent suggests to take given the observation and the time step.
        """
        return self._actor.select_action(ts, time)

    @abc.abstractmethod
    def step_update(
        self,
        ts_t: dm_env.TimeStep,
        a_t: "ACTION_TYPE",
        ts_tp1: dm_env.TimeStep,
        time: int,
    ):
        """
        adds the transition in input to the MDP model.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.
        """
        if self._mdp_model:
            self._mdp_model.step_update(ts_t, a_t, ts_tp1, time)

    def agent_logs(self):
        """
        is called during the agent MDP interaction at lagging time. It can be used to log additional information.
        """

class BaseAgent(abc.ABC):

The base class for Colosseum agents.
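
To give a sense of how the hooks documented below fit together, the following is a schematic sketch (not part of the library) of an interaction loop driving a concrete agent, assuming env is a dm_env.Environment and agent is an instance of a BaseAgent subclass; in the episodic setting the time argument is the in-episode time.

import dm_env


def run_interaction_loop(env: dm_env.Environment, agent: "BaseAgent", optimization_horizon: int):
    # Illustrative sketch of when each BaseAgent hook is called; the actual
    # Colosseum experiment loop may differ in its details.
    agent.before_start_interacting()  # one-off setup before any interaction
    ts = env.reset()
    in_episode_time = 0
    for _ in range(optimization_horizon):
        action = agent.select_action(ts, in_episode_time)  # query the actor for an action
        ts_next = env.step(action)
        agent.step_update(ts, action, ts_next, in_episode_time)  # feed the transition to the agent
        if agent.is_episode_end(ts, action, ts_next, in_episode_time):
            agent.episode_end_update()  # update at (possibly artificial) episode boundaries
            ts = env.reset()
            in_episode_time = 0
        else:
            ts = ts_next
            in_episode_time += 1
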

@abc.abstractmethod
BaseAgent(seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, mdp_model: Optional['MODEL_TYPES'], actor: Union[colosseum.agent.actors.base.BaseActor, colosseum.agent.actors.Q_values_actor.QValuesActor], optimization_horizon: int)
Parameters
  • seed (int): The random seed.
  • mdp_specs (MDPSpec): The full specification of the MDP.
  • mdp_model (BaseMDPModel): The component of the agent that contains the knowledge acquired from the interactions with the MDP.
  • actor (BaseActor): The component of the agent that provides a mapping from MDP estimates to actions.
  • optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
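
A concrete subclass typically builds its own model and actor and then delegates to BaseAgent.__init__, which stores them and seeds the random number generators. The sketch below uses hypothetical MyTabularModel and MyQValuesActor components; their constructors are placeholders.

def __init__(self, seed: int, mdp_specs: "MDPSpec", optimization_horizon: int):
    # Hypothetical subclass constructor: build the components and defer to BaseAgent.
    mdp_model = MyTabularModel(seed, mdp_specs)  # placeholder model component
    actor = MyQValuesActor(seed, mdp_specs)      # placeholder actor component
    super().__init__(seed, mdp_specs, mdp_model, actor, optimization_horizon)
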
@staticmethod
@abc.abstractmethod
def is_emission_map_accepted(emission_map: colosseum.emission_maps.base.EmissionMap) -> bool:
Returns
  • bool: True if the agent class accepts the emission map.
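
For instance, a tabular agent would accept only tabular emission maps. The sketch below assumes, for illustration, that the emission map exposes an is_tabular flag; check colosseum.emission_maps for the actual interface.

@staticmethod
def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
    # Hypothetical tabular-only agent: accept the emission map only if it is tabular.
    return emission_map.is_tabular
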
@staticmethod
@abc.abstractmethod
def is_episodic() -> bool:
Returns
  • bool: True if the agent is suited for the episodic setting.
@staticmethod
@abc.abstractmethod
def get_hyperparameters_search_spaces() -> Dict[str, ray.tune.sample.Domain]:
Returns
  • Dict[str, tune.sample.Domain]: The dictionary mapping hyperparameter names to the corresponding ray.tune samplers.
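
As an illustration, an agent with a learning rate and an exploration bonus could expose its search space as follows; the hyperparameter names and ranges are made up for the example, and tune is the ray.tune module imported in the source above.

@staticmethod
def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
    # Illustrative hyperparameter names and ranges only.
    return {
        "learning_rate": tune.uniform(0.001, 1.0),
        "exploration_bonus": tune.uniform(0.01, 10.0),
    }
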
@staticmethod
@abc.abstractmethod
def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0) -> str:

Produces a string containing the gin config file corresponding to the given parameters.

Parameters
  • parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
  • index (int): The index assigned to the gin configuration.
Returns
  • gin_config (str): The gin configuration file.
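
One plausible implementation serialises each parameter as a gin binding scoped by the index. The MyAgent class name and the prms_{index} scope below are illustrative; the exact scoping convention expected by the Colosseum pipelines may differ, and string-valued parameters would need explicit quoting.

@staticmethod
def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0) -> str:
    # Turns e.g. {"learning_rate": 0.1} into the binding 'prms_0/MyAgent.learning_rate = 0.1'.
    gin_config = ""
    for name, value in parameters.items():
        gin_config += f"prms_{index}/MyAgent.{name} = {value}\n"
    return gin_config
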
@staticmethod
@abc.abstractmethod
def get_agent_instance_from_parameters(seed: int, optimization_horizon: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, parameters: Dict[str, Any]) -> colosseum.agent.agents.base.BaseAgent:

Returns an agent instance for the given MDP specification and agent parameters.

Parameters
  • seed (int): The random seed.
  • optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
  • mdp_specs (MDPSpec): The full specification of the MDP.
  • parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
Returns
  • BaseAgent: The agent instance.
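
A concrete subclass would typically forward these arguments to its own constructor, for example as below, where MyAgent is a hypothetical subclass whose __init__ accepts the sampled hyperparameters as keyword arguments.

@staticmethod
def get_agent_instance_from_parameters(
    seed: int,
    optimization_horizon: int,
    mdp_specs: MDPSpec,
    parameters: Dict[str, Any],
) -> "BaseAgent":
    # MyAgent is a placeholder for the concrete agent class.
    return MyAgent(
        seed=seed,
        mdp_specs=mdp_specs,
        optimization_horizon=optimization_horizon,
        **parameters,
    )
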
current_optimal_stochastic_policy: numpy.ndarray
Returns
  • np.ndarray: The agent's current estimate of the optimal policy, based on its knowledge so far, expressed as a distribution over actions.
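
For a value-based agent, this can be obtained from the current action-value estimates, e.g. as a greedy (one-hot) distribution over actions. The _q_estimates attribute below is hypothetical and stands for whatever estimates the subclass maintains.

@property
def current_optimal_stochastic_policy(self) -> np.ndarray:
    # Turn Q-value estimates of shape (..., num_actions) into a deterministic
    # (one-hot) distribution over actions.
    q = self._q_estimates  # hypothetical attribute maintained by the subclass
    greedy_actions = q.argmax(-1)
    return np.eye(q.shape[-1])[greedy_actions]
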
@abc.abstractmethod
def episode_end_update(self):

Is called when an episode ends. In the infinite-horizon case, this refers to artificial episodes.
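
For example, a model-based subclass might re-plan at episode boundaries. Both method names in the sketch below are placeholders for whatever the subclass's model and actor actually expose.

def episode_end_update(self):
    # Hypothetical model-based update: recompute value estimates from the learned
    # MDP model and hand them to the actor.
    q_estimates = self._plan_with_model()   # placeholder planning routine
    self._actor.set_q_values(q_estimates)   # assumes a Q-values-based actor interface
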

@abc.abstractmethod
def before_start_interacting(self):

Is called before the agent starts interacting with the MDP.

def is_episode_end(self, ts_t: dm_env._environment.TimeStep, a_t: Union[int, float, numpy.ndarray], ts_tp1: dm_env._environment.TimeStep, time: int) -> bool:

Checks whether the episode has terminated. By default, it returns whether ts_tp1 is the last time step of the episode (ts_tp1.last()), which in the episodic setting corresponds to the in-episode time reaching the time horizon. In the continuous case, this can be overridden to define artificial episodes.

Parameters
  • ts_t (dm_env.TimeStep): The TimeStep at time t.
  • a_t (ACTION_TYPE): The action taken by the agent at time t.
  • ts_tp1 (dm_env.TimeStep): The TimeStep at time t + 1.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
  • bool: True if the episode terminated at time t+1.
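
In the continuous (infinite-horizon) setting, an agent can override this hook to create artificial episodes, for instance ending one every fixed number of interactions; the episode length of 100 below is arbitrary.

def is_episode_end(
    self,
    ts_t: dm_env.TimeStep,
    a_t: "ACTION_TYPE",
    ts_tp1: dm_env.TimeStep,
    time: int,
) -> bool:
    # Artificial episodes of (arbitrary) length 100 on top of the environment's own termination.
    return ts_tp1.last() or (time + 1) % 100 == 0
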
def select_action(self, ts: dm_env._environment.TimeStep, time: int) -> Union[int, float, numpy.ndarray]:
Parameters
  • ts (dm_env.TimeStep): The TimeStep for which the agent is required to calculate the next action.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
  • action (ACTION_TYPE): The action that the agent suggests to take given the observation and the time step.
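
A subclass can override this method to modify the actor's choice, for instance with an epsilon-greedy exploration step using the fast random number generator set up in __init__; the exploration rate and the _n_actions attribute below are illustrative.

def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
    # Hypothetical epsilon-greedy wrapper around the actor's action selection.
    if self._rng_fast.random() < 0.05:                     # illustrative exploration rate
        return self._rng_fast.randrange(self._n_actions)   # _n_actions is hypothetical
    return self._actor.select_action(ts, time)
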
@abc.abstractmethod
def step_update(self, ts_t: dm_env._environment.TimeStep, a_t: Union[int, float, numpy.ndarray], ts_tp1: dm_env._environment.TimeStep, time: int):

Adds the given transition to the MDP model.

Parameters
  • ts_t (dm_env.TimeStep): The TimeStep at time t.
  • a_t (ACTION_TYPE): The action taken by the agent at time t.
  • ts_tp1 (dm_env.TimeStep): The TimeStep at time t + 1.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
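
A subclass typically extends this method with its own bookkeeping while still forwarding the transition to the MDP model via the base-class implementation; the visitation-count attribute below is hypothetical and assumes tabular (integer) observations.

def step_update(
    self,
    ts_t: dm_env.TimeStep,
    a_t: "ACTION_TYPE",
    ts_tp1: dm_env.TimeStep,
    time: int,
):
    # Forward the transition to the MDP model (base-class behaviour).
    super().step_update(ts_t, a_t, ts_tp1, time)
    # Hypothetical extra bookkeeping: per state-action visitation counts.
    self._visitation_counts[ts_t.observation, a_t] += 1
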
def agent_logs(self):

Is called during the agent-MDP interaction at lagging time. It can be used to log additional information.