colosseum.agent.agents.base

import abc
import random
from typing import TYPE_CHECKING, Any, Dict, Union

import dm_env
import numpy as np
from ray import tune

from colosseum.emission_maps import EmissionMap
from colosseum.utils.acme.specs import MDPSpec

if TYPE_CHECKING:
    from colosseum.mdp import ACTION_TYPE
    from colosseum.agent.actors import ACTOR_TYPES
    from colosseum.agent.mdp_models import MODEL_TYPES


class BaseAgent(abc.ABC):
    """
    The base class for Colosseum agents.
    """

    @staticmethod
    @abc.abstractmethod
    def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
        """
        Returns
        -------
        bool
            True if the agent class accepts the emission map.
        """

    @staticmethod
    @abc.abstractmethod
    def is_episodic() -> bool:
        """
        Returns
        -------
        bool
            True if the agent is suited for the episodic setting.
        """

    @staticmethod
    @abc.abstractmethod
    def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
        """
        Returns
        -------
        Dict[str, tune.sample.Domain]
            The dictionary with key value pairs corresponding to hyperparameter names and corresponding `ray.tune` samplers.
        """

    @staticmethod
    @abc.abstractmethod
    def produce_gin_file_from_parameters(
        parameters: Dict[str, Any], index: int = 0
    ) -> str:
        """
        produces a string containing the gin config file corresponding to the parameters given in input.

        Parameters
        ----------
        parameters : Dict[str, Any]
            The dictionary containing the parameters of the agent.
        index : int
            The index assigned to the gin configuration.

        Returns
        -------
        gin_config : str
            The gin configuration file.
        """

    @staticmethod
    @abc.abstractmethod
    def get_agent_instance_from_parameters(
        seed: int,
        optimization_horizon: int,
        mdp_specs: MDPSpec,
        parameters: Dict[str, Any],
    ) -> "BaseAgent":
        """
        returns an agent instance for the mdp specification and agent parameters given in input.

        Parameters
        ----------
        seed : int
            The random seed.
        optimization_horizon : int
            The total number of interactions that the agent is expected to have with the MDP.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        parameters : Dict[str, Any]
            The dictionary containing the parameters of the agent.

        Returns
        -------
        BaseAgent
            The agent instance.
        """

    @abc.abstractmethod
    def __init__(
        self,
        seed: int,
        mdp_specs: "MDPSpec",
        mdp_model: Union[None, "MODEL_TYPES"],
        actor: "ACTOR_TYPES",
        optimization_horizon: int,
    ):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        mdp_model : BaseMDPModel
            The component of the agent that contains the knowledge acquired from the interactions with
            the MDP.
        actor : BaseActor
            The component of the agent that provides a mapping from MDP estimates to actions.
        optimization_horizon : int
            The total number of interactions that the agent is expected to have with the MDP.
        """
        self._mdp_spec = mdp_specs
        self._mdp_model = mdp_model
        self._actor = actor
        self._optimization_horizon = optimization_horizon
        self._time_horizon = mdp_specs.time_horizon

        self._rng = np.random.RandomState(seed)
        self._rng_fast = random.Random(seed)

    @property
    @abc.abstractmethod
    def current_optimal_stochastic_policy(self) -> np.ndarray:
        """
        Returns
        -------
        np.ndarray
            The estimates of the best optimal policy given the current knowledge of the agent in the form of
            distribution over actions.
        """

    @abc.abstractmethod
    def episode_end_update(self):
        """
        is called when an episode ends. In the infinite horizon case, we refer to artificial episodes.
        """

    @abc.abstractmethod
    def before_start_interacting(self):
        """
        is called before the agent starts interacting with the MDP.
        """

    def is_episode_end(
        self,
        ts_t: dm_env.TimeStep,
        a_t: "ACTION_TYPE",
        ts_tp1: dm_env.TimeStep,
        time: int,
    ) -> bool:
        """
        checks whether the episode is terminated. By default, this checks whether the current time step exceeds the time
        horizon. In the continuous case, this can be used to define artificial episodes.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.

        Returns
        -------
        bool
            True if the episode terminated at time t+1.
        """
        return ts_tp1.last()

    def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
        """
        Parameters
        ----------
        ts : dm_env.TimeStep
            The TimeStep for which the agent is required to calculate the next action.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.

        Returns
        -------
        action : ACTION_TYPE
            The action that the agent suggests to take given the observation and the time step.
        """
        return self._actor.select_action(ts, time)

    @abc.abstractmethod
    def step_update(
        self,
        ts_t: dm_env.TimeStep,
        a_t: "ACTION_TYPE",
        ts_tp1: dm_env.TimeStep,
        time: int,
    ):
        """
        adds the transition in input to the MDP model.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.
        """
        if self._mdp_model:
            self._mdp_model.step_update(ts_t, a_t, ts_tp1, time)

    def agent_logs(self):
        """
        is called during the agent MDP interaction at lagging time. It can be used to log additional information.
        """

class BaseAgent(abc.ABC):

The base class for Colosseum agents.
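
To give a sense of how the hooks documented below fit together, the following is a schematic sketch (not part of the library) of an interaction loop driving a concrete agent, assuming env is a dm_env.Environment and agent is an instance of a BaseAgent subclass; in the episodic setting the time argument is the in-episode time.

import dm_env


def run_interaction_loop(env: dm_env.Environment, agent: "BaseAgent", optimization_horizon: int):
    # Illustrative sketch of when each BaseAgent hook is called; the actual
    # Colosseum experiment loop may differ in its details.
    agent.before_start_interacting()  # one-off setup before any interaction
    ts = env.reset()
    in_episode_time = 0
    for _ in range(optimization_horizon):
        action = agent.select_action(ts, in_episode_time)  # query the actor for an action
        ts_next = env.step(action)
        agent.step_update(ts, action, ts_next, in_episode_time)  # feed the transition to the agent
        if agent.is_episode_end(ts, action, ts_next, in_episode_time):
            agent.episode_end_update()  # update at (possibly artificial) episode boundaries
            ts = env.reset()
            in_episode_time = 0
        else:
            ts = ts_next
            in_episode_time += 1
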

@abc.abstractmethod
BaseAgent(seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, mdp_model: Optional['MODEL_TYPES'], actor: Union[colosseum.agent.actors.base.BaseActor, colosseum.agent.actors.Q_values_actor.QValuesActor], optimization_horizon: int)
Parameters
  • seed (int): The random seed.
  • mdp_specs (MDPSpec): The full specification of the MDP.
  • mdp_model (BaseMDPModel): The component of the agent that contains the knowledge acquired from the interactions with the MDP.
  • actor (BaseActor): The component of the agent that provides a mapping from MDP estimates to actions.
  • optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
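
A concrete subclass typically builds its own model and actor and then delegates to BaseAgent.__init__, which stores them and seeds the random number generators. The sketch below uses hypothetical MyTabularModel and MyQValuesActor components; their constructors are placeholders.

def __init__(self, seed: int, mdp_specs: "MDPSpec", optimization_horizon: int):
    # Hypothetical subclass constructor: build the components and defer to BaseAgent.
    mdp_model = MyTabularModel(seed, mdp_specs)  # placeholder model component
    actor = MyQValuesActor(seed, mdp_specs)      # placeholder actor component
    super().__init__(seed, mdp_specs, mdp_model, actor, optimization_horizon)
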
@staticmethod
@abc.abstractmethod
def is_emission_map_accepted(emission_map: colosseum.emission_maps.base.EmissionMap) -> bool:
Returns
  • bool: True if the agent class accepts the emission map.
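
For instance, a tabular agent would accept only tabular emission maps. The sketch below assumes, for illustration, that the emission map exposes an is_tabular flag; check colosseum.emission_maps for the actual interface.

@staticmethod
def is_emission_map_accepted(emission_map: "EmissionMap") -> bool:
    # Hypothetical tabular-only agent: accept the emission map only if it is tabular.
    return emission_map.is_tabular
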
@staticmethod
@abc.abstractmethod
def is_episodic() -> bool:
Returns
  • bool: True if the agent is suited for the episodic setting.
@staticmethod
@abc.abstractmethod
def get_hyperparameters_search_spaces() -> Dict[str, ray.tune.sample.Domain]:
Returns
  • Dict[str, tune.sample.Domain]: The dictionary mapping hyperparameter names to the corresponding ray.tune samplers.
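
As an illustration, an agent with a learning rate and an exploration bonus could expose its search space as follows; the hyperparameter names and ranges are made up for the example, and tune is the ray.tune module imported in the source above.

@staticmethod
def get_hyperparameters_search_spaces() -> Dict[str, tune.sample.Domain]:
    # Illustrative hyperparameter names and ranges only.
    return {
        "learning_rate": tune.uniform(0.001, 1.0),
        "exploration_bonus": tune.uniform(0.01, 10.0),
    }
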
@staticmethod
@abc.abstractmethod
def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0) -> str:

Produces a string containing the gin config file corresponding to the given parameters.

Parameters
  • parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
  • index (int): The index assigned to the gin configuration.
Returns
  • gin_config (str): The gin configuration file.
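
One plausible implementation serialises each parameter as a gin binding scoped by the index. The MyAgent class name and the prms_{index} scope below are illustrative; the exact scoping convention expected by the Colosseum pipelines may differ, and string-valued parameters would need explicit quoting.

@staticmethod
def produce_gin_file_from_parameters(parameters: Dict[str, Any], index: int = 0) -> str:
    # Turns e.g. {"learning_rate": 0.1} into the binding 'prms_0/MyAgent.learning_rate = 0.1'.
    gin_config = ""
    for name, value in parameters.items():
        gin_config += f"prms_{index}/MyAgent.{name} = {value}\n"
    return gin_config
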
@staticmethod
@abc.abstractmethod
def get_agent_instance_from_parameters(seed: int, optimization_horizon: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec, parameters: Dict[str, Any]) -> colosseum.agent.agents.base.BaseAgent:

Returns an agent instance for the given MDP specification and agent parameters.

Parameters
  • seed (int): The random seed.
  • optimization_horizon (int): The total number of interactions that the agent is expected to have with the MDP.
  • mdp_specs (MDPSpec): The full specification of the MDP.
  • parameters (Dict[str, Any]): The dictionary containing the parameters of the agent.
Returns
  • BaseAgent: The agent instance.
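
A concrete subclass would typically forward these arguments to its own constructor, for example as below, where MyAgent is a hypothetical subclass whose __init__ accepts the sampled hyperparameters as keyword arguments.

@staticmethod
def get_agent_instance_from_parameters(
    seed: int,
    optimization_horizon: int,
    mdp_specs: MDPSpec,
    parameters: Dict[str, Any],
) -> "BaseAgent":
    # MyAgent is a placeholder for the concrete agent class.
    return MyAgent(
        seed=seed,
        mdp_specs=mdp_specs,
        optimization_horizon=optimization_horizon,
        **parameters,
    )
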
current_optimal_stochastic_policy: numpy.ndarray
Returns
  • np.ndarray: The agent's current estimate of the optimal policy, based on its knowledge so far, expressed as a distribution over actions.
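
For a value-based agent, this can be obtained from the current action-value estimates, e.g. as a greedy (one-hot) distribution over actions. The _q_estimates attribute below is hypothetical and stands for whatever estimates the subclass maintains.

@property
def current_optimal_stochastic_policy(self) -> np.ndarray:
    # Turn Q-value estimates of shape (..., num_actions) into a deterministic
    # (one-hot) distribution over actions.
    q = self._q_estimates  # hypothetical attribute maintained by the subclass
    greedy_actions = q.argmax(-1)
    return np.eye(q.shape[-1])[greedy_actions]
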
@abc.abstractmethod
def episode_end_update(self):

Is called when an episode ends. In the infinite-horizon case, this refers to artificial episodes.
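
For example, a model-based subclass might re-plan at episode boundaries. Both method names in the sketch below are placeholders for whatever the subclass's model and actor actually expose.

def episode_end_update(self):
    # Hypothetical model-based update: recompute value estimates from the learned
    # MDP model and hand them to the actor.
    q_estimates = self._plan_with_model()   # placeholder planning routine
    self._actor.set_q_values(q_estimates)   # assumes a Q-values-based actor interface
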

@abc.abstractmethod
def before_start_interacting(self):

Is called before the agent starts interacting with the MDP.

def is_episode_end(self, ts_t: dm_env._environment.TimeStep, a_t: Union[int, float, numpy.ndarray], ts_tp1: dm_env._environment.TimeStep, time: int) -> bool:

Checks whether the episode has terminated. By default, it returns whether ts_tp1 is the last time step of the episode (ts_tp1.last()), which in the episodic setting corresponds to the in-episode time reaching the time horizon. In the continuous case, this can be overridden to define artificial episodes.

Parameters
  • ts_t (dm_env.TimeStep): The TimeStep at time t.
  • a_t (ACTION_TYPE): The action taken by the agent at time t.
  • ts_tp1 (dm_env.TimeStep): The TimeStep at time t + 1.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
  • bool: True if the episode terminated at time t+1.
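
In the continuous (infinite-horizon) setting, an agent can override this hook to create artificial episodes, for instance ending one every fixed number of interactions; the episode length of 100 below is arbitrary.

def is_episode_end(
    self,
    ts_t: dm_env.TimeStep,
    a_t: "ACTION_TYPE",
    ts_tp1: dm_env.TimeStep,
    time: int,
) -> bool:
    # Artificial episodes of (arbitrary) length 100 on top of the environment's own termination.
    return ts_tp1.last() or (time + 1) % 100 == 0
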
def select_action(self, ts: dm_env._environment.TimeStep, time: int) -> Union[int, float, numpy.ndarray]:
Parameters
  • ts (dm_env.TimeStep): The TimeStep for which the agent is required to calculate the next action.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
Returns
  • action (ACTION_TYPE): The action that the agent suggests to take given the observation and the time step.
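
A subclass can override this method to modify the actor's choice, for instance with an epsilon-greedy exploration step using the fast random number generator set up in __init__; the exploration rate and the _n_actions attribute below are illustrative.

def select_action(self, ts: dm_env.TimeStep, time: int) -> "ACTION_TYPE":
    # Hypothetical epsilon-greedy wrapper around the actor's action selection.
    if self._rng_fast.random() < 0.05:                     # illustrative exploration rate
        return self._rng_fast.randrange(self._n_actions)   # _n_actions is hypothetical
    return self._actor.select_action(ts, time)
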
@abc.abstractmethod
def step_update(self, ts_t: dm_env._environment.TimeStep, a_t: Union[int, float, numpy.ndarray], ts_tp1: dm_env._environment.TimeStep, time: int):

Adds the given transition to the MDP model.

Parameters
  • ts_t (dm_env.TimeStep): The TimeStep at time t.
  • a_t (ACTION_TYPE): The action taken by the agent at time t.
  • ts_tp1 (dm_env.TimeStep): The TimeStep at time t + 1.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.
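
A subclass typically extends this method with its own bookkeeping while still forwarding the transition to the MDP model via the base-class implementation; the visitation-count attribute below is hypothetical and assumes tabular (integer) observations.

def step_update(
    self,
    ts_t: dm_env.TimeStep,
    a_t: "ACTION_TYPE",
    ts_tp1: dm_env.TimeStep,
    time: int,
):
    # Forward the transition to the MDP model (base-class behaviour).
    super().step_update(ts_t, a_t, ts_tp1, time)
    # Hypothetical extra bookkeeping: per state-action visitation counts.
    self._visitation_counts[ts_t.observation, a_t] += 1
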
def agent_logs(self):

Is called during the agent-MDP interaction at lagging time. It can be used to log additional information.