colosseum.agent.mdp_models.base
import abc

import dm_env
import numpy as np

from colosseum.mdp import ACTION_TYPE
from colosseum.utils.acme.specs import MDPSpec


class BaseMDPModel(abc.ABC):
    """Abstract base class from which every MDP model inherits."""

    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        """

        # Keep the seed around and derive the model's private random stream from it.
        self._seed = seed
        self._rng = np.random.RandomState(seed)

        # Cache the MDP dimensions the concrete models need.
        self._n_states = mdp_specs.observations.num_values
        self._n_actions = mdp_specs.actions.num_values
        self._reward_range = mdp_specs.rewards_range
        self._H = mdp_specs.time_horizon

    @abc.abstractmethod
    def step_update(
        self,
        ts_t: dm_env.TimeStep,
        a_t: "ACTION_TYPE",
        ts_tp1: dm_env.TimeStep,
        time: int,
    ):
        """
        Update the model with the given transition.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.
        """
class
BaseMDPModel(abc.ABC):
11class BaseMDPModel(abc.ABC): 12 """ 13 The `BaseMDPModel` is the base class for MDP models. 14 """ 15 16 def __init__(self, seed: int, mdp_specs: MDPSpec): 17 """ 18 Parameters 19 ---------- 20 seed : int 21 The random seed. 22 mdp_specs : MDPSpec 23 The full specification of the MDP. 24 """ 25 26 self._seed = seed 27 self._n_states = mdp_specs.observations.num_values 28 self._n_actions = mdp_specs.actions.num_values 29 self._reward_range = mdp_specs.rewards_range 30 self._H = mdp_specs.time_horizon 31 self._rng = np.random.RandomState(seed) 32 33 @abc.abstractmethod 34 def step_update( 35 self, 36 ts_t: dm_env.TimeStep, 37 a_t: "ACTION_TYPE", 38 ts_tp1: dm_env.TimeStep, 39 time: int, 40 ): 41 """ 42 updates the model with the transition in input. 43 44 Parameters 45 ---------- 46 ts_t : dm_env.TimeStep 47 The TimeStep at time t. 48 a_t : "ACTION_TYPE" 49 The action taken by the agent at time t. 50 ts_tp1 : dm_env.TimeStep 51 The TimeStep at time t + 1. 52 time : int 53 The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in 54 the continuous case this refers to the total number of previous interactions. 55 """
The BaseMDPModel
is the base class for MDP models.
BaseMDPModel(seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec)
16 def __init__(self, seed: int, mdp_specs: MDPSpec): 17 """ 18 Parameters 19 ---------- 20 seed : int 21 The random seed. 22 mdp_specs : MDPSpec 23 The full specification of the MDP. 24 """ 25 26 self._seed = seed 27 self._n_states = mdp_specs.observations.num_values 28 self._n_actions = mdp_specs.actions.num_values 29 self._reward_range = mdp_specs.rewards_range 30 self._H = mdp_specs.time_horizon 31 self._rng = np.random.RandomState(seed)
Parameters
- seed (int): The random seed.
- mdp_specs (MDPSpec): The full specification of the MDP.
@abc.abstractmethod
def
step_update( self, ts_t: dm_env._environment.TimeStep, a_t: Union[int, float, numpy.ndarray], ts_tp1: dm_env._environment.TimeStep, time: int):
33 @abc.abstractmethod 34 def step_update( 35 self, 36 ts_t: dm_env.TimeStep, 37 a_t: "ACTION_TYPE", 38 ts_tp1: dm_env.TimeStep, 39 time: int, 40 ): 41 """ 42 updates the model with the transition in input. 43 44 Parameters 45 ---------- 46 ts_t : dm_env.TimeStep 47 The TimeStep at time t. 48 a_t : "ACTION_TYPE" 49 The action taken by the agent at time t. 50 ts_tp1 : dm_env.TimeStep 51 The TimeStep at time t + 1. 52 time : int 53 The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in 54 the continuous case this refers to the total number of previous interactions. 55 """
Updates the model with the given transition.
Parameters
- ts_t (dm_env.TimeStep): The TimeStep at time t.
- a_t (ACTION_TYPE): The action taken by the agent at time t.
- ts_tp1 (dm_env.TimeStep): The TimeStep at time t + 1.
- time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.