colosseum.agent.mdp_models.base

import abc

import dm_env
import numpy as np

from colosseum.mdp import ACTION_TYPE
from colosseum.utils.acme.specs import MDPSpec
 8
 9
class BaseMDPModel(abc.ABC):
    """
    The `BaseMDPModel` is the base class for MDP models.

    It caches the dimensions of the MDP (number of states and actions, reward
    range, and time horizon) from the given specification and creates a seeded
    random number generator that subclasses can use for stochastic estimates.
    """

    def __init__(self, seed: int, mdp_specs: "MDPSpec"):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        """

        # Keep the raw seed so subclasses can derive further generators from it.
        self._seed = seed
        self._n_states = mdp_specs.observations.num_values
        self._n_actions = mdp_specs.actions.num_values
        self._reward_range = mdp_specs.rewards_range
        self._H = mdp_specs.time_horizon
        # Seeded generator: all model randomness is reproducible given `seed`.
        self._rng = np.random.RandomState(seed)

    @abc.abstractmethod
    def step_update(
        self,
        ts_t: "dm_env.TimeStep",
        a_t: "ACTION_TYPE",
        ts_tp1: "dm_env.TimeStep",
        time: int,
    ):
        """
        Updates the model with the transition in input.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.
        """
class BaseMDPModel(abc.ABC):
    """
    The `BaseMDPModel` is the base class for MDP models.
    """

    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        """

        self._seed = seed
        self._n_states = mdp_specs.observations.num_values
        self._n_actions = mdp_specs.actions.num_values
        self._reward_range = mdp_specs.rewards_range
        self._H = mdp_specs.time_horizon
        self._rng = np.random.RandomState(seed)

    @abc.abstractmethod
    def step_update(
        self,
        ts_t: dm_env.TimeStep,
        a_t: "ACTION_TYPE",
        ts_tp1: dm_env.TimeStep,
        time: int,
    ):
        """
        Updates the model with the transition in input.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.
        """

The BaseMDPModel is the base class for MDP models.

BaseMDPModel(seed: int, mdp_specs: colosseum.utils.acme.specs.MDPSpec)
    def __init__(self, seed: int, mdp_specs: MDPSpec):
        """
        Parameters
        ----------
        seed : int
            The random seed.
        mdp_specs : MDPSpec
            The full specification of the MDP.
        """

        self._seed = seed
        self._n_states = mdp_specs.observations.num_values
        self._n_actions = mdp_specs.actions.num_values
        self._reward_range = mdp_specs.rewards_range
        self._H = mdp_specs.time_horizon
        self._rng = np.random.RandomState(seed)
Parameters
  • seed (int): The random seed.
  • mdp_specs (MDPSpec): The full specification of the MDP.
@abc.abstractmethod
def step_update( self, ts_t: dm_env._environment.TimeStep, a_t: Union[int, float, numpy.ndarray], ts_tp1: dm_env._environment.TimeStep, time: int):
    @abc.abstractmethod
    def step_update(
        self,
        ts_t: dm_env.TimeStep,
        a_t: "ACTION_TYPE",
        ts_tp1: dm_env.TimeStep,
        time: int,
    ):
        """
        Updates the model with the transition in input.

        Parameters
        ----------
        ts_t : dm_env.TimeStep
            The TimeStep at time t.
        a_t : "ACTION_TYPE"
            The action taken by the agent at time t.
        ts_tp1 : dm_env.TimeStep
            The TimeStep at time t + 1.
        time : int
            The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in
            the continuous case this refers to the total number of previous interactions.
        """

Updates the model with the transition in input.

Parameters
  • ts_t (dm_env.TimeStep): The TimeStep at time t.
  • a_t ("ACTION_TYPE"): The action taken by the agent at time t.
  • ts_tp1 (dm_env.TimeStep): The TimeStep at time t + 1.
  • time (int): The current time of the environment. In the episodic case, this refers to the in-episode time, whereas in the continuous case this refers to the total number of previous interactions.