colosseum.mdp.custom_mdp

  1import abc
  2from dataclasses import dataclass
  3from typing import Tuple, Dict, Any, List, Type, Union, TYPE_CHECKING
  4
  5import numpy as np
  6import toolz
  7from scipy.stats import rv_continuous
  8
  9from colosseum.mdp import BaseMDP, EpisodicMDP, ContinuousMDP
 10from colosseum.mdp.utils.custom_samplers import NextStateSampler
 11from colosseum.utils.miscellanea import deterministic
 12
 13if TYPE_CHECKING:
 14    from colosseum.mdp import ACTION_TYPE, NODE_TYPE
 15
 16
 17@dataclass(frozen=True)
 18class CustomNode:
 19    """
 20    The node for the CustomMDP.
 21    """
 22
 23    ID: int
 24    """The id associated to the node."""
 25
 26    def __str__(self):
 27        return str(self.ID + 1)
 28
 29
 30def _merge_grid(grid, axis):
 31    indices = np.where(
 32        (grid == -1).sum(1 if axis == 0 else 0) == grid.shape[1 if axis == 0 else 0] - 1
 33    )[0][::2]
 34    for ind in indices:
 35        if axis == 1:
 36            grid = grid.T
 37        grid[ind + 1 : ind + 2][grid[ind : ind + 1] != -1] = grid[ind : ind + 1][
 38            grid[ind : ind + 1] != -1
 39        ]
 40        if axis == 1:
 41            grid = grid.T
 42    return np.delete(grid, indices, axis)
 43
 44
 45class CustomMDP(BaseMDP, abc.ABC):
 46    """
 47    The base class for the Custom MDP.
 48    """
 49
 50    @staticmethod
 51    def get_unique_symbols() -> List[str]:
 52        return ["X", " ", "A"]
 53
 54    @staticmethod
 55    def does_seed_change_MDP_structure() -> bool:
 56        raise NotImplementedError(
 57            "does_seed_change_MDP_structure is not implemented for the Custom MDP."
 58        )
 59
 60    @staticmethod
 61    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
 62        raise NotImplementedError(
 63            "sample_parameters is not implemented for the Custom MDP."
 64        )
 65
 66    @staticmethod
 67    def sample_mdp_parameters(
 68        n: int, is_episodic: bool, seed: int = None
 69    ) -> List[Dict[str, Any]]:
 70        raise NotImplementedError(
 71            "sample_mdp_parameters is not implemented for the Custom MDP."
 72        )
 73
 74    @staticmethod
 75    def get_node_class() -> Type["NODE_TYPE"]:
 76        return CustomNode
 77
 78    @property
 79    def n_actions(self) -> int:
 80        return self._num_actions
 81
 82    def _get_next_nodes_parameters(
 83        self, node: "NODE_TYPE", action: "ACTION_TYPE"
 84    ) -> Tuple[Tuple[dict, float], ...]:
 85        return tuple(
 86            (dict(ID=next_node), self.T[node.ID, action, next_node])
 87            for next_node in range(len(self.T))
 88            if self.T[node.ID, action, next_node] > 0.0
 89        )
 90
 91    def _get_reward_distribution(
 92        self, node: "NODE_TYPE", action: "ACTION_TYPE", next_node: "NODE_TYPE"
 93    ) -> rv_continuous:
 94        if type(self.R) == dict:
 95            return self.R[node, action]
 96        return deterministic(self.R[node.ID, action])
 97
 98    def _get_starting_node_sampler(self) -> NextStateSampler:
 99        return NextStateSampler(
100            next_nodes=self._possible_starting_nodes,
101            probs=list(self.T_0.values()),
102            seed=self._produce_random_seed(),
103        )
104
105    def _check_parameters_in_input(self):
106        super(CustomMDP, self)._check_parameters_in_input()
107
108        assert self.T.ndim == 3
109        assert type(self.R) in [dict, np.ndarray]
110        assert np.isclose(np.sum(list(self.T_0.values())), 1)
111
112        for s in range(len(self.T)):
113            for a in range(self.T.shape[1]):
114                assert np.isclose(self.T[s, a].sum(), 1), (
115                    f"The transition kernel associated with state {s} and action {a} "
116                    f"is not a well defined probability distribution."
117                )
118
119    def get_gin_parameters(self, index: int) -> str:
120        raise NotImplementedError()
121
122    @property
123    def str_grid_node_order(self):
124        if self._str_grid_node_order is None:
125            self._str_grid_node_order = dict(
126                zip(self.graph_layout.keys(), range(self.n_states))
127            )
128        return self._str_grid_node_order
129
130    def get_node_pos_in_grid(self, node) -> Tuple[int, int]:
131        """
132        Returns
133        -------
134        Tuple[int, int]
135            The position of the node in the visualization grid.
136        """
137        if node not in self._nodes_pos_in_grid:
138            x, y = np.where((self.str_grid_node_order[node] == self.grid))
139            self._nodes_pos_in_grid[node] = x[0], y[0]
140        return self._nodes_pos_in_grid[node]
141
142    @property
143    def grid(self) -> np.ndarray:
144        if self._grid is None:
145            coo = np.array(list(self.graph_layout.values()))
146
147            X = sorted(coo[:, 0])
148            Y = sorted(coo[:, 1])
149
150            grid = np.zeros((len(coo), len(coo)), dtype=int) - 1
151
152            for ind, (x, y) in enumerate(coo):
153                grid[np.where(X == x)[0][0], np.where(Y == y)[0][0]] = ind
154
155            has_changed = True
156            while has_changed:
157                has_changed = False
158                if any((grid == -1).sum(0) == grid.shape[0] - 1):
159                    grid = _merge_grid(grid, 1)
160                    has_changed = True
161                if any((grid == -1).sum(1) == grid.shape[1] - 1):
162                    grid = _merge_grid(grid, 0)
163                    has_changed = True
164
165            self._grid = grid
166        return self._grid
167
168    def _get_grid_representation(self, node: "NODE_TYPE") -> np.ndarray:
169        str_grid = np.zeros(self.grid.shape[:2], dtype=str)
170        str_grid[self.grid == -1] = "X"
171        str_grid[self.grid != -1] = " "
172        x, y = self.get_node_pos_in_grid(node)
173        str_grid[x, y] = "A"
174        return str_grid[::-1, :]
175
176    @property
177    def _possible_starting_nodes(self) -> List["NODE_TYPE"]:
178        return list(self.T_0.keys())
179
180    @property
181    def parameters(self) -> Dict[str, Any]:
182        return super(CustomMDP, self).parameters
183
184    def __init__(
185        self,
186        seed: int,
187        T_0: Dict[int, float],
188        T: np.ndarray,
189        R: Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]],
190        **kwargs,
191    ):
192        """
193        Parameters
194        ----------
195        seed : int
196            the seed used for sampling rewards and next states.
197        T_0 : Dict[int, float]
198            the starting distribution. Note that the values of the dictionary should sum to one.
199        T : np.ndarray
200            the |S| x |A| x |S| transition distribution matrix.
201        R : Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]]
202            the rewards can be either passed as a |S| x |A| array filled with deterministic values or with a dictionary of
203            state action pairs and rv_continuous objects.
204        """
205
206        self.n_states, self._num_actions, _ = T.shape
207
208        if type(R) == dict:
209            _R = np.zeros((self.n_states, self._num_actions), np.float32)
210            for (s, a), d in R.items():
211                _R[s, a] = d.mean()
212        elif type(R) == np.ndarray:
213            pass
214        else:
215            raise NotImplementedError(
216                f"The type of R, {type(R)}, is not accepted as input."
217            )
218        if type(T_0) == np.ndarray:
219            self.T_0 = {CustomNode(ID=i): p for i, p in enumerate(T_0) if T_0[i] > 0}
220        elif type(T_0) == dict:
221            self.T_0 = toolz.keymap(lambda x: CustomNode(ID=x), T_0)
222        else:
223            raise NotImplementedError(
224                f"The type of T_0, {type(T_0)}, is not accepted as input."
225            )
226
227        self._transition_matrix_and_rewards = T, _R
228        self._str_grid_node_order = None
229        self._grid = None
230        self._nodes_pos_in_grid = dict()
231
232        super(CustomMDP, self).__init__(seed=seed, **kwargs)
233
234
class CustomEpisodic(CustomMDP, EpisodicMDP):
    """
    The episodic Custom MDP, which combines CustomMDP with EpisodicMDP.
    """
239
240
class CustomContinuous(CustomMDP, ContinuousMDP):
    """
    The continuous Custom MDP, which combines CustomMDP with ContinuousMDP.
    """
@dataclass(frozen=True)
class CustomNode:
18@dataclass(frozen=True)
19class CustomNode:
20    """
21    The node for the CustomMDP.
22    """
23
24    ID: int
25    """The id associated to the node."""
26
27    def __str__(self):
28        return str(self.ID + 1)

The node for the CustomMDP.

CustomNode(ID: int)
ID: int

The id associated to the node.

class CustomMDP(colosseum.mdp.base.BaseMDP, abc.ABC):
 46class CustomMDP(BaseMDP, abc.ABC):
 47    """
 48    The base class for the Custom MDP.
 49    """
 50
 51    @staticmethod
 52    def get_unique_symbols() -> List[str]:
 53        return ["X", " ", "A"]
 54
 55    @staticmethod
 56    def does_seed_change_MDP_structure() -> bool:
 57        raise NotImplementedError(
 58            "does_seed_change_MDP_structure is not implemented for the Custom MDP."
 59        )
 60
 61    @staticmethod
 62    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
 63        raise NotImplementedError(
 64            "sample_parameters is not implemented for the Custom MDP."
 65        )
 66
 67    @staticmethod
 68    def sample_mdp_parameters(
 69        n: int, is_episodic: bool, seed: int = None
 70    ) -> List[Dict[str, Any]]:
 71        raise NotImplementedError(
 72            "sample_mdp_parameters is not implemented for the Custom MDP."
 73        )
 74
 75    @staticmethod
 76    def get_node_class() -> Type["NODE_TYPE"]:
 77        return CustomNode
 78
 79    @property
 80    def n_actions(self) -> int:
 81        return self._num_actions
 82
 83    def _get_next_nodes_parameters(
 84        self, node: "NODE_TYPE", action: "ACTION_TYPE"
 85    ) -> Tuple[Tuple[dict, float], ...]:
 86        return tuple(
 87            (dict(ID=next_node), self.T[node.ID, action, next_node])
 88            for next_node in range(len(self.T))
 89            if self.T[node.ID, action, next_node] > 0.0
 90        )
 91
 92    def _get_reward_distribution(
 93        self, node: "NODE_TYPE", action: "ACTION_TYPE", next_node: "NODE_TYPE"
 94    ) -> rv_continuous:
 95        if type(self.R) == dict:
 96            return self.R[node, action]
 97        return deterministic(self.R[node.ID, action])
 98
 99    def _get_starting_node_sampler(self) -> NextStateSampler:
100        return NextStateSampler(
101            next_nodes=self._possible_starting_nodes,
102            probs=list(self.T_0.values()),
103            seed=self._produce_random_seed(),
104        )
105
106    def _check_parameters_in_input(self):
107        super(CustomMDP, self)._check_parameters_in_input()
108
109        assert self.T.ndim == 3
110        assert type(self.R) in [dict, np.ndarray]
111        assert np.isclose(np.sum(list(self.T_0.values())), 1)
112
113        for s in range(len(self.T)):
114            for a in range(self.T.shape[1]):
115                assert np.isclose(self.T[s, a].sum(), 1), (
116                    f"The transition kernel associated with state {s} and action {a} "
117                    f"is not a well defined probability distribution."
118                )
119
120    def get_gin_parameters(self, index: int) -> str:
121        raise NotImplementedError()
122
123    @property
124    def str_grid_node_order(self):
125        if self._str_grid_node_order is None:
126            self._str_grid_node_order = dict(
127                zip(self.graph_layout.keys(), range(self.n_states))
128            )
129        return self._str_grid_node_order
130
131    def get_node_pos_in_grid(self, node) -> Tuple[int, int]:
132        """
133        Returns
134        -------
135        Tuple[int, int]
136            The position of the node in the visualization grid.
137        """
138        if node not in self._nodes_pos_in_grid:
139            x, y = np.where((self.str_grid_node_order[node] == self.grid))
140            self._nodes_pos_in_grid[node] = x[0], y[0]
141        return self._nodes_pos_in_grid[node]
142
143    @property
144    def grid(self) -> np.ndarray:
145        if self._grid is None:
146            coo = np.array(list(self.graph_layout.values()))
147
148            X = sorted(coo[:, 0])
149            Y = sorted(coo[:, 1])
150
151            grid = np.zeros((len(coo), len(coo)), dtype=int) - 1
152
153            for ind, (x, y) in enumerate(coo):
154                grid[np.where(X == x)[0][0], np.where(Y == y)[0][0]] = ind
155
156            has_changed = True
157            while has_changed:
158                has_changed = False
159                if any((grid == -1).sum(0) == grid.shape[0] - 1):
160                    grid = _merge_grid(grid, 1)
161                    has_changed = True
162                if any((grid == -1).sum(1) == grid.shape[1] - 1):
163                    grid = _merge_grid(grid, 0)
164                    has_changed = True
165
166            self._grid = grid
167        return self._grid
168
169    def _get_grid_representation(self, node: "NODE_TYPE") -> np.ndarray:
170        str_grid = np.zeros(self.grid.shape[:2], dtype=str)
171        str_grid[self.grid == -1] = "X"
172        str_grid[self.grid != -1] = " "
173        x, y = self.get_node_pos_in_grid(node)
174        str_grid[x, y] = "A"
175        return str_grid[::-1, :]
176
177    @property
178    def _possible_starting_nodes(self) -> List["NODE_TYPE"]:
179        return list(self.T_0.keys())
180
181    @property
182    def parameters(self) -> Dict[str, Any]:
183        return super(CustomMDP, self).parameters
184
185    def __init__(
186        self,
187        seed: int,
188        T_0: Dict[int, float],
189        T: np.ndarray,
190        R: Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]],
191        **kwargs,
192    ):
193        """
194        Parameters
195        ----------
196        seed : int
197            the seed used for sampling rewards and next states.
198        T_0 : Dict[int, float]
199            the starting distribution. Note that the values of the dictionary should sum to one.
200        T : np.ndarray
201            the |S| x |A| x |S| transition distribution matrix.
202        R : Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]]
203            the rewards can be either passed as a |S| x |A| array filled with deterministic values or with a dictionary of
204            state action pairs and rv_continuous objects.
205        """
206
207        self.n_states, self._num_actions, _ = T.shape
208
209        if type(R) == dict:
210            _R = np.zeros((self.n_states, self._num_actions), np.float32)
211            for (s, a), d in R.items():
212                _R[s, a] = d.mean()
213        elif type(R) == np.ndarray:
214            pass
215        else:
216            raise NotImplementedError(
217                f"The type of R, {type(R)}, is not accepted as input."
218            )
219        if type(T_0) == np.ndarray:
220            self.T_0 = {CustomNode(ID=i): p for i, p in enumerate(T_0) if T_0[i] > 0}
221        elif type(T_0) == dict:
222            self.T_0 = toolz.keymap(lambda x: CustomNode(ID=x), T_0)
223        else:
224            raise NotImplementedError(
225                f"The type of T_0, {type(T_0)}, is not accepted as input."
226            )
227
228        self._transition_matrix_and_rewards = T, _R
229        self._str_grid_node_order = None
230        self._grid = None
231        self._nodes_pos_in_grid = dict()
232
233        super(CustomMDP, self).__init__(seed=seed, **kwargs)

The base class for the Custom MDP.

CustomMDP( seed: int, T_0: Dict[int, float], T: numpy.ndarray, R: Union[numpy.ndarray, Dict[Tuple[int, int], scipy.stats._distn_infrastructure.rv_continuous]], **kwargs)
185    def __init__(
186        self,
187        seed: int,
188        T_0: Dict[int, float],
189        T: np.ndarray,
190        R: Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]],
191        **kwargs,
192    ):
193        """
194        Parameters
195        ----------
196        seed : int
197            the seed used for sampling rewards and next states.
198        T_0 : Dict[int, float]
199            the starting distribution. Note that the values of the dictionary should sum to one.
200        T : np.ndarray
201            the |S| x |A| x |S| transition distribution matrix.
202        R : Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]]
203            the rewards can be either passed as a |S| x |A| array filled with deterministic values or with a dictionary of
204            state action pairs and rv_continuous objects.
205        """
206
207        self.n_states, self._num_actions, _ = T.shape
208
209        if type(R) == dict:
210            _R = np.zeros((self.n_states, self._num_actions), np.float32)
211            for (s, a), d in R.items():
212                _R[s, a] = d.mean()
213        elif type(R) == np.ndarray:
214            pass
215        else:
216            raise NotImplementedError(
217                f"The type of R, {type(R)}, is not accepted as input."
218            )
219        if type(T_0) == np.ndarray:
220            self.T_0 = {CustomNode(ID=i): p for i, p in enumerate(T_0) if T_0[i] > 0}
221        elif type(T_0) == dict:
222            self.T_0 = toolz.keymap(lambda x: CustomNode(ID=x), T_0)
223        else:
224            raise NotImplementedError(
225                f"The type of T_0, {type(T_0)}, is not accepted as input."
226            )
227
228        self._transition_matrix_and_rewards = T, _R
229        self._str_grid_node_order = None
230        self._grid = None
231        self._nodes_pos_in_grid = dict()
232
233        super(CustomMDP, self).__init__(seed=seed, **kwargs)
Parameters
  • seed (int): the seed used for sampling rewards and next states.
  • T_0 (Dict[int, float]): the starting distribution. Note that the values of the dictionary should sum to one.
  • T (np.ndarray): the |S| x |A| x |S| transition distribution matrix.
  • R (Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]]): the rewards can be either passed as a |S| x |A| array filled with deterministic values or with a dictionary of state action pairs and rv_continuous objects.
@staticmethod
def get_unique_symbols() -> List[str]:
51    @staticmethod
52    def get_unique_symbols() -> List[str]:
53        return ["X", " ", "A"]
Returns
  • List[str]: the unique symbols of the grid representation of the MDP.
@staticmethod
def does_seed_change_MDP_structure() -> bool:
55    @staticmethod
56    def does_seed_change_MDP_structure() -> bool:
57        raise NotImplementedError(
58            "does_seed_change_MDP_structure is not implemented for the Custom MDP."
59        )
Returns
  • bool: True if changing the seed changes the transition matrix and/or the rewards matrix. This may happen, for
    example, when there are fewer starting states than possible ones and the effective starting states are picked
    randomly based on the seed.
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
61    @staticmethod
62    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
63        raise NotImplementedError(
64            "sample_parameters is not implemented for the Custom MDP."
65        )
Returns
  • List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
@staticmethod
def sample_mdp_parameters(n: int, is_episodic: bool, seed: int = None) -> List[Dict[str, Any]]:
67    @staticmethod
68    def sample_mdp_parameters(
69        n: int, is_episodic: bool, seed: int = None
70    ) -> List[Dict[str, Any]]:
71        raise NotImplementedError(
72            "sample_mdp_parameters is not implemented for the Custom MDP."
73        )
Returns
  • List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
n_actions: int
Returns
  • int: The number of available actions.
def get_gin_parameters(self, index: int) -> str:
120    def get_gin_parameters(self, index: int) -> str:
121        raise NotImplementedError()
Returns
  • str: The gin config of the MDP instance.
def get_node_pos_in_grid(self, node) -> Tuple[int, int]:
131    def get_node_pos_in_grid(self, node) -> Tuple[int, int]:
132        """
133        Returns
134        -------
135        Tuple[int, int]
136            The position of the node in the visualization grid.
137        """
138        if node not in self._nodes_pos_in_grid:
139            x, y = np.where((self.str_grid_node_order[node] == self.grid))
140            self._nodes_pos_in_grid[node] = x[0], y[0]
141        return self._nodes_pos_in_grid[node]
Returns
  • Tuple[int, int]: The position of the node in the visualization grid.
parameters: Dict[str, Any]
Returns
  • Dict[str, Any]: The parameters of the MDP.
class CustomEpisodic(CustomMDP, colosseum.mdp.base_finite.EpisodicMDP):
236class CustomEpisodic(CustomMDP, EpisodicMDP):
237    """
238    The episodic Custom MDP.
239    """

The episodic Custom MDP.

Inherited Members
CustomMDP
CustomMDP
get_unique_symbols
does_seed_change_MDP_structure
sample_parameters
sample_mdp_parameters
get_node_class
n_actions
get_gin_parameters
get_node_pos_in_grid
parameters
colosseum.mdp.base_finite.EpisodicMDP
is_episodic
H
random_policy_cf
random_policy
reachable_states
T_cf
R_cf
optimal_value_continuous_form
worst_value_continuous_form
random_value_continuous_form
episodic_optimal_average_reward
episodic_worst_average_reward
episodic_random_average_reward
continuous_form_episodic_transition_matrix_and_rewards
episodic_transition_matrix_and_rewards
get_optimal_policy_continuous_form
get_worst_policy_continuous_form
get_random_policy_continuous_form
get_minimal_regret_for_starting_node
get_optimal_policy_starting_value
get_worst_policy_starting_value
get_random_policy_starting_value
get_episodic_graph
get_grid_representation
colosseum.mdp.base.BaseMDP
get_available_hardness_measures
produce_gin_file_from_mdp_parameters
get_gin_config
get_node_labels
get_node_action_labels
hash
instantiate_MDP
T
R
recurrent_nodes_set
communication_class
get_optimal_policy
get_worst_policy
get_value_functions
optimal_value_functions
worst_value_functions
random_value_functions
optimal_transition_probabilities
worst_transition_probabilities
random_transition_probabilities
optimal_markov_chain
worst_markov_chain
random_markov_chain
get_stationary_distribution
optimal_stationary_distribution
worst_stationary_distribution
random_stationary_distribution
optimal_average_rewards
worst_average_rewards
random_average_rewards
get_average_reward
optimal_average_reward
worst_average_reward
random_average_reward
transition_matrix_and_rewards
graph_layout
graph_metrics
diameter
sum_reciprocals_suboptimality_gaps
discounted_value_norm
undiscounted_value_norm
value_norm
measures_of_hardness
summary
hardness_report
get_info_class
get_transition_distributions
get_reward_distribution
sample_reward
get_measure_from_name
action_spec
observation_spec
get_observation
reset
step
random_steps
random_step
get_visitation_counts
reset_visitation_counts
get_value_node_labels
dm_env._environment.Environment
reward_spec
discount_spec
close
class CustomContinuous(CustomMDP, colosseum.mdp.base_infinite.ContinuousMDP):
242class CustomContinuous(CustomMDP, ContinuousMDP):
243    """
244    The continuous Custom MDP.
245    """

The continuous Custom MDP.

Inherited Members
CustomMDP
CustomMDP
get_unique_symbols
does_seed_change_MDP_structure
sample_parameters
sample_mdp_parameters
get_node_class
n_actions
get_gin_parameters
get_node_pos_in_grid
parameters
colosseum.mdp.base_infinite.ContinuousMDP
is_episodic
get_grid_representation
colosseum.mdp.base.BaseMDP
get_available_hardness_measures
produce_gin_file_from_mdp_parameters
get_gin_config
get_node_labels
get_node_action_labels
hash
instantiate_MDP
T
R
recurrent_nodes_set
communication_class
get_optimal_policy
get_worst_policy
get_value_functions
optimal_value_functions
worst_value_functions
random_value_functions
optimal_transition_probabilities
worst_transition_probabilities
random_transition_probabilities
optimal_markov_chain
worst_markov_chain
random_markov_chain
get_stationary_distribution
optimal_stationary_distribution
worst_stationary_distribution
random_stationary_distribution
optimal_average_rewards
worst_average_rewards
random_average_rewards
get_average_reward
optimal_average_reward
worst_average_reward
random_average_reward
transition_matrix_and_rewards
graph_layout
graph_metrics
diameter
sum_reciprocals_suboptimality_gaps
discounted_value_norm
undiscounted_value_norm
value_norm
measures_of_hardness
summary
hardness_report
get_info_class
get_transition_distributions
get_reward_distribution
sample_reward
get_measure_from_name
action_spec
observation_spec
get_observation
reset
step
random_steps
random_step
get_visitation_counts
reset_visitation_counts
get_value_node_labels
dm_env._environment.Environment
reward_spec
discount_spec
close