colosseum.mdp.custom_mdp
```python
import abc
from dataclasses import dataclass
from typing import Tuple, Dict, Any, List, Type, Union, TYPE_CHECKING

import numpy as np
import toolz
from scipy.stats import rv_continuous

from colosseum.mdp import BaseMDP, EpisodicMDP, ContinuousMDP
from colosseum.mdp.utils.custom_samplers import NextStateSampler
from colosseum.utils.miscellanea import deterministic

if TYPE_CHECKING:
    from colosseum.mdp import ACTION_TYPE, NODE_TYPE


@dataclass(frozen=True)
class CustomNode:
    """
    The node for the CustomMDP.
    """

    ID: int
    """The id associated to the node."""

    def __str__(self):
        return str(self.ID + 1)


def _merge_grid(grid, axis):
    # Fold every other row (axis=0) or column (axis=1) that contains a single
    # non-empty entry into its neighbour and delete it, so that the
    # visualization grid stays compact.
    indices = np.where(
        (grid == -1).sum(1 if axis == 0 else 0) == grid.shape[1 if axis == 0 else 0] - 1
    )[0][::2]
    for ind in indices:
        if axis == 1:
            grid = grid.T
        grid[ind + 1 : ind + 2][grid[ind : ind + 1] != -1] = grid[ind : ind + 1][
            grid[ind : ind + 1] != -1
        ]
        if axis == 1:
            grid = grid.T
    return np.delete(grid, indices, axis)


class CustomMDP(BaseMDP, abc.ABC):
    """
    The base class for the Custom MDP.
    """

    @staticmethod
    def get_unique_symbols() -> List[str]:
        return ["X", " ", "A"]

    @staticmethod
    def does_seed_change_MDP_structure() -> bool:
        raise NotImplementedError(
            "does_seed_change_MDP_structure is not implemented for the Custom MDP."
        )

    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        raise NotImplementedError(
            "sample_parameters is not implemented for the Custom MDP."
        )

    @staticmethod
    def sample_mdp_parameters(
        n: int, is_episodic: bool, seed: int = None
    ) -> List[Dict[str, Any]]:
        raise NotImplementedError(
            "sample_mdp_parameters is not implemented for the Custom MDP."
        )

    @staticmethod
    def get_node_class() -> Type["NODE_TYPE"]:
        return CustomNode

    @property
    def n_actions(self) -> int:
        return self._num_actions

    def _get_next_nodes_parameters(
        self, node: "NODE_TYPE", action: "ACTION_TYPE"
    ) -> Tuple[Tuple[dict, float], ...]:
        return tuple(
            (dict(ID=next_node), self.T[node.ID, action, next_node])
            for next_node in range(len(self.T))
            if self.T[node.ID, action, next_node] > 0.0
        )

    def _get_reward_distribution(
        self, node: "NODE_TYPE", action: "ACTION_TYPE", next_node: "NODE_TYPE"
    ) -> rv_continuous:
        if type(self.R) == dict:
            return self.R[node, action]
        return deterministic(self.R[node.ID, action])

    def _get_starting_node_sampler(self) -> NextStateSampler:
        return NextStateSampler(
            next_nodes=self._possible_starting_nodes,
            probs=list(self.T_0.values()),
            seed=self._produce_random_seed(),
        )

    def _check_parameters_in_input(self):
        super(CustomMDP, self)._check_parameters_in_input()

        assert self.T.ndim == 3
        assert type(self.R) in [dict, np.ndarray]
        assert np.isclose(np.sum(list(self.T_0.values())), 1)

        for s in range(len(self.T)):
            for a in range(self.T.shape[1]):
                assert np.isclose(self.T[s, a].sum(), 1), (
                    f"The transition kernel associated with state {s} and action {a} "
                    f"is not a well defined probability distribution."
                )

    def get_gin_parameters(self, index: int) -> str:
        raise NotImplementedError()

    @property
    def str_grid_node_order(self):
        if self._str_grid_node_order is None:
            self._str_grid_node_order = dict(
                zip(self.graph_layout.keys(), range(self.n_states))
            )
        return self._str_grid_node_order

    def get_node_pos_in_grid(self, node) -> Tuple[int, int]:
        """
        Returns
        -------
        Tuple[int, int]
            The position of the node in the visualization grid.
        """
        if node not in self._nodes_pos_in_grid:
            x, y = np.where(self.str_grid_node_order[node] == self.grid)
            self._nodes_pos_in_grid[node] = x[0], y[0]
        return self._nodes_pos_in_grid[node]

    @property
    def grid(self) -> np.ndarray:
        if self._grid is None:
            coo = np.array(list(self.graph_layout.values()))

            X = sorted(coo[:, 0])
            Y = sorted(coo[:, 1])

            # Place every node in a |S| x |S| grid according to the rank of
            # its layout coordinates; -1 marks empty cells.
            grid = np.zeros((len(coo), len(coo)), dtype=int) - 1

            for ind, (x, y) in enumerate(coo):
                grid[np.where(X == x)[0][0], np.where(Y == y)[0][0]] = ind

            # Repeatedly fold nearly-empty rows and columns until the grid
            # cannot be compacted any further.
            has_changed = True
            while has_changed:
                has_changed = False
                if any((grid == -1).sum(0) == grid.shape[0] - 1):
                    grid = _merge_grid(grid, 1)
                    has_changed = True
                if any((grid == -1).sum(1) == grid.shape[1] - 1):
                    grid = _merge_grid(grid, 0)
                    has_changed = True

            self._grid = grid
        return self._grid

    def _get_grid_representation(self, node: "NODE_TYPE") -> np.ndarray:
        str_grid = np.zeros(self.grid.shape[:2], dtype=str)
        str_grid[self.grid == -1] = "X"
        str_grid[self.grid != -1] = " "
        x, y = self.get_node_pos_in_grid(node)
        str_grid[x, y] = "A"
        return str_grid[::-1, :]

    @property
    def _possible_starting_nodes(self) -> List["NODE_TYPE"]:
        return list(self.T_0.keys())

    @property
    def parameters(self) -> Dict[str, Any]:
        return super(CustomMDP, self).parameters

    def __init__(
        self,
        seed: int,
        T_0: Dict[int, float],
        T: np.ndarray,
        R: Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]],
        **kwargs,
    ):
        """
        Parameters
        ----------
        seed : int
            the seed used for sampling rewards and next states.
        T_0 : Dict[int, float]
            the starting distribution, given either as a dictionary mapping state ids to
            probabilities or as a |S| array. The probabilities should sum to one.
        T : np.ndarray
            the |S| x |A| x |S| transition distribution matrix.
        R : Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]]
            the rewards, which can be passed either as a |S| x |A| array of deterministic
            values or as a dictionary mapping state-action pairs to rv_continuous objects.
        """

        self.n_states, self._num_actions, _ = T.shape

        if type(R) == dict:
            # Store the mean of each reward distribution in the reward matrix.
            _R = np.zeros((self.n_states, self._num_actions), np.float32)
            for (s, a), d in R.items():
                _R[s, a] = d.mean()
        elif type(R) == np.ndarray:
            _R = R  # deterministic rewards are used as given
        else:
            raise NotImplementedError(
                f"The type of R, {type(R)}, is not accepted as input."
            )
        if type(T_0) == np.ndarray:
            self.T_0 = {CustomNode(ID=i): p for i, p in enumerate(T_0) if T_0[i] > 0}
        elif type(T_0) == dict:
            self.T_0 = toolz.keymap(lambda x: CustomNode(ID=x), T_0)
        else:
            raise NotImplementedError(
                f"The type of T_0, {type(T_0)}, is not accepted as input."
            )

        self._transition_matrix_and_rewards = T, _R
        self._str_grid_node_order = None
        self._grid = None
        self._nodes_pos_in_grid = dict()

        super(CustomMDP, self).__init__(seed=seed, **kwargs)


class CustomEpisodic(CustomMDP, EpisodicMDP):
    """
    The episodic Custom MDP.
    """


class CustomContinuous(CustomMDP, ContinuousMDP):
    """
    The continuous Custom MDP.
    """
```
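To illustrate the grid-compaction helper, here is a small sketch with a made-up grid in which column 1 holds a single node; `_merge_grid` folds that entry into the neighbouring column and removes the now-redundant column:

```python
import numpy as np

from colosseum.mdp.custom_mdp import _merge_grid

# -1 marks empty cells; the other entries are node indices. Column 1
# contains a single node (2), so it can be folded away.
grid = np.array([
    [ 0, -1,  1],
    [-1,  2, -1],
    [ 3, -1,  4],
])

print(_merge_grid(grid, 1))
# [[ 0  1]
#  [-1  2]
#  [ 3  4]]
```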
@dataclass(frozen=True)
class CustomNode:
The node for the CustomMDP.
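Note that `__str__` returns `ID + 1`, so nodes print one-based while the underlying id is zero-based:

```python
from colosseum.mdp.custom_mdp import CustomNode

node = CustomNode(ID=0)
print(node.ID)  # 0: ids are zero-based
print(node)     # 1: the string representation is one-based
```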
class CustomMDP(BaseMDP, abc.ABC):
The base class for the Custom MDP.
CustomMDP(seed: int, T_0: Dict[int, float], T: numpy.ndarray, R: Union[numpy.ndarray, Dict[Tuple[int, int], scipy.stats._distn_infrastructure.rv_continuous]], **kwargs)
Parameters
- seed (int): the seed used for sampling rewards and next states.
- T_0 (Dict[int, float]): the starting distribution, given either as a dictionary mapping state ids to probabilities or as a |S| array. The probabilities should sum to one.
- T (np.ndarray): the |S| x |A| x |S| transition distribution matrix.
- R (Union[np.ndarray, Dict[Tuple[int, int], rv_continuous]]): the rewards, which can be passed either as a |S| x |A| array of deterministic values or as a dictionary mapping state-action pairs to rv_continuous objects.
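To make the constructor concrete, here is a minimal sketch that builds a small chain MDP. The three-state layout, the transition probabilities, and the Beta reward distributions are made up for the example, and it assumes the continuous variant needs no further keyword arguments:

```python
import numpy as np
from scipy.stats import beta

from colosseum.mdp.custom_mdp import CustomContinuous

# A made-up 3-state, 2-action chain: action 0 moves right and action 1 moves
# left with probability 0.9; the agent stays put otherwise.
n_states, n_actions = 3, 2
T = np.zeros((n_states, n_actions, n_states))
for s in range(n_states):
    T[s, 0, min(s + 1, n_states - 1)] += 0.9
    T[s, 0, s] += 0.1
    T[s, 1, max(s - 1, 0)] += 0.9
    T[s, 1, s] += 0.1

# Deterministic start in state 0; the probabilities must sum to one.
T_0 = {0: 1.0}

# Rewards as a |S| x |A| array of deterministic values...
R = np.zeros((n_states, n_actions))
R[n_states - 1, 0] = 1.0

# ...or, alternatively, as a dictionary mapping state-action pairs to
# rv_continuous objects (here, Beta distributions whose mean grows with
# the deterministic reward).
R_stochastic = {
    (s, a): beta(1 + 4 * R[s, a], 5 - 4 * R[s, a])
    for s in range(n_states)
    for a in range(n_actions)
}

mdp = CustomContinuous(seed=42, T_0=T_0, T=T, R=R)
```

Passing `R_stochastic` instead of `R` would sample the rewards from the Beta distributions while the reward matrix stores their means.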
@staticmethod
def get_unique_symbols() -> List[str]:
Returns
- List[str]: the unique symbols of the grid representation of the MDP.
@staticmethod
def does_seed_change_MDP_structure() -> bool:
Returns
- bool: True if changing the seed changes the transition matrix and/or the rewards matrix. This may happen, for example, when there are fewer starting states than possible ones and the effective starting states are picked at random based on the seed.
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
Returns
- List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
@staticmethod
def sample_mdp_parameters(n: int, is_episodic: bool, seed: int = None) -> List[Dict[str, Any]]:
Returns
- List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
@staticmethod
def get_node_class() -> Type["NODE_TYPE"]:
Returns
- Type["NODE_TYPE"]: The class of the nodes of the MDP.
def get_node_pos_in_grid(self, node) -> Tuple[int, int]:
Returns
- Tuple[int, int]: The position of the node in the visualization grid.
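As a usage sketch, assuming `mdp` is the `CustomContinuous` instance built in the constructor example above (the call to the private `_get_grid_representation` mirrors the signature shown in the source):

```python
# The node corresponding to state 0.
node = mdp.get_node_class()(ID=0)

# (row, column) of the node in the visualization grid.
row, col = mdp.get_node_pos_in_grid(node)

# The grid marks unused cells with "X", states with " " and the queried
# node with "A" (cf. get_unique_symbols).
print(mdp._get_grid_representation(node))
```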
Inherited Members
- colosseum.mdp.base.BaseMDP
- get_available_hardness_measures
- produce_gin_file_from_mdp_parameters
- is_episodic
- get_grid_representation
- get_gin_config
- get_node_labels
- get_node_action_labels
- hash
- instantiate_MDP
- T
- R
- recurrent_nodes_set
- communication_class
- get_optimal_policy
- get_worst_policy
- get_value_functions
- optimal_value_functions
- worst_value_functions
- random_value_functions
- optimal_transition_probabilities
- worst_transition_probabilities
- random_transition_probabilities
- optimal_markov_chain
- worst_markov_chain
- random_markov_chain
- get_stationary_distribution
- optimal_stationary_distribution
- worst_stationary_distribution
- random_stationary_distribution
- optimal_average_rewards
- worst_average_rewards
- random_average_rewards
- get_average_reward
- optimal_average_reward
- worst_average_reward
- random_average_reward
- transition_matrix_and_rewards
- graph_layout
- graph_metrics
- diameter
- sum_reciprocals_suboptimality_gaps
- discounted_value_norm
- undiscounted_value_norm
- value_norm
- measures_of_hardness
- summary
- hardness_report
- get_info_class
- get_transition_distributions
- get_reward_distribution
- sample_reward
- get_measure_from_name
- action_spec
- observation_spec
- get_observation
- reset
- step
- random_steps
- random_step
- get_visitation_counts
- reset_visitation_counts
- get_value_node_labels
- dm_env._environment.Environment
- reward_spec
- discount_spec
- close
class CustomEpisodic(CustomMDP, EpisodicMDP):

The episodic Custom MDP.
Inherited Members
- CustomMDP
- CustomMDP
- get_unique_symbols
- does_seed_change_MDP_structure
- sample_parameters
- sample_mdp_parameters
- get_node_class
- n_actions
- get_gin_parameters
- get_node_pos_in_grid
- parameters
- colosseum.mdp.base_finite.EpisodicMDP
- is_episodic
- H
- random_policy_cf
- random_policy
- reachable_states
- T_cf
- R_cf
- optimal_value_continuous_form
- worst_value_continuous_form
- random_value_continuous_form
- episodic_optimal_average_reward
- episodic_worst_average_reward
- episodic_random_average_reward
- continuous_form_episodic_transition_matrix_and_rewards
- episodic_transition_matrix_and_rewards
- get_optimal_policy_continuous_form
- get_worst_policy_continuous_form
- get_random_policy_continuous_form
- get_minimal_regret_for_starting_node
- get_optimal_policy_starting_value
- get_worst_policy_starting_value
- get_random_policy_starting_value
- get_episodic_graph
- get_grid_representation
- colosseum.mdp.base.BaseMDP
- get_available_hardness_measures
- produce_gin_file_from_mdp_parameters
- get_gin_config
- get_node_labels
- get_node_action_labels
- hash
- instantiate_MDP
- T
- R
- recurrent_nodes_set
- communication_class
- get_optimal_policy
- get_worst_policy
- get_value_functions
- optimal_value_functions
- worst_value_functions
- random_value_functions
- optimal_transition_probabilities
- worst_transition_probabilities
- random_transition_probabilities
- optimal_markov_chain
- worst_markov_chain
- random_markov_chain
- get_stationary_distribution
- optimal_stationary_distribution
- worst_stationary_distribution
- random_stationary_distribution
- optimal_average_rewards
- worst_average_rewards
- random_average_rewards
- get_average_reward
- optimal_average_reward
- worst_average_reward
- random_average_reward
- transition_matrix_and_rewards
- graph_layout
- graph_metrics
- diameter
- sum_reciprocals_suboptimality_gaps
- discounted_value_norm
- undiscounted_value_norm
- value_norm
- measures_of_hardness
- summary
- hardness_report
- get_info_class
- get_transition_distributions
- get_reward_distribution
- sample_reward
- get_measure_from_name
- action_spec
- observation_spec
- get_observation
- reset
- step
- random_steps
- random_step
- get_visitation_counts
- reset_visitation_counts
- get_value_node_labels
- dm_env._environment.Environment
- reward_spec
- discount_spec
- close
class CustomContinuous(CustomMDP, ContinuousMDP):

The continuous Custom MDP.
Inherited Members
- CustomMDP
- CustomMDP
- get_unique_symbols
- does_seed_change_MDP_structure
- sample_parameters
- sample_mdp_parameters
- get_node_class
- n_actions
- get_gin_parameters
- get_node_pos_in_grid
- parameters
- colosseum.mdp.base.BaseMDP
- get_available_hardness_measures
- produce_gin_file_from_mdp_parameters
- get_gin_config
- get_node_labels
- get_node_action_labels
- hash
- instantiate_MDP
- T
- R
- recurrent_nodes_set
- communication_class
- get_optimal_policy
- get_worst_policy
- get_value_functions
- optimal_value_functions
- worst_value_functions
- random_value_functions
- optimal_transition_probabilities
- worst_transition_probabilities
- random_transition_probabilities
- optimal_markov_chain
- worst_markov_chain
- random_markov_chain
- get_stationary_distribution
- optimal_stationary_distribution
- worst_stationary_distribution
- random_stationary_distribution
- optimal_average_rewards
- worst_average_rewards
- random_average_rewards
- get_average_reward
- optimal_average_reward
- worst_average_reward
- random_average_reward
- transition_matrix_and_rewards
- graph_layout
- graph_metrics
- diameter
- sum_reciprocals_suboptimality_gaps
- discounted_value_norm
- undiscounted_value_norm
- value_norm
- measures_of_hardness
- summary
- hardness_report
- get_info_class
- get_transition_distributions
- get_reward_distribution
- sample_reward
- get_measure_from_name
- action_spec
- observation_spec
- get_observation
- reset
- step
- random_steps
- random_step
- get_visitation_counts
- reset_visitation_counts
- get_value_node_labels
- dm_env._environment.Environment
- reward_spec
- discount_spec
- close