colosseum.mdp.minigrid_empty.infinite_horizon
```python
from typing import Any, Dict, List

import gin

from colosseum.mdp import ContinuousMDP
from colosseum.mdp.minigrid_empty.base import MiniGridEmptyMDP


@gin.configurable
class MiniGridEmptyContinuous(ContinuousMDP, MiniGridEmptyMDP):
    """
    The continuous MiniGridEmpty MDP.
    """

    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        return MiniGridEmptyMDP.sample_mdp_parameters(n, False, seed)
```
@gin.configurable
class MiniGridEmptyContinuous(ContinuousMDP, MiniGridEmptyMDP):
The continuous MiniGridEmpty MDP.
```python
MiniGridEmptyContinuous(
    seed: int,
    size: int,
    n_starting_states: int = 1,
    optimal_distribution: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    other_distribution: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    make_reward_stochastic=False,
    reward_variance_multiplier: float = 1.0,
    **kwargs,
)
```
```python
def __init__(
    self,
    seed: int,
    size: int,
    n_starting_states: int = 1,
    optimal_distribution: Union[Tuple, rv_continuous] = None,
    other_distribution: Union[Tuple, rv_continuous] = None,
    make_reward_stochastic=False,
    reward_variance_multiplier: float = 1.0,
    **kwargs,
):
    """
    Parameters
    ----------
    seed : int
        The seed used for sampling rewards and next states.
    size : int
        The size of the grid.
    n_starting_states : int
        The number of possible starting states.
    optimal_distribution : Union[Tuple, rv_continuous]
        The distribution of the highly rewarding state. It can be either passed as a tuple
        containing Beta parameters or as a rv_continuous object.
    other_distribution : Union[Tuple, rv_continuous]
        The distribution of the other states. It can be either passed as a tuple containing
        Beta parameters or as a rv_continuous object.
    make_reward_stochastic : bool
        If True, the rewards of the MDP will be stochastic. By default, it is set to False.
    reward_variance_multiplier : float
        A constant that can be used to increase the variance of the reward distributions
        without changing their means. The lower the value, the higher the variance.
        By default, it is set to 1.
    """

    if type(optimal_distribution) == tuple:
        optimal_distribution = get_dist(
            optimal_distribution[0], optimal_distribution[1]
        )
    if type(other_distribution) == tuple:
        other_distribution = get_dist(other_distribution[0], other_distribution[1])

    self._n_starting_states = n_starting_states
    self._size = size

    dists = [
        optimal_distribution,
        other_distribution,
    ]
    if dists.count(None) == 0:
        self._optimal_distribution = optimal_distribution
        self._other_distribution = other_distribution
    else:
        if make_reward_stochastic:
            self._other_distribution = beta(
                reward_variance_multiplier,
                reward_variance_multiplier * (size ** 2 - 1),
            )
            self._optimal_distribution = beta(
                reward_variance_multiplier * (size ** 2 - 1),
                reward_variance_multiplier,
            )
        else:
            self._optimal_distribution = deterministic(1.0)
            self._other_distribution = deterministic(0.0)

    super(MiniGridEmptyMDP, self).__init__(
        seed=seed,
        reward_variance_multiplier=reward_variance_multiplier,
        make_reward_stochastic=make_reward_stochastic,
        **kwargs,
    )
```
Parameters
- seed (int): The seed used for sampling rewards and next states.
- size (int): The size of the grid.
- n_starting_states (int): The number of possible starting states.
- optimal_distribution (Union[Tuple, rv_continuous]): The distribution of the highly rewarding state. It can be passed either as a tuple containing Beta parameters or as an rv_continuous object.
- other_distribution (Union[Tuple, rv_continuous]): The distribution of the other states. It can be passed either as a tuple containing Beta parameters or as an rv_continuous object.
- make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
- reward_variance_multiplier (float): A constant that controls the variance of the reward distributions without changing their means: the lower the value, the higher the variance. By default, it is set to 1.
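The following is a minimal construction sketch, not taken from the library's documentation: the grid size, seed, and Beta distributions are illustrative assumptions, and the reset/step calls rely on the dm_env-style interface listed under Inherited Members below.

```python
from scipy.stats import beta

from colosseum.mdp.minigrid_empty.infinite_horizon import MiniGridEmptyContinuous

# A 5x5 continuous (infinite-horizon) MiniGridEmpty MDP with two possible
# starting states and stochastic rewards.
mdp = MiniGridEmptyContinuous(
    seed=42,
    size=5,
    n_starting_states=2,
    make_reward_stochastic=True,
    # Illustrative frozen rv_continuous objects; if both are omitted, the
    # constructor falls back to Beta distributions built from
    # reward_variance_multiplier and the grid size.
    optimal_distribution=beta(9, 1),
    other_distribution=beta(1, 9),
)

ts = mdp.reset()   # dm_env-style TimeStep with the initial observation
ts = mdp.step(0)   # apply action 0 and receive the sampled reward
```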
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
```python
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
    return MiniGridEmptyMDP.sample_mdp_parameters(n, False, seed)
```
Returns
- List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
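A short sketch of how the sampled parameters might be consumed; unpacking each dictionary directly into the constructor is an assumption based on the return description above, not documented behaviour.

```python
# Draw three parameter configurations and build one MDP per configuration.
for params in MiniGridEmptyContinuous.sample_parameters(3, seed=0):
    print(params)                        # sampled keyword arguments
    mdp = MiniGridEmptyContinuous(**params)
```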
Inherited Members
- colosseum.mdp.minigrid_empty.base.MiniGridEmptyMDP
- get_unique_symbols
- does_seed_change_MDP_structure
- sample_mdp_parameters
- get_node_class
- get_gin_parameters
- n_actions
- get_positions_on_side
- parameters
- colosseum.mdp.base.BaseMDP
- get_available_hardness_measures
- produce_gin_file_from_mdp_parameters
- get_gin_config
- get_node_labels
- get_node_action_labels
- hash
- instantiate_MDP
- T
- R
- recurrent_nodes_set
- communication_class
- get_optimal_policy
- get_worst_policy
- get_value_functions
- optimal_value_functions
- worst_value_functions
- random_value_functions
- optimal_transition_probabilities
- worst_transition_probabilities
- random_transition_probabilities
- optimal_markov_chain
- worst_markov_chain
- random_markov_chain
- get_stationary_distribution
- optimal_stationary_distribution
- worst_stationary_distribution
- random_stationary_distribution
- optimal_average_rewards
- worst_average_rewards
- random_average_rewards
- get_average_reward
- optimal_average_reward
- worst_average_reward
- random_average_reward
- transition_matrix_and_rewards
- graph_layout
- graph_metrics
- diameter
- sum_reciprocals_suboptimality_gaps
- discounted_value_norm
- undiscounted_value_norm
- value_norm
- measures_of_hardness
- summary
- hardness_report
- get_info_class
- get_transition_distributions
- get_reward_distribution
- sample_reward
- get_measure_from_name
- action_spec
- observation_spec
- get_observation
- reset
- step
- random_steps
- random_step
- get_visitation_counts
- reset_visitation_counts
- get_value_node_labels
- dm_env._environment.Environment
- reward_spec
- discount_spec
- close