colosseum.mdp.minigrid_rooms.infinite_horizon
from typing import Any, Dict, List

import gin

from colosseum.mdp import ContinuousMDP
from colosseum.mdp.minigrid_rooms.base import MiniGridRoomsMDP


@gin.configurable
class MiniGridRoomsContinuous(ContinuousMDP, MiniGridRoomsMDP):
    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        return MiniGridRoomsMDP.sample_mdp_parameters(n, False, seed)
@gin.configurable
class MiniGridRoomsContinuous(ContinuousMDP, MiniGridRoomsMDP):
The base class for continuous MDPs.
MiniGridRoomsContinuous(
    seed: int,
    room_size: int,
    n_rooms: int = 4,
    n_starting_states: int = 2,
    optimal_distribution: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    other_distribution: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    make_reward_stochastic=False,
    reward_variance_multiplier: float = 1.0,
    **kwargs
)
def __init__(
    self,
    seed: int,
    room_size: int,
    n_rooms: int = 4,
    n_starting_states: int = 2,
    optimal_distribution: Union[Tuple, rv_continuous] = None,
    other_distribution: Union[Tuple, rv_continuous] = None,
    make_reward_stochastic=False,
    reward_variance_multiplier: float = 1.0,
    **kwargs,
):
    """

    Parameters
    ----------
    seed : int
        The seed used for sampling rewards and next states.
    room_size : int
        The size of the rooms.
    n_rooms : int
        The number of rooms. This must be a perfect square.
    n_starting_states : int
        The number of possible starting states.
    optimal_distribution : Union[Tuple, rv_continuous]
        The distribution of the highly rewarding state. It can be passed either as a tuple containing
        the Beta parameters or as an rv_continuous object.
    other_distribution : Union[Tuple, rv_continuous]
        The distribution of the other states. It can be passed either as a tuple containing the Beta
        parameters or as an rv_continuous object.
    make_reward_stochastic : bool
        If True, the rewards of the MDP will be stochastic. By default, it is set to False.
    reward_variance_multiplier : float
        A constant that can be used to increase the variance of the reward distributions without changing their means.
        The lower the value, the higher the variance. By default, it is set to 1.
    """

    if type(optimal_distribution) == tuple:
        optimal_distribution = get_dist(
            optimal_distribution[0], optimal_distribution[1]
        )
    if type(other_distribution) == tuple:
        other_distribution = get_dist(other_distribution[0], other_distribution[1])

    self._n_starting_states = n_starting_states
    self._room_size = room_size
    self._n_rooms = n_rooms

    dists = [
        optimal_distribution,
        other_distribution,
    ]
    if dists.count(None) == 0:
        self._optimal_distribution = optimal_distribution
        self._other_distribution = other_distribution
    else:
        if make_reward_stochastic:
            size = int(room_size * n_rooms ** 0.5)
            self._other_distribution = beta(
                reward_variance_multiplier,
                reward_variance_multiplier * (size ** 2 - 1),
            )
            self._optimal_distribution = beta(
                reward_variance_multiplier * (size ** 2 - 1),
                reward_variance_multiplier,
            )
        else:
            self._optimal_distribution = deterministic(1.0)
            self._other_distribution = deterministic(0.0)

    super(MiniGridRoomsMDP, self).__init__(
        seed=seed,
        reward_variance_multiplier=reward_variance_multiplier,
        make_reward_stochastic=make_reward_stochastic,
        **kwargs,
    )
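When neither distribution is passed and make_reward_stochastic is True, the constructor above derives the default Beta parameters from the side length of the full grid, size = room_size * sqrt(n_rooms): the other states get mean reward 1 / size² and the highly rewarding state gets mean 1 - 1 / size², independently of reward_variance_multiplier. A quick sanity check of those defaults (the concrete values below are purely illustrative):

```python
from scipy.stats import beta

room_size, n_rooms, c = 4, 4, 1.0        # c plays the role of reward_variance_multiplier
size = int(room_size * n_rooms ** 0.5)   # side length of the full grid: 8 here

other = beta(c, c * (size ** 2 - 1))     # mean = 1 / size**2 ≈ 0.016
optimal = beta(c * (size ** 2 - 1), c)   # mean = 1 - 1 / size**2 ≈ 0.984
print(other.mean(), optimal.mean())
```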
Parameters
- seed (int): The seed used for sampling rewards and next states.
- room_size (int): The size of the rooms.
- n_rooms (int): The number of rooms. This must be a perfect square.
- n_starting_states (int): The number of possible starting states.
- optimal_distribution (Union[Tuple, rv_continuous]): The distribution of the highly rewarding state. It can be passed either as a tuple containing the Beta parameters or as an rv_continuous object.
- other_distribution (Union[Tuple, rv_continuous]): The distribution of the other states. It can be passed either as a tuple containing the Beta parameters or as an rv_continuous object.
- make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
- reward_variance_multiplier (float): A constant that can be used to increase the variance of the reward distributions without changing their means. The lower the value, the higher the variance. By default, it is set to 1.
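A minimal construction sketch based on the signature above, assuming the remaining keyword arguments accepted by the base MDP can be left at their defaults; the concrete values are arbitrary:

```python
from colosseum.mdp.minigrid_rooms.infinite_horizon import MiniGridRoomsContinuous

mdp = MiniGridRoomsContinuous(
    seed=42,
    room_size=4,            # each room is a 4x4 grid
    n_rooms=4,              # must be a perfect square
    n_starting_states=2,
    make_reward_stochastic=True,
)
```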
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
    return MiniGridRoomsMDP.sample_mdp_parameters(n, False, seed)
Returns
- List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
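A usage sketch for sample_parameters; that the sampled dictionaries contain every argument the constructor needs (including a seed) is an assumption here:

```python
# Hypothetical usage: sample three parameter sets and build the corresponding MDPs.
params_list = MiniGridRoomsContinuous.sample_parameters(n=3, seed=0)
mdps = [MiniGridRoomsContinuous(**params) for params in params_list]
```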
Inherited Members
- colosseum.mdp.minigrid_rooms.base.MiniGridRoomsMDP
- get_unique_symbols
- does_seed_change_MDP_structure
- sample_mdp_parameters
- get_node_class
- get_gin_parameters
- n_actions
- get_positions_coords_in_room
- parameters
- colosseum.mdp.base.BaseMDP
- get_available_hardness_measures
- produce_gin_file_from_mdp_parameters
- get_gin_config
- get_node_labels
- get_node_action_labels
- hash
- instantiate_MDP
- T
- R
- recurrent_nodes_set
- communication_class
- get_optimal_policy
- get_worst_policy
- get_value_functions
- optimal_value_functions
- worst_value_functions
- random_value_functions
- optimal_transition_probabilities
- worst_transition_probabilities
- random_transition_probabilities
- optimal_markov_chain
- worst_markov_chain
- random_markov_chain
- get_stationary_distribution
- optimal_stationary_distribution
- worst_stationary_distribution
- random_stationary_distribution
- optimal_average_rewards
- worst_average_rewards
- random_average_rewards
- get_average_reward
- optimal_average_reward
- worst_average_reward
- random_average_reward
- transition_matrix_and_rewards
- graph_layout
- graph_metrics
- diameter
- sum_reciprocals_suboptimality_gaps
- discounted_value_norm
- undiscounted_value_norm
- value_norm
- measures_of_hardness
- summary
- hardness_report
- get_info_class
- get_transition_distributions
- get_reward_distribution
- sample_reward
- get_measure_from_name
- action_spec
- observation_spec
- get_observation
- reset
- step
- random_steps
- random_step
- get_visitation_counts
- reset_visitation_counts
- get_value_node_labels
- dm_env._environment.Environment
- reward_spec
- discount_spec
- close
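The inherited members above include the dm_env.Environment interface (reset, step, action_spec, observation_spec), so an interaction loop can be sketched as follows; that action_spec returns a discrete spec exposing a num_values attribute is an assumption about how colosseum represents its actions:

```python
import numpy as np

rng = np.random.default_rng(0)
timestep = mdp.reset()
for _ in range(10):
    # Assumes a discrete action spec with a num_values attribute.
    action = int(rng.integers(mdp.action_spec().num_values))
    timestep = mdp.step(action)
    print(timestep.reward)
```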