colosseum.mdp.deep_sea.finite_horizon
```python
from typing import Any, Dict, List, Tuple

import gin

from colosseum.mdp import EpisodicMDP
from colosseum.mdp.deep_sea.base import DeepSeaMDP, DeepSeaNode


@gin.configurable
class DeepSeaEpisodic(EpisodicMDP, DeepSeaMDP):
    """
    The episodic DeepSea MDP class.
    """

    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        return DeepSeaMDP.sample_mdp_parameters(n, True, seed)

    def custom_graph_layout(self) -> Dict[DeepSeaNode, Tuple[int, int]]:
        """
        Returns
        -------
        Dict[DeepSeaNode, Tuple[int, int]]
            The custom layout to draw a nx.Graph.
        """
        return {node: (node.X, node.Y) for node in self.G}

    def __init__(self, *args, **kwargs):
        if "size" in kwargs:
            H = kwargs["size"]
        else:
            raise NotImplementedError(
                "The 'size' parameter should be given as a keyword parameter."
            )

        super(DeepSeaEpisodic, self).__init__(*args, H=H, **kwargs)
```
```python
@gin.configurable
class DeepSeaEpisodic(EpisodicMDP, DeepSeaMDP):
```
The episodic DeepSea MDP class.
```python
DeepSeaEpisodic(*args, **kwargs)
```
Parameters
- seed (int): The seed used for sampling rewards and next states.
- size (int): The size of the grid. In this episodic version, it also sets the episode horizon H.
- optimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory. By default, it is set to 1.
- suboptimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories. By default, it is set to 0.5.
- optimal_distribution (Union[Tuple, rv_continuous]): The distribution of the highly rewarding state. It can be either passed as a tuple containing Beta parameters or as a rv_continuous object.
- sub_optimal_distribution (Union[Tuple, rv_continuous]): The distribution of the suboptimal rewarding states. It can be either passed as a tuple containing Beta parameters or as a rv_continuous object.
- other_distribution (Union[Tuple, rv_continuous]): The distribution of the other states. It can be either passed as a tuple containing Beta parameters or as a rv_continuous object.
- make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
- reward_variance_multiplier (float): A constant that scales the variance of the reward distributions without changing their means: the lower the value, the higher the variance. By default, it is set to 1.
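A minimal construction sketch. The keyword values below are illustrative choices, not library defaults; `size` also fixes the horizon, as the `__init__` source above shows.

```python
from colosseum.mdp.deep_sea.finite_horizon import DeepSeaEpisodic

# Build a 6x6 episodic DeepSea MDP; `size` also sets the horizon H = 6.
mdp = DeepSeaEpisodic(
    seed=42,                      # seed for sampling rewards and next states
    size=6,                       # grid size (and episode horizon)
    make_reward_stochastic=True,  # draw rewards from distributions
    optimal_return=1.0,           # mean reward of the optimal trajectory
    suboptimal_return=0.5,        # mean reward of suboptimal trajectories
)
```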
```python
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
```
Returns
- List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
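A short usage sketch. That each returned dictionary can be unpacked straight into the constructor is an inference from the return description, not something the page states explicitly:

```python
# Sample three parameter dictionaries reproducibly.
params = DeepSeaEpisodic.sample_parameters(n=3, seed=0)

# Each entry is a Dict[str, Any] of constructor keyword arguments,
# so it can (presumably) be unpacked to instantiate an MDP.
mdps = [DeepSeaEpisodic(**p) for p in params]
```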
```python
def custom_graph_layout(self) -> Dict[DeepSeaNode, Tuple[int, int]]:
```
Returns
- Dict[DeepSeaNode, Tuple[int, int]]: The custom layout used to draw an nx.Graph.
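Since the method's source maps every node to its (X, Y) grid coordinates, the returned dictionary can be passed directly to networkx as a position mapping. A sketch, assuming the `mdp` instance from the earlier example and that matplotlib is available:

```python
import matplotlib.pyplot as plt
import networkx as nx

# `mdp.G` is the graph attribute the method itself iterates over;
# the custom layout places every node at its (X, Y) grid position.
pos = mdp.custom_graph_layout()
nx.draw(mdp.G, pos=pos, node_size=60, with_labels=False)
plt.show()
```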
Inherited Members
- colosseum.mdp.base_finite.EpisodicMDP
  - is_episodic
  - H
  - random_policy_cf
  - random_policy
  - parameters
  - reachable_states
  - T_cf
  - R_cf
  - optimal_value_continuous_form
  - worst_value_continuous_form
  - random_value_continuous_form
  - episodic_optimal_average_reward
  - episodic_worst_average_reward
  - episodic_random_average_reward
  - continuous_form_episodic_transition_matrix_and_rewards
  - episodic_transition_matrix_and_rewards
  - get_optimal_policy_continuous_form
  - get_worst_policy_continuous_form
  - get_random_policy_continuous_form
  - get_minimal_regret_for_starting_node
  - get_optimal_policy_starting_value
  - get_worst_policy_starting_value
  - get_random_policy_starting_value
  - get_episodic_graph
  - get_grid_representation
- colosseum.mdp.deep_sea.base.DeepSeaMDP
  - get_unique_symbols
  - does_seed_change_MDP_structure
  - sample_mdp_parameters
  - get_node_class
  - get_gin_parameters
  - n_actions
- colosseum.mdp.base.BaseMDP
  - get_available_hardness_measures
  - produce_gin_file_from_mdp_parameters
  - get_gin_config
  - get_node_labels
  - get_node_action_labels
  - hash
  - instantiate_MDP
  - T
  - R
  - recurrent_nodes_set
  - communication_class
  - get_optimal_policy
  - get_worst_policy
  - get_value_functions
  - optimal_value_functions
  - worst_value_functions
  - random_value_functions
  - optimal_transition_probabilities
  - worst_transition_probabilities
  - random_transition_probabilities
  - optimal_markov_chain
  - worst_markov_chain
  - random_markov_chain
  - get_stationary_distribution
  - optimal_stationary_distribution
  - worst_stationary_distribution
  - random_stationary_distribution
  - optimal_average_rewards
  - worst_average_rewards
  - random_average_rewards
  - get_average_reward
  - optimal_average_reward
  - worst_average_reward
  - random_average_reward
  - transition_matrix_and_rewards
  - graph_layout
  - graph_metrics
  - diameter
  - sum_reciprocals_suboptimality_gaps
  - discounted_value_norm
  - undiscounted_value_norm
  - value_norm
  - measures_of_hardness
  - summary
  - hardness_report
  - get_info_class
  - get_transition_distributions
  - get_reward_distribution
  - sample_reward
  - get_measure_from_name
  - action_spec
  - observation_spec
  - get_observation
  - reset
  - step
  - random_steps
  - random_step
  - get_visitation_counts
  - reset_visitation_counts
  - get_value_node_labels
- dm_env._environment.Environment
  - reward_spec
  - discount_spec
  - close