colosseum.mdp.frozen_lake.finite_horizon
from typing import Any, Dict, List

import gin

from colosseum.mdp import EpisodicMDP
from colosseum.mdp.frozen_lake.base import FrozenLakeMDP


@gin.configurable
class FrozenLakeEpisodic(EpisodicMDP, FrozenLakeMDP):
    """
    The FrozenLake episodic MDP.
    """

    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        return FrozenLakeMDP.sample_mdp_parameters(n, True, seed)
@gin.configurable
class FrozenLakeEpisodic(EpisodicMDP, FrozenLakeMDP):
The FrozenLake episodic MDP.
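Because the class is decorated with @gin.configurable, its constructor arguments can also be supplied through gin bindings instead of being passed in code. The snippet below is a minimal, illustrative sketch that binds only the horizon H from the constructor signature that follows; the binding syntax is standard gin, and the value is arbitrary.

```python
import gin

from colosseum.mdp.frozen_lake.finite_horizon import FrozenLakeEpisodic

# Bind the time horizon H (an explicit constructor argument, documented
# below) through a gin configuration string. The value 10 is illustrative.
gin.parse_config(["FrozenLakeEpisodic.H = 10"])
```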
FrozenLakeEpisodic(H: int = None, **kwargs)
def __init__(self, H: int = None, **kwargs):
    super(EpisodicMDP, self).__init__(**kwargs)

    # Computing the time horizon
    self._input_H = H
    self._H = None

    # Episodic setting specific caching variables
    self._reachable_states = None
    self._episodic_graph = dict()
    self._continuous_form_episodic_transition_matrix_and_rewards = None
    self._episodic_transition_matrix_and_rewards = None
    self._optimal_policy_cf = dict()
    self._worst_policy_cf = dict()
    self._optimal_value_cf = None
    self._worst_value_cf = None
    self._random_value_cf = None
    self._eoar = None
    self._woar = None
    self._roar = None
    self._random_policy_cf = None
    self._random_policy = None
    self._average_optimal_episodic_reward = None
    self._average_worst_episodic_reward = None
    self._average_random_episodic_reward = None
Parameters
- seed (int): The seed used for sampling rewards and next states.
- size (int): The size of the grid.
- p_frozen (float): The probability that a tile of the lake is frozen and does not contain a hole.
- optimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory. By default, it is set to 1.
- suboptimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories. By default, it is set to 0.1.
- is_slippery (bool): If True, the outcome of the action is stochastic due to the frozen tiles being slippery. By default, it is set to True.
- goal_r (Union[Tuple, rv_continuous]): The reward distribution of the goal (highly rewarding) state. It can be passed either as a tuple containing Beta parameters or as an rv_continuous object.
- default_r (Union[Tuple, rv_continuous]): The reward distribution of the other states. It can be passed either as a tuple containing Beta parameters or as an rv_continuous object.
- make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
- reward_variance_multiplier (float): A constant that scales the variance of the reward distributions without changing their means: the lower the value, the higher the variance. By default, it is set to 1.
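Given the constructor signature and the parameters documented above, the following is a minimal construction sketch; the specific values are illustrative choices, not library defaults.

```python
from colosseum.mdp.frozen_lake.finite_horizon import FrozenLakeEpisodic

# Illustrative values for the documented constructor parameters.
mdp = FrozenLakeEpisodic(
    seed=42,                      # seed for sampling rewards and next states
    size=4,                       # 4x4 grid
    p_frozen=0.9,                 # probability that a tile is frozen
    is_slippery=True,             # stochastic action outcomes
    make_reward_stochastic=True,  # make the rewards stochastic
)
```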
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
    return FrozenLakeMDP.sample_mdp_parameters(n, True, seed)
Returns
- List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
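A short sketch of how the sampled parameters might be used, assuming each returned dictionary can be unpacked directly into the constructor (as the return description above suggests):

```python
from colosseum.mdp.frozen_lake.finite_horizon import FrozenLakeEpisodic

# Sample three parameter dictionaries and build one MDP from each.
params_list = FrozenLakeEpisodic.sample_parameters(n=3, seed=42)
mdps = [FrozenLakeEpisodic(**params) for params in params_list]
```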
Inherited Members
- colosseum.mdp.base_finite.EpisodicMDP
- is_episodic
- H
- random_policy_cf
- random_policy
- parameters
- reachable_states
- T_cf
- R_cf
- optimal_value_continuous_form
- worst_value_continuous_form
- random_value_continuous_form
- episodic_optimal_average_reward
- episodic_worst_average_reward
- episodic_random_average_reward
- continuous_form_episodic_transition_matrix_and_rewards
- episodic_transition_matrix_and_rewards
- get_optimal_policy_continuous_form
- get_worst_policy_continuous_form
- get_random_policy_continuous_form
- get_minimal_regret_for_starting_node
- get_optimal_policy_starting_value
- get_worst_policy_starting_value
- get_random_policy_starting_value
- get_episodic_graph
- get_grid_representation
- colosseum.mdp.frozen_lake.base.FrozenLakeMDP
- get_unique_symbols
- does_seed_change_MDP_structure
- sample_mdp_parameters
- get_node_class
- get_gin_parameters
- n_actions
- colosseum.mdp.base.BaseMDP
- get_available_hardness_measures
- produce_gin_file_from_mdp_parameters
- get_gin_config
- get_node_labels
- get_node_action_labels
- hash
- instantiate_MDP
- T
- R
- recurrent_nodes_set
- communication_class
- get_optimal_policy
- get_worst_policy
- get_value_functions
- optimal_value_functions
- worst_value_functions
- random_value_functions
- optimal_transition_probabilities
- worst_transition_probabilities
- random_transition_probabilities
- optimal_markov_chain
- worst_markov_chain
- random_markov_chain
- get_stationary_distribution
- optimal_stationary_distribution
- worst_stationary_distribution
- random_stationary_distribution
- optimal_average_rewards
- worst_average_rewards
- random_average_rewards
- get_average_reward
- optimal_average_reward
- worst_average_reward
- random_average_reward
- transition_matrix_and_rewards
- graph_layout
- graph_metrics
- diameter
- sum_reciprocals_suboptimality_gaps
- discounted_value_norm
- undiscounted_value_norm
- value_norm
- measures_of_hardness
- summary
- hardness_report
- get_info_class
- get_transition_distributions
- get_reward_distribution
- sample_reward
- get_measure_from_name
- action_spec
- observation_spec
- get_observation
- reset
- step
- random_steps
- random_step
- get_visitation_counts
- reset_visitation_counts
- get_value_node_labels
- dm_env._environment.Environment
- reward_spec
- discount_spec
- close
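The inherited members above include the dm_env Environment interface (reset, step, action_spec, observation_spec). The loop below is an interaction sketch under the assumption that the class follows the standard dm_env reset/step protocol and accepts integer actions in range(mdp.n_actions); the constructor values are illustrative.

```python
import numpy as np

from colosseum.mdp.frozen_lake.finite_horizon import FrozenLakeEpisodic

mdp = FrozenLakeEpisodic(seed=0, size=4, p_frozen=0.9)
rng = np.random.default_rng(0)

# Roll out one episode with uniformly random actions, relying on the
# dm_env TimeStep returned by reset/step to signal episode termination.
timestep = mdp.reset()
while not timestep.last():
    action = int(rng.integers(mdp.n_actions))
    timestep = mdp.step(action)
```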