colosseum.mdp.frozen_lake.infinite_horizon

from typing import Any, Dict, List, Tuple

import gin

from colosseum.mdp import ContinuousMDP
from colosseum.mdp.frozen_lake.base import FrozenLakeMDP, FrozenLakeNode


@gin.configurable
class FrozenLakeContinuous(ContinuousMDP, FrozenLakeMDP):
    """
    The FrozenLake continuous MDP.
    """

    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        return FrozenLakeMDP.sample_mdp_parameters(n, False, seed)

    def custom_graph_layout(self) -> Dict[FrozenLakeNode, Tuple[int, int]]:
        """
        Returns
        -------
        Dict[FrozenLakeNode, Tuple[int, int]]
            The custom layout to draw a nx.Graph.
        """
        return {node: tuple(node) for node in self.G}
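Since the class is decorated with @gin.configurable, its constructor arguments can also be bound through gin. A minimal sketch, assuming the import path given by the module name above and the standard gin binding syntax (ClassName.parameter = value):

import gin

from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

gin.parse_config(
    """
    FrozenLakeContinuous.size = 8
    FrozenLakeContinuous.p_frozen = 0.9
    """
)

# Arguments not passed explicitly are filled in from the gin bindings.
mdp = FrozenLakeContinuous(seed=0)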
@gin.configurable
class FrozenLakeContinuous(colosseum.mdp.base_infinite.ContinuousMDP, colosseum.mdp.frozen_lake.base.FrozenLakeMDP):

The FrozenLake continuous MDP.

FrozenLakeContinuous(
    seed: int,
    size: int,
    p_frozen: float,
    optimal_return: float = 1.0,
    suboptimal_return: float = 0.1,
    is_slippery: bool = True,
    goal_r: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    default_r: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    make_reward_stochastic=False,
    reward_variance_multiplier: float = 1.0,
    **kwargs,
)
    def __init__(
        self,
        seed: int,
        size: int,
        p_frozen: float,
        optimal_return: float = 1.0,
        suboptimal_return: float = 0.1,
        is_slippery: bool = True,
        goal_r: Union[Tuple, rv_continuous] = None,
        default_r: Union[Tuple, rv_continuous] = None,
        make_reward_stochastic=False,
        reward_variance_multiplier: float = 1.0,
        **kwargs,
    ):
        """
        Parameters
        ----------
        seed : int
            The seed used for sampling rewards and next states.
        size : int
            The size of the grid.
        p_frozen : float
            The probability that a tile of the lake is frozen and does not contain a hole.
        optimal_return : float
            If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory.
            By default, it is set to 1.
        suboptimal_return : float
            If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories.
            By default, it is set to 0.1.
        is_slippery : bool
            If True, the outcome of the action is stochastic due to the frozen tiles being slippery. By default, it is
            set to True.
        goal_r : Union[Tuple, rv_continuous]
            The reward distribution of the highly rewarding state. It can be passed either as a tuple containing Beta
            parameters or as a rv_continuous object.
        default_r : Union[Tuple, rv_continuous]
            The reward distribution of the other states. It can be passed either as a tuple containing Beta parameters
            or as a rv_continuous object.
        make_reward_stochastic : bool
            If True, the rewards of the MDP will be stochastic. By default, it is set to False.
        reward_variance_multiplier : float
            A constant that can be used to increase the variance of the reward distributions without changing their
            means. The lower the value, the higher the variance. By default, it is set to 1.
        """

        # Tuples are interpreted as Beta parameters and converted to distributions.
        if type(goal_r) == tuple:
            goal_r = get_dist(goal_r[0], goal_r[1])
        if type(default_r) == tuple:
            default_r = get_dist(default_r[0], default_r[1])

        self._size = size
        self._p_frozen = p_frozen
        self._optimal_return = optimal_return
        self._suboptimal_return = suboptimal_return
        self._is_slippery = is_slippery
        self._goal_r = goal_r
        self._default_r = default_r

        # Sample the lake layout as a grid of frozen tiles and holes.
        np.random.seed(seed)
        self.lake = np.array(
            list(
                map(
                    lambda x: list(x),
                    generate_random_map(size=self._size, p=self._p_frozen),
                )
            )
        )

        # If both reward distributions were given, use them as-is; otherwise fall
        # back to Beta distributions (stochastic) or point masses (deterministic).
        if (default_r, goal_r).count(None) == 0:
            self._default_r = default_r
            self._goal_r = goal_r
        else:
            if make_reward_stochastic:
                self._default_r = beta(
                    reward_variance_multiplier,
                    reward_variance_multiplier
                    * (size ** 2 / self._suboptimal_return - 1),
                )
                self._goal_r = beta(
                    reward_variance_multiplier * (size ** 2 / self._optimal_return - 1),
                    reward_variance_multiplier,
                )
            else:
                self._default_r = deterministic(0.0)
                self._goal_r = deterministic(1.0)

        super(FrozenLakeMDP, self).__init__(
            seed=seed,
            reward_variance_multiplier=reward_variance_multiplier,
            make_reward_stochastic=make_reward_stochastic,
            **kwargs,
        )
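The Beta parameterization above ties the means of the default and goal reward distributions to size, suboptimal_return, and optimal_return, while reward_variance_multiplier cancels out of the mean and only controls the spread. A small sketch verifying this numerically with scipy (the parameter values here are illustrative, not defaults from the library):

from scipy.stats import beta

size, optimal_return, suboptimal_return, c = 4, 1.0, 0.1, 1.0

default_r = beta(c, c * (size ** 2 / suboptimal_return - 1))
goal_r = beta(c * (size ** 2 / optimal_return - 1), c)

print(default_r.mean())  # suboptimal_return / size ** 2 = 0.00625
print(goal_r.mean())     # 1 - optimal_return / size ** 2 = 0.9375

# Scaling both Beta parameters by the same factor leaves the mean unchanged
# but shrinks the standard deviation, matching the docstring's note that a
# lower reward_variance_multiplier means a higher variance.
print(beta(10 * c, 10 * c * (size ** 2 / suboptimal_return - 1)).std())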
Parameters
  • seed (int): The seed used for sampling rewards and next states.
  • size (int): The size of the grid.
  • p_frozen (float): The probability that a tile of the lake is frozen and does not contain a hole.
  • optimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory. By default, it is set to 1.
  • suboptimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories. By default, it is set to 0.1.
  • is_slippery (bool): If True, the outcome of the action is stochastic due to the frozen tiles being slippery. By default, it is set to True.
  • goal_r (Union[Tuple, rv_continuous]): The reward distribution of the highly rewarding state. It can be passed either as a tuple containing Beta parameters or as a rv_continuous object.
  • default_r (Union[Tuple, rv_continuous]): The reward distribution of the other states. It can be passed either as a tuple containing Beta parameters or as a rv_continuous object.
  • make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
  • reward_variance_multiplier (float): A constant that can be used to increase the variance of the reward distributions without changing their means. The lower the value, the higher the variance. By default, it is set to 1.
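A hedged usage sketch of constructing the MDP directly and stepping through it via the dm_env-style interface (reset and step appear in the Inherited Members list below; the exact contents of the returned time steps are assumed to follow dm_env conventions):

from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

mdp = FrozenLakeContinuous(seed=42, size=4, p_frozen=0.8, make_reward_stochastic=True)

ts = mdp.reset()  # initial dm_env TimeStep
ts = mdp.step(0)  # take the first of the available actions
print(ts.reward)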
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
Returns
  • List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
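For illustration, a short sketch of sampling parameter dictionaries and instantiating an MDP from one of them; it is assumed here that the sampled dictionaries map directly onto the constructor's keyword arguments:

from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

params = FrozenLakeContinuous.sample_parameters(3, seed=0)
mdp = FrozenLakeContinuous(**params[0])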
def custom_graph_layout(self) -> Dict[colosseum.mdp.frozen_lake.base.FrozenLakeNode, Tuple[int, int]]:
Returns
  • Dict[FrozenLakeNode, Tuple[int, int]]: The custom layout to draw a nx.Graph.
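A sketch of drawing the transition graph with this layout; it assumes the graph is exposed as mdp.G (as the source above suggests) and that matplotlib is installed:

import matplotlib.pyplot as plt
import networkx as nx

from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

mdp = FrozenLakeContinuous(seed=0, size=4, p_frozen=0.8)
layout = mdp.custom_graph_layout()  # {FrozenLakeNode: (x, y)} grid positions
nx.draw(mdp.G, pos=layout, node_size=50)
plt.show()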
Inherited Members
colosseum.mdp.base_infinite.ContinuousMDP
is_episodic
get_grid_representation
colosseum.mdp.frozen_lake.base.FrozenLakeMDP
get_unique_symbols
does_seed_change_MDP_structure
sample_mdp_parameters
get_node_class
get_gin_parameters
n_actions
parameters
colosseum.mdp.base.BaseMDP
get_available_hardness_measures
produce_gin_file_from_mdp_parameters
get_gin_config
get_node_labels
get_node_action_labels
hash
instantiate_MDP
T
R
recurrent_nodes_set
communication_class
get_optimal_policy
get_worst_policy
get_value_functions
optimal_value_functions
worst_value_functions
random_value_functions
optimal_transition_probabilities
worst_transition_probabilities
random_transition_probabilities
optimal_markov_chain
worst_markov_chain
random_markov_chain
get_stationary_distribution
optimal_stationary_distribution
worst_stationary_distribution
random_stationary_distribution
optimal_average_rewards
worst_average_rewards
random_average_rewards
get_average_reward
optimal_average_reward
worst_average_reward
random_average_reward
transition_matrix_and_rewards
graph_layout
graph_metrics
diameter
sum_reciprocals_suboptimality_gaps
discounted_value_norm
undiscounted_value_norm
value_norm
measures_of_hardness
summary
hardness_report
get_info_class
get_transition_distributions
get_reward_distribution
sample_reward
get_measure_from_name
action_spec
observation_spec
get_observation
reset
step
random_steps
random_step
get_visitation_counts
reset_visitation_counts
get_value_node_labels
dm_env._environment.Environment
reward_spec
discount_spec
close