colosseum.mdp.frozen_lake.infinite_horizon
```python
from typing import Any, Dict, List, Tuple

import gin

from colosseum.mdp import ContinuousMDP
from colosseum.mdp.frozen_lake.base import FrozenLakeMDP, FrozenLakeNode


@gin.configurable
class FrozenLakeContinuous(ContinuousMDP, FrozenLakeMDP):
    """
    The FrozenLake continuous MDP.
    """

    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        return FrozenLakeMDP.sample_mdp_parameters(n, False, seed)

    def custom_graph_layout(self) -> Dict[FrozenLakeNode, Tuple[int, int]]:
        """
        Returns
        -------
        Dict[FrozenLakeNode, Tuple[int, int]]
            The custom layout to draw a nx.Graph.
        """
        return {node: tuple(node) for node in self.G}
```
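Because the class is decorated with @gin.configurable, its constructor arguments can also be supplied through gin bindings instead of being passed explicitly. A minimal sketch (the bound values below are illustrative choices, not defaults shipped with the package):

```python
# Sketch: configuring FrozenLakeContinuous through gin bindings.
# The bound values are illustrative only.
import gin

from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

gin.parse_config(
    """
    FrozenLakeContinuous.size = 8
    FrozenLakeContinuous.p_frozen = 0.8
    FrozenLakeContinuous.make_reward_stochastic = True
    """
)

# Arguments bound above act as defaults, so only the seed is passed here.
mdp = FrozenLakeContinuous(seed=42)
```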
```python
@gin.configurable
class FrozenLakeContinuous(ContinuousMDP, FrozenLakeMDP):
```
The FrozenLake continuous MDP.
```python
FrozenLakeContinuous(
    seed: int,
    size: int,
    p_frozen: float,
    optimal_return: float = 1.0,
    suboptimal_return: float = 0.1,
    is_slippery: bool = True,
    goal_r: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    default_r: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None,
    make_reward_stochastic=False,
    reward_variance_multiplier: float = 1.0,
    **kwargs,
)
```
```python
def __init__(
    self,
    seed: int,
    size: int,
    p_frozen: float,
    optimal_return: float = 1.0,
    suboptimal_return: float = 0.1,
    is_slippery: bool = True,
    goal_r: Union[Tuple, rv_continuous] = None,
    default_r: Union[Tuple, rv_continuous] = None,
    make_reward_stochastic=False,
    reward_variance_multiplier: float = 1.0,
    **kwargs,
):
    """

    Parameters
    ----------
    seed : int
        The seed used for sampling rewards and next states.
    size : int
        The size of the grid.
    p_frozen : float
        The probability that a tile of the lake is frozen and does not contain a hole.
    optimal_return: float
        If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory.
        By default, it is set to 1.
    suboptimal_return: float
        If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories.
        By default, it is set to 0.1.
    is_slippery : bool
        If True, the outcome of the action is stochastic due to the frozen tiles being slippery. By default, it is
        set to True.
    goal_r : Union[Tuple, rv_continuous]
        The distribution of the highly rewarding state. It can be either passed as a tuple containing Beta parameters
        or as a rv_continuous object.
    default_r : Union[Tuple, rv_continuous]
        The distribution of the other states. It can be either passed as a tuple containing Beta parameters or as a
        rv_continuous object.
    make_reward_stochastic : bool
        If True, the rewards of the MDP will be stochastic. By default, it is set to False.
    reward_variance_multiplier : float
        A constant that can be used to increase the variance of the reward distributions without changing their means.
        The lower the value, the higher the variance. By default, it is set to 1.
    """

    if type(goal_r) == tuple:
        goal_r = get_dist(goal_r[0], goal_r[1])
    if type(default_r) == tuple:
        default_r = get_dist(default_r[0], default_r[1])

    self._size = size
    self._p_frozen = p_frozen
    self._optimal_return = optimal_return
    self._suboptimal_return = suboptimal_return
    self._is_slippery = is_slippery
    self._goal_r = goal_r
    self._default_r = default_r

    np.random.seed(seed)
    self.lake = np.array(
        list(
            map(
                lambda x: list(x),
                generate_random_map(size=self._size, p=self._p_frozen),
            )
        )
    )

    if (default_r, goal_r).count(None) == 0:
        self._default_r = default_r
        self._goal_r = goal_r
    else:
        if make_reward_stochastic:
            self._default_r = beta(
                reward_variance_multiplier,
                reward_variance_multiplier
                * (size ** 2 / self._suboptimal_return - 1),
            )
            self._goal_r = beta(
                reward_variance_multiplier * (size ** 2 / self._optimal_return - 1),
                reward_variance_multiplier,
            )
        else:
            self._default_r = deterministic(0.0)
            self._goal_r = deterministic(1.0)

    super(FrozenLakeMDP, self).__init__(
        seed=seed,
        reward_variance_multiplier=reward_variance_multiplier,
        make_reward_stochastic=make_reward_stochastic,
        **kwargs,
    )
```
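In the stochastic-reward branch above, both Beta shape parameters are scaled by reward_variance_multiplier, so the mean of default_r stays at suboptimal_return / size ** 2 regardless of the multiplier, while a smaller multiplier yields a larger variance. A quick numerical check of that claim (illustrative only, using scipy):

```python
# Illustrative check of the Beta parameterisation used in __init__ above:
# scaling both shape parameters by the same constant keeps the mean fixed
# and only changes the variance.
from scipy.stats import beta

size, suboptimal_return = 4, 0.1
for c in (0.5, 1.0, 4.0):  # candidate reward_variance_multiplier values
    dist = beta(c, c * (size ** 2 / suboptimal_return - 1))
    # mean == suboptimal_return / size ** 2 == 0.00625 for every c;
    # the variance shrinks as c grows.
    print(c, dist.mean(), dist.var())
```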
Parameters
- seed (int): The seed used for sampling rewards and next states.
- size (int): The size of the grid.
- p_frozen (float): The probability that a tile of the lake is frozen and does not contain a hole.
- optimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory. By default, it is set to 1.
- suboptimal_return (float): If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories. By default, it is set to 0.1.
- is_slippery (bool): If True, the outcome of the action is stochastic due to the frozen tiles being slippery. By default, it is set to True.
- goal_r (Union[Tuple, rv_continuous]): The reward distribution of the highly rewarding state. It can be passed either as a tuple containing the Beta parameters or as an rv_continuous object.
- default_r (Union[Tuple, rv_continuous]): The reward distribution of the other states. It can be passed either as a tuple containing the Beta parameters or as an rv_continuous object.
- make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
- reward_variance_multiplier (float): A constant that can be used to increase the variance of the reward distributions without changing their means. The lower the value, the higher the variance. By default, it is set to 1.
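As a usage sketch (the argument values below are arbitrary examples rather than recommended settings), the MDP can be constructed directly from these parameters:

```python
# Illustrative construction of the continuous FrozenLake MDP.
from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

mdp = FrozenLakeContinuous(
    seed=0,
    size=5,                       # 5x5 lake
    p_frozen=0.9,                 # 90% of tiles are frozen (no hole)
    is_slippery=True,             # stochastic transitions
    make_reward_stochastic=True,  # Beta-distributed rewards
)
```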
```python
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
    return FrozenLakeMDP.sample_mdp_parameters(n, False, seed)
```
Returns
- List[Dict[str, Any]]: The n sampled parameter dictionaries, each of which can be used to construct an MDP in a reasonable amount of time.
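For example (a sketch that assumes each returned dictionary contains every required constructor argument, including the seed), the sampled parameters can be used to build a small family of MDPs:

```python
# Sketch: sample three parameter dictionaries and instantiate an MDP from each.
# Assumes the dictionaries map directly onto the constructor's keyword arguments.
from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

for params in FrozenLakeContinuous.sample_parameters(n=3, seed=0):
    mdp = FrozenLakeContinuous(**params)
    print(params)
```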
```python
def custom_graph_layout(self) -> Dict[FrozenLakeNode, Tuple[int, int]]:
    """
    Returns
    -------
    Dict[FrozenLakeNode, Tuple[int, int]]
        The custom layout to draw a nx.Graph.
    """
    return {node: tuple(node) for node in self.G}
```
Returns
- Dict[FrozenLakeNode, Tuple[int, int]]: The custom layout used to draw an nx.Graph.
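Since the layout maps every FrozenLakeNode to its grid coordinates, it can be passed to networkx as the pos argument so that the drawn graph mirrors the lake grid. A sketch, assuming mdp.G is the underlying networkx graph referenced in the source above:

```python
# Sketch: draw the MDP graph with nodes placed at their (x, y) lake coordinates.
import matplotlib.pyplot as plt
import networkx as nx

from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

mdp = FrozenLakeContinuous(seed=0, size=4, p_frozen=0.9)
layout = mdp.custom_graph_layout()        # {FrozenLakeNode: (x, y)}
nx.draw(mdp.G, pos=layout, node_size=80)  # mdp.G: the graph used in the source above
plt.show()
```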
Inherited Members
- colosseum.mdp.frozen_lake.base.FrozenLakeMDP
  - get_unique_symbols
  - does_seed_change_MDP_structure
  - sample_mdp_parameters
  - get_node_class
  - get_gin_parameters
  - n_actions
  - parameters
- colosseum.mdp.base.BaseMDP
  - get_available_hardness_measures
  - produce_gin_file_from_mdp_parameters
  - get_gin_config
  - get_node_labels
  - get_node_action_labels
  - hash
  - instantiate_MDP
  - T
  - R
  - recurrent_nodes_set
  - communication_class
  - get_optimal_policy
  - get_worst_policy
  - get_value_functions
  - optimal_value_functions
  - worst_value_functions
  - random_value_functions
  - optimal_transition_probabilities
  - worst_transition_probabilities
  - random_transition_probabilities
  - optimal_markov_chain
  - worst_markov_chain
  - random_markov_chain
  - get_stationary_distribution
  - optimal_stationary_distribution
  - worst_stationary_distribution
  - random_stationary_distribution
  - optimal_average_rewards
  - worst_average_rewards
  - random_average_rewards
  - get_average_reward
  - optimal_average_reward
  - worst_average_reward
  - random_average_reward
  - transition_matrix_and_rewards
  - graph_layout
  - graph_metrics
  - diameter
  - sum_reciprocals_suboptimality_gaps
  - discounted_value_norm
  - undiscounted_value_norm
  - value_norm
  - measures_of_hardness
  - summary
  - hardness_report
  - get_info_class
  - get_transition_distributions
  - get_reward_distribution
  - sample_reward
  - get_measure_from_name
  - action_spec
  - observation_spec
  - get_observation
  - reset
  - step
  - random_steps
  - random_step
  - get_visitation_counts
  - reset_visitation_counts
  - get_value_node_labels
- dm_env._environment.Environment
  - reward_spec
  - discount_spec
  - close
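Since the class ultimately implements the dm_env.Environment interface listed above (reset, step, action_spec, ...), it can be driven with a standard interaction loop. A minimal sketch, assuming n_actions (inherited from FrozenLakeMDP) gives the number of discrete actions:

```python
# Minimal interaction sketch against the dm_env-style interface.
import numpy as np

from colosseum.mdp.frozen_lake.infinite_horizon import FrozenLakeContinuous

mdp = FrozenLakeContinuous(seed=0, size=4, p_frozen=0.9)
rng = np.random.default_rng(0)

ts = mdp.reset()                           # dm_env.TimeStep
for _ in range(20):
    action = rng.integers(mdp.n_actions)   # uniformly random policy
    ts = mdp.step(action)
    print(ts.reward, ts.observation)
```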