colosseum.mdp.minigrid_rooms.finite_horizon

 1from typing import Any, Dict, List
 2
 3import gin
 4
 5from colosseum.mdp import EpisodicMDP
 6from colosseum.mdp.minigrid_rooms.base import MiniGridRoomsMDP
 7
 8
 9@gin.configurable
10class MiniGridRoomsEpisodic(EpisodicMDP, MiniGridRoomsMDP):
11    @staticmethod
12    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
13        return MiniGridRoomsMDP.sample_mdp_parameters(n, True, seed)
@gin.configurable
class MiniGridRoomsEpisodic(colosseum.mdp.base_finite.EpisodicMDP, colosseum.mdp.minigrid_rooms.base.MiniGridRoomsMDP):
10@gin.configurable
11class MiniGridRoomsEpisodic(EpisodicMDP, MiniGridRoomsMDP):
12    @staticmethod
13    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
14        return MiniGridRoomsMDP.sample_mdp_parameters(n, True, seed)

The base class for episodic MDPs.

MiniGridRoomsEpisodic(H: int = None, **kwargs)
 78    def __init__(self, H: int = None, **kwargs):
 79        super(EpisodicMDP, self).__init__(**kwargs)
 80
 81        # Computing the time horizon
 82        self._input_H = H
 83        self._H = None
 84
 85        # Episodic setting specific caching variables
 86        self._reachable_states = None
 87        self._episodic_graph = dict()
 88        self._continuous_form_episodic_transition_matrix_and_rewards = None
 89        self._episodic_transition_matrix_and_rewards = None
 90        self._optimal_policy_cf = dict()
 91        self._worst_policy_cf = dict()
 92        self._optimal_value_cf = None
 93        self._worst_value_cf = None
 94        self._random_value_cf = None
 95        self._eoar = None
 96        self._woar = None
 97        self._roar = None
 98        self._random_policy_cf = None
 99        self._random_policy = None
100        self._average_optimal_episodic_reward = None
101        self._average_worst_episodic_reward = None
102        self._average_random_episodic_reward = None
Parameters
  • seed (int): The seed used for sampling rewards and next states.
  • room_size (int): The size of the rooms.
  • n_rooms (int): The number of rooms. This must be a perfect square.
  • n_starting_states (int): The number of possible starting states.
  • optimal_distribution (Union[Tuple, rv_continuous]): The distribution of the highly rewarding state. It can be passed either as a tuple containing Beta parameters or as an rv_continuous object.
  • other_distribution (Union[Tuple, rv_continuous]): The distribution of the other states. It can be passed either as a tuple containing Beta parameters or as an rv_continuous object.
  • make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
  • reward_variance_multiplier (float): A constant that can be used to increase the variance of the reward distributions without changing their means. The lower the value, the higher the variance. By default, it is set to 1.
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
12    @staticmethod
13    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
14        return MiniGridRoomsMDP.sample_mdp_parameters(n, True, seed)
Returns
  • List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
Inherited Members
colosseum.mdp.base_finite.EpisodicMDP
is_episodic
H
random_policy_cf
random_policy
parameters
reachable_states
T_cf
R_cf
optimal_value_continuous_form
worst_value_continuous_form
random_value_continuous_form
episodic_optimal_average_reward
episodic_worst_average_reward
episodic_random_average_reward
continuous_form_episodic_transition_matrix_and_rewards
episodic_transition_matrix_and_rewards
get_optimal_policy_continuous_form
get_worst_policy_continuous_form
get_random_policy_continuous_form
get_minimal_regret_for_starting_node
get_optimal_policy_starting_value
get_worst_policy_starting_value
get_random_policy_starting_value
get_episodic_graph
get_grid_representation
colosseum.mdp.minigrid_rooms.base.MiniGridRoomsMDP
get_unique_symbols
does_seed_change_MDP_structure
sample_mdp_parameters
get_node_class
get_gin_parameters
n_actions
get_positions_coords_in_room
colosseum.mdp.base.BaseMDP
get_available_hardness_measures
produce_gin_file_from_mdp_parameters
get_gin_config
get_node_labels
get_node_action_labels
hash
instantiate_MDP
T
R
recurrent_nodes_set
communication_class
get_optimal_policy
get_worst_policy
get_value_functions
optimal_value_functions
worst_value_functions
random_value_functions
optimal_transition_probabilities
worst_transition_probabilities
random_transition_probabilities
optimal_markov_chain
worst_markov_chain
random_markov_chain
get_stationary_distribution
optimal_stationary_distribution
worst_stationary_distribution
random_stationary_distribution
optimal_average_rewards
worst_average_rewards
random_average_rewards
get_average_reward
optimal_average_reward
worst_average_reward
random_average_reward
transition_matrix_and_rewards
graph_layout
graph_metrics
diameter
sum_reciprocals_suboptimality_gaps
discounted_value_norm
undiscounted_value_norm
value_norm
measures_of_hardness
summary
hardness_report
get_info_class
get_transition_distributions
get_reward_distribution
sample_reward
get_measure_from_name
action_spec
observation_spec
get_observation
reset
step
random_steps
random_step
get_visitation_counts
reset_visitation_counts
get_value_node_labels
dm_env._environment.Environment
reward_spec
discount_spec
close