colosseum.mdp.taxi.infinite_horizon

from typing import Any, Dict, List

import gin

from colosseum.mdp import ContinuousMDP
from colosseum.mdp.taxi.base import TaxiMDP


@gin.configurable
class TaxiContinuous(ContinuousMDP, TaxiMDP):
    @staticmethod
    def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
        return TaxiMDP.sample_mdp_parameters(n, False, seed)
@gin.configurable
class TaxiContinuous(colosseum.mdp.base_infinite.ContinuousMDP, colosseum.mdp.taxi.base.TaxiMDP):

The infinite-horizon (continuous) version of the Taxi MDP.
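
Because the class is decorated with @gin.configurable, its constructor arguments can also be supplied through gin bindings rather than passed explicitly. The following is a minimal sketch assuming standard gin-config conventions; the binding values are illustrative, not library defaults.

import gin

from colosseum.mdp.taxi.infinite_horizon import TaxiContinuous

# Illustrative bindings: gin exposes each constructor argument as
# TaxiContinuous.<argument name>.
gin.parse_config(
    """
    TaxiContinuous.size = 6
    TaxiContinuous.make_reward_stochastic = True
    """
)

# seed is still passed explicitly; size and make_reward_stochastic come from the bindings.
mdp = TaxiContinuous(seed=42)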

TaxiContinuous( seed: int, size: int, length=2, width=1, space=1, n_locations=4, optimal_mean_reward: float = 0.9, sub_optimal_mean_reward: float = 0.2, default_r: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None, successfully_delivery_r: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None, failure_delivery_r: Union[Tuple, scipy.stats._distn_infrastructure.rv_continuous] = None, make_reward_stochastic=False, reward_variance_multiplier: float = 1.0, **kwargs)
    def __init__(
        self,
        seed: int,
        size: int,
        length=2,
        width=1,
        space=1,
        n_locations=2 ** 2,
        optimal_mean_reward: float = 0.9,
        sub_optimal_mean_reward: float = 0.2,
        default_r: Union[Tuple, rv_continuous] = None,
        successfully_delivery_r: Union[Tuple, rv_continuous] = None,
        failure_delivery_r: Union[Tuple, rv_continuous] = None,
        make_reward_stochastic=False,
        reward_variance_multiplier: float = 1.0,
        **kwargs,
    ):
        """
        Parameters
        ----------
        seed : int
            The seed used for sampling rewards and next states.
        size : int
            The size of the grid.
        length : int
            The length of the walls.
        width : int
            The width of the walls.
        space : int
            The space between walls.
        n_locations : int
            The number of possible spawn locations. It must be a perfect square.
        optimal_mean_reward : float
            If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory.
            By default, it is set to 0.9.
        sub_optimal_mean_reward : float
            If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories.
            By default, it is set to 0.2.
        default_r : Union[Tuple, rv_continuous]
            The default reward distribution. It can be either passed as a tuple containing Beta parameters or as a
            rv_continuous object.
        successfully_delivery_r : Union[Tuple, rv_continuous]
            The reward distribution for successfully delivering a passenger. It can be either passed as a tuple
            containing Beta parameters or as a rv_continuous object.
        failure_delivery_r : Union[Tuple, rv_continuous]
            The reward distribution for failing to deliver a passenger. It can be either passed as a tuple containing
            Beta parameters or as a rv_continuous object.
        make_reward_stochastic : bool
            If True, the rewards of the MDP will be stochastic. By default, it is set to False.
        reward_variance_multiplier : float
            A constant that controls the variance of the reward distributions without changing their means:
            the lower the value, the higher the variance. By default, it is set to 1.
        """

        if type(successfully_delivery_r) == tuple:
            successfully_delivery_r = get_dist(
                successfully_delivery_r[0], successfully_delivery_r[1]
            )
        if type(failure_delivery_r) == tuple:
            failure_delivery_r = get_dist(failure_delivery_r[0], failure_delivery_r[1])

        if type(default_r) == tuple:
            default_r = get_dist(default_r[0], default_r[1])

        self._size = size
        self._length = length
        self._width = width
        self._space = space
        self.n_locations = n_locations
        self._n_locations = int(np.ceil(n_locations ** 0.5) ** 2)
        self._optimal_mean_reward = optimal_mean_reward
        self._sub_optimal_mean_reward = sub_optimal_mean_reward
        self._locations = []

        dists = [default_r, successfully_delivery_r, failure_delivery_r]
        if dists.count(None) == 0:
            self._default_r = default_r
            self._successfully_delivery_r = successfully_delivery_r
            self._failure_delivery_r = failure_delivery_r
        else:
            if make_reward_stochastic:
                self._default_r = beta(
                    reward_variance_multiplier,
                    reward_variance_multiplier * (1 / sub_optimal_mean_reward - 1),
                )
                self._successfully_delivery_r = beta(
                    reward_variance_multiplier,
                    reward_variance_multiplier * (1 / optimal_mean_reward - 1),
                )
                self._failure_delivery_r = beta(
                    reward_variance_multiplier,
                    reward_variance_multiplier * (10 / sub_optimal_mean_reward - 1),
                )
            else:
                self._default_r = deterministic(0.1)
                self._successfully_delivery_r = deterministic(1)
                self._failure_delivery_r = deterministic(0)

        kwargs[
            "randomize_actions"
        ] = False  # TODO : double check whether this is actually necessary or not

        super(TaxiMDP, self).__init__(
            seed=seed,
            reward_variance_multiplier=reward_variance_multiplier,
            make_reward_stochastic=make_reward_stochastic,
            **kwargs,
        )
Parameters
  • seed (int): The seed used for sampling rewards and next states.
  • size (int): The size of the grid.
  • length (int): The length of the walls.
  • width (int): The width of the walls.
  • space (int): The space between walls.
  • n_locations (int): The number of possible spawn locations. It must be a perfect square.
  • optimal_mean_reward (float): If the rewards are made stochastic, this parameter controls the mean reward for the optimal trajectory. By default, it is set to 0.9.
  • sub_optimal_mean_reward (float): If the rewards are made stochastic, this parameter controls the mean reward for suboptimal trajectories. By default, it is set to 0.2.
  • default_r (Union[Tuple, rv_continuous]): The default reward distribution. It can be either passed as a tuple containing Beta parameters or as a rv_continuous object.
  • successfully_delivery_r (Union[Tuple, rv_continuous]): The reward distribution for successfully delivering a passenger. It can be either passed as a tuple containing Beta parameters or as a rv_continuous object.
  • failure_delivery_r (Union[Tuple, rv_continuous]): The reward distribution for failing to deliver a passenger. It can be either passed as a tuple containing Beta parameters or as a rv_continuous object.
  • make_reward_stochastic (bool): If True, the rewards of the MDP will be stochastic. By default, it is set to False.
  • reward_variance_multiplier (float): A constant that controls the variance of the reward distributions without changing their means: the lower the value, the higher the variance. By default, it is set to 1.
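
As a concrete illustration of the reward-distribution arguments, the sketch below constructs a stochastic-reward instance using frozen scipy distributions; the grid size and the Beta parameters are illustrative choices, not values prescribed by the library. Note that, as the constructor body above shows, the custom distributions are only used when all three of them are provided.

from scipy.stats import beta

from colosseum.mdp.taxi.infinite_horizon import TaxiContinuous

# Illustrative construction: size and the Beta parameters are example values.
mdp = TaxiContinuous(
    seed=0,
    size=6,
    make_reward_stochastic=True,
    # Frozen scipy distributions are accepted directly; a tuple of Beta
    # parameters is the documented alternative.
    successfully_delivery_r=beta(2.0, 0.5),  # high-mean reward for a successful delivery
    failure_delivery_r=beta(0.5, 5.0),       # low-mean reward for a failed delivery
    default_r=beta(1.0, 9.0),                # reward for the remaining transitions
)
print(mdp.n_actions)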
@staticmethod
def sample_parameters(n: int, seed: int = None) -> List[Dict[str, Any]]:
    return TaxiMDP.sample_mdp_parameters(n, False, seed)
Returns
  • List[Dict[str, Any]]: n sampled parameters that can be used to construct an MDP in a reasonable amount of time.
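
A short usage sketch, assuming (as the return description suggests) that each sampled dictionary contains a full set of constructor arguments:

from colosseum.mdp.taxi.infinite_horizon import TaxiContinuous

# Sample three parameter dictionaries and build one MDP from each of them.
params_list = TaxiContinuous.sample_parameters(n=3, seed=0)
mdps = [TaxiContinuous(**params) for params in params_list]

for mdp in mdps:
    print(mdp.parameters)  # the parameters the MDP was constructed with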
Inherited Members
colosseum.mdp.base_infinite.ContinuousMDP
is_episodic
get_grid_representation
colosseum.mdp.taxi.base.TaxiMDP
get_unique_symbols
does_seed_change_MDP_structure
sample_mdp_parameters
get_node_class
get_gin_parameters
n_actions
parameters
colosseum.mdp.base.BaseMDP
get_available_hardness_measures
produce_gin_file_from_mdp_parameters
get_gin_config
get_node_labels
get_node_action_labels
hash
instantiate_MDP
T
R
recurrent_nodes_set
communication_class
get_optimal_policy
get_worst_policy
get_value_functions
optimal_value_functions
worst_value_functions
random_value_functions
optimal_transition_probabilities
worst_transition_probabilities
random_transition_probabilities
optimal_markov_chain
worst_markov_chain
random_markov_chain
get_stationary_distribution
optimal_stationary_distribution
worst_stationary_distribution
random_stationary_distribution
optimal_average_rewards
worst_average_rewards
random_average_rewards
get_average_reward
optimal_average_reward
worst_average_reward
random_average_reward
transition_matrix_and_rewards
graph_layout
graph_metrics
diameter
sum_reciprocals_suboptimality_gaps
discounted_value_norm
undiscounted_value_norm
value_norm
measures_of_hardness
summary
hardness_report
get_info_class
get_transition_distributions
get_reward_distribution
sample_reward
get_measure_from_name
action_spec
observation_spec
get_observation
reset
step
random_steps
random_step
get_visitation_counts
reset_visitation_counts
get_value_node_labels
dm_env._environment.Environment
reward_spec
discount_spec
close