colosseum.utils.acme.specs
from typing import TYPE_CHECKING, Any, NamedTuple, Tuple

import numpy as np
from dm_env import specs

from colosseum.emission_maps import EmissionMap

if TYPE_CHECKING:
    # Imported for annotations only.
    from colosseum.mdp import BaseMDP

# Short aliases for the dm_env spec classes.
Array = specs.Array
BoundedArray = specs.BoundedArray
DiscreteArray = specs.DiscreteArray


class MDPSpec(NamedTuple):
    """Complete description of the domains an environment works with.

    Instances are built by `make_mdp_spec`, which copies each field from the
    corresponding attribute or method of the MDP it is given.
    """

    # Results of the MDP's `observation_spec()`, `action_spec()`,
    # `reward_spec()` and `discount_spec()` respectively.
    observations: Any
    actions: Any
    rewards: Any
    discounts: Any
    # `mdp.H` when the MDP is episodic, `np.inf` otherwise.
    time_horizon: Any
    # Copied from `mdp.rewards_range`.
    rewards_range: Tuple[float, float]
    # Copied from `mdp.emission_map`.
    emission_map: "EmissionMap"
    # Copied from `mdp.n_states`.
    n_states: int


def make_mdp_spec(mdp: "BaseMDP") -> MDPSpec:
    """Build the `MDPSpec` that describes the values used by `mdp`.

    Args:
        mdp: The MDP whose specs and attributes are collected.

    Returns:
        An `MDPSpec` filled from the MDP's spec methods and attributes.
    """
    observation_spec = mdp.observation_spec()
    action_spec = mdp.action_spec()
    reward_spec = mdp.reward_spec()
    discount_spec = mdp.discount_spec()

    # An MDP that is not episodic gets an infinite horizon.
    if mdp.is_episodic():
        horizon = mdp.H
    else:
        horizon = np.inf

    return MDPSpec(
        observations=observation_spec,
        actions=action_spec,
        rewards=reward_spec,
        discounts=discount_spec,
        time_horizon=horizon,
        rewards_range=mdp.rewards_range,
        emission_map=mdp.emission_map,
        n_states=mdp.n_states,
    )
class MDPSpec(typing.NamedTuple):
class MDPSpec(NamedTuple):
    """Complete description of the domains an environment works with.

    Instances are built by `make_mdp_spec`, which copies each field from the
    corresponding attribute or method of the MDP it is given.
    """

    # Results of the MDP's `observation_spec()`, `action_spec()`,
    # `reward_spec()` and `discount_spec()` respectively.
    observations: Any
    actions: Any
    rewards: Any
    discounts: Any
    # `mdp.H` when the MDP is episodic, `np.inf` otherwise.
    time_horizon: Any
    # Copied from `mdp.rewards_range`.
    rewards_range: Tuple[float, float]
    # Copied from `mdp.emission_map`.
    emission_map: "EmissionMap"
    # Copied from `mdp.n_states`.
    n_states: int
Full specification of the domains used by a given environment.
MDPSpec( observations: Any, actions: Any, rewards: Any, discounts: Any, time_horizon: Any, rewards_range: Tuple[float, float], emission_map: ForwardRef('EmissionMap'), n_states: int)
Create new instance of MDPSpec(observations, actions, rewards, discounts, time_horizon, rewards_range, emission_map, n_states)
Inherited Members
- builtins.tuple
- index
- count
def make_mdp_spec(mdp: "BaseMDP") -> MDPSpec:
    """Build the `MDPSpec` that describes the values used by `mdp`.

    Args:
        mdp: The MDP whose specs and attributes are collected.

    Returns:
        An `MDPSpec` filled from the MDP's spec methods and attributes.
    """
    observation_spec = mdp.observation_spec()
    action_spec = mdp.action_spec()
    reward_spec = mdp.reward_spec()
    discount_spec = mdp.discount_spec()

    # An MDP that is not episodic gets an infinite horizon.
    if mdp.is_episodic():
        horizon = mdp.H
    else:
        horizon = np.inf

    return MDPSpec(
        observations=observation_spec,
        actions=action_spec,
        rewards=reward_spec,
        discounts=discount_spec,
        time_horizon=horizon,
        rewards_range=mdp.rewards_range,
        emission_map=mdp.emission_map,
        n_states=mdp.n_states,
    )
Returns an `MDPSpec` describing values used by an environment.