colosseum.hyperopt.config

from dataclasses import dataclass
from typing import TYPE_CHECKING, List, Optional, Type

from colosseum.emission_maps import EmissionMap, StateInfo
from colosseum.experiment import ExperimentConfig

if TYPE_CHECKING:
    from colosseum.mdp.base import BaseMDP
 9
10
@dataclass(frozen=True)
class HyperOptConfig:
    """
    The immutable (frozen) set of parameters that configures a hyperparameter optimization procedure.
    """

    seed: int
    """The seed that controls the parameters optimization procedure."""
    n_timesteps: int
    """The number of time steps for the Agent/MDP interaction."""
    max_interaction_time_s: float
    """The maximum amount of time the agent is allowed to interact with the MDP."""
    n_samples_agents: int
    """The number of samples from the Agent hyperparameter space."""
    n_samples_mdps: int
    """The number of samples from the MDP parameter spaces defined to provide an interesting but mild challenge."""
    log_every: int
    """The number of time steps between each time the performance metrics are computed."""
    # The two fields below default to None, so they are annotated as Optional
    # (PEP 484 no longer allows an implicit Optional from a None default).
    emission_map: Optional[Type[EmissionMap]] = None
    """The emission map that will be used to provide a state/observation to the Agent. By default, it is tabular."""
    mdp_classes: Optional[List[Type["BaseMDP"]]] = None
    """The MDP classes to be used in the hyperparameter optimization procedure. By default, we use all the available ones."""
    n_seeds: int = 3
    """The number of times each Agent/MDP interaction is repeated."""

    @property
    def experiment_config(self) -> ExperimentConfig:
        """
        Returns
        -------
        ExperimentConfig
            The experiment configuration associated with the parameters optimization procedure. It is built from
            ``n_seeds``, ``n_timesteps``, ``max_interaction_time_s`` and ``log_every``.
        """
        return ExperimentConfig(
            n_seeds=self.n_seeds,
            n_steps=self.n_timesteps,
            max_interaction_time_s=self.max_interaction_time_s,
            log_performance_indicators_every=self.log_every,
        )
46
47
# Full-size tabular preset; ``n_seeds`` is left at the class default.
DEFAULT_HYPEROPT_CONF = HyperOptConfig(
    seed=42,
    # Sampling budget.
    n_samples_agents=50,
    n_samples_mdps=5,
    # Interaction budget (300 seconds = 5 minutes).
    n_timesteps=250_000,
    max_interaction_time_s=300,
    log_every=100_000,
)
"""The default parameters optimization configuration for the tabular setting."""

# Reduced tabular preset with a single seed per Agent/MDP interaction.
SMALL_HYPEROPT_CONF = HyperOptConfig(
    seed=42,
    # Sampling budget.
    n_samples_agents=2,
    n_samples_mdps=2,
    n_seeds=1,
    # Interaction budget (120 seconds = 2 minutes).
    n_timesteps=30_000,
    max_interaction_time_s=120,
    log_every=10_000,
)
"""The default small scale parameters optimization configuration for the tabular setting."""
68
# Full-size non-tabular preset: uses the StateInfo emission map and the class
# default for ``n_seeds``.
DEFAULT_HYPEROPT_CONF_NONTABULAR = HyperOptConfig(
    seed=42,
    n_timesteps=250_000,
    max_interaction_time_s=10 * 60,
    n_samples_agents=50,
    n_samples_mdps=5,
    log_every=50_000,
    emission_map=StateInfo,
)
"""The default parameters optimization configuration for the non-tabular setting."""

# Reduced non-tabular preset: fewer samples, fewer time steps, and a single seed.
SMALL_HYPEROPT_CONF_NONTABULAR = HyperOptConfig(
    seed=42,
    n_timesteps=50_000,
    max_interaction_time_s=1 * 60,
    n_samples_agents=2,
    n_samples_mdps=2,
    log_every=10_000,
    emission_map=StateInfo,
    n_seeds=1,
)
"""The default small scale parameters optimization configuration for the non-tabular setting."""
@dataclass(frozen=True)
class HyperOptConfig:
@dataclass(frozen=True)
class HyperOptConfig:
    """
    The immutable (frozen) set of parameters that configures a hyperparameter optimization procedure.
    """

    seed: int
    """The seed that controls the parameters optimization procedure."""
    n_timesteps: int
    """The number of time steps for the Agent/MDP interaction."""
    max_interaction_time_s: float
    """The maximum amount of time the agent is allowed to interact with the MDP."""
    n_samples_agents: int
    """The number of samples from the Agent hyperparameter space."""
    n_samples_mdps: int
    """The number of samples from the MDP parameter spaces defined to provide an interesting but mild challenge."""
    log_every: int
    """The number of time steps between each time the performance metrics are computed."""
    # The two fields below default to None, so they are annotated as Optional
    # (PEP 484 no longer allows an implicit Optional from a None default).
    emission_map: Optional[Type[EmissionMap]] = None
    """The emission map that will be used to provide a state/observation to the Agent. By default, it is tabular."""
    mdp_classes: Optional[List[Type["BaseMDP"]]] = None
    """The MDP classes to be used in the hyperparameter optimization procedure. By default, we use all the available ones."""
    n_seeds: int = 3
    """The number of times each Agent/MDP interaction is repeated."""

    @property
    def experiment_config(self) -> ExperimentConfig:
        """
        Returns
        -------
        ExperimentConfig
            The experiment configuration associated with the parameters optimization procedure. It is built from
            ``n_seeds``, ``n_timesteps``, ``max_interaction_time_s`` and ``log_every``.
        """
        return ExperimentConfig(
            n_seeds=self.n_seeds,
            n_steps=self.n_timesteps,
            max_interaction_time_s=self.max_interaction_time_s,
            log_performance_indicators_every=self.log_every,
        )
HyperOptConfig( seed: int, n_timesteps: int, max_interaction_time_s: float, n_samples_agents: int, n_samples_mdps: int, log_every: int, emission_map: Type[colosseum.emission_maps.base.EmissionMap] = None, mdp_classes: List[Type[colosseum.mdp.base.BaseMDP]] = None, n_seeds: int = 3)
seed: int

The seed that controls the parameters optimization procedure.

n_timesteps: int

The number of time steps for the Agent/MDP interaction.

max_interaction_time_s: float

The maximum amount of time the agent is allowed to interact with the MDP.

n_samples_agents: int

The number of samples from the Agent hyperparameter space.

n_samples_mdps: int

The number of samples from the MDP parameter spaces defined to provide an interesting but mild challenge.

log_every: int

The number of time steps between each time the performance metrics are computed.

emission_map: Type[colosseum.emission_maps.base.EmissionMap] = None

The emission map that will be used to provide a state/observation to the Agent. By default, it is tabular.

mdp_classes: List[Type[colosseum.mdp.base.BaseMDP]] = None

The MDP classes to be used in the hyperparameter optimization procedure. By default, we use all the available ones.

n_seeds: int = 3

The number of times each Agent/MDP interaction is repeated.

Returns
  • ExperimentConfig: The experiment configuration associated to the parameters optimization procedure.
DEFAULT_HYPEROPT_CONF = HyperOptConfig(seed=42, n_timesteps=250000, max_interaction_time_s=300, n_samples_agents=50, n_samples_mdps=5, log_every=100000, emission_map=None, mdp_classes=None, n_seeds=3)

The default parameters optimization configuration for the tabular setting.

SMALL_HYPEROPT_CONF = HyperOptConfig(seed=42, n_timesteps=30000, max_interaction_time_s=120, n_samples_agents=2, n_samples_mdps=2, log_every=10000, emission_map=None, mdp_classes=None, n_seeds=1)

The default small scale parameters optimization configuration for the tabular setting.

DEFAULT_HYPEROPT_CONF_NONTABULAR = HyperOptConfig(seed=42, n_timesteps=250000, max_interaction_time_s=600, n_samples_agents=50, n_samples_mdps=5, log_every=50000, emission_map=<class 'colosseum.emission_maps.state_info.StateInfo'>, mdp_classes=None, n_seeds=3)

The default parameters optimization configuration for the non-tabular setting.

SMALL_HYPEROPT_CONF_NONTABULAR = HyperOptConfig(seed=42, n_timesteps=50000, max_interaction_time_s=60, n_samples_agents=2, n_samples_mdps=2, log_every=10000, emission_map=<class 'colosseum.emission_maps.state_info.StateInfo'>, mdp_classes=None, n_seeds=1)

The default small scale parameters optimization configuration for the non-tabular setting.