colosseum.hyperopt.utils

 1from typing import List, Type, Dict, Tuple
 2
 3from colosseum.agent.agents.base import BaseAgent
 4from colosseum.agent.utils import sample_agent_gin_configs_file
 5from colosseum.benchmark.benchmark import ColosseumBenchmark
 6from colosseum.hyperopt import HyperOptConfig
 7from colosseum.utils import get_colosseum_mdp_classes
 8from colosseum.utils.miscellanea import sample_mdp_gin_configs_file
 9
10
def sample_agent_configs_and_benchmarks_for_hyperopt(
    agent_cls: List[Type[BaseAgent]], hpoc: HyperOptConfig
) -> List[Tuple[Dict[Type[BaseAgent], str], ColosseumBenchmark]]:
    """
    Sample the agent configurations and build the MDP benchmarks used by the parameters optimization procedure.

    Every agent class is sampled with ``sample_agent_gin_configs_file`` and grouped by the value of its
    ``is_episodic()`` method. For each group that contains at least one agent class, the MDP classes returned by
    ``get_colosseum_mdp_classes`` for that setting are sampled with ``sample_mdp_gin_configs_file`` and wrapped in
    a ``ColosseumBenchmark``.

    Parameters
    ----------
    agent_cls : List[Type[BaseAgent]]
        The agent classes to be optimized.
    hpoc : HyperOptConfig
        The parameters optimization procedure configuration. The fields read here are ``n_samples_agents``,
        ``n_samples_mdps``, ``seed`` and ``experiment_config``.

    Returns
    -------
    List[Tuple[Dict[Type[BaseAgent], str], ColosseumBenchmark]]
        The agents configurations and the benchmarks with the MDP configs for the parameters optimization procedure.
        The episodic pair (benchmark name "hyperopt_episodic") comes first and the continuous pair (benchmark name
        "hyperopt_continuous") second; a pair is omitted when no agent class belongs to that setting.
    """

    # Sampled agent configurations, grouped by the truth value of `is_episodic()`.
    sampled_agents_by_setting = {True: {}, False: {}}
    for agent_class in agent_cls:
        sampled_agent_config = sample_agent_gin_configs_file(
            agent_class, hpoc.n_samples_agents, hpoc.seed
        )
        setting = bool(agent_class.is_episodic())
        sampled_agents_by_setting[setting][agent_class] = sampled_agent_config

    # The episodic setting is handled before the continuous one so that the order of the output is stable.
    settings = ((True, "hyperopt_episodic"), (False, "hyperopt_continuous"))

    sampled_pairs = []
    for is_episodic_setting, benchmark_name in settings:
        agents_configs = sampled_agents_by_setting[is_episodic_setting]
        if not agents_configs:
            # No agent class for this setting, so no MDP is sampled and no benchmark is created.
            continue

        mdps_configs = {
            mdp_class: sample_mdp_gin_configs_file(
                mdp_class, hpoc.n_samples_mdps, hpoc.seed
            )
            for mdp_class in get_colosseum_mdp_classes(is_episodic_setting)
        }
        benchmark = ColosseumBenchmark(
            benchmark_name, mdps_configs, hpoc.experiment_config
        )
        sampled_pairs.append((agents_configs, benchmark))

    return sampled_pairs
def sample_agent_configs_and_benchmarks_for_hyperopt( agent_cls: List[Type[colosseum.agent.agents.base.BaseAgent]], hpoc: colosseum.hyperopt.config.HyperOptConfig) -> List[Tuple[Dict[Type[colosseum.agent.agents.base.BaseAgent], str], colosseum.benchmark.benchmark.ColosseumBenchmark]]:
def sample_agent_configs_and_benchmarks_for_hyperopt(
    agent_cls: List[Type[BaseAgent]], hpoc: HyperOptConfig
) -> List[Tuple[Dict[Type[BaseAgent], str], ColosseumBenchmark]]:
    """
    Sample the agent configurations and build the MDP benchmarks used by the parameters optimization procedure.

    Every agent class is sampled with ``sample_agent_gin_configs_file`` and grouped by the value of its
    ``is_episodic()`` method. For each group that contains at least one agent class, the MDP classes returned by
    ``get_colosseum_mdp_classes`` for that setting are sampled with ``sample_mdp_gin_configs_file`` and wrapped in
    a ``ColosseumBenchmark``.

    Parameters
    ----------
    agent_cls : List[Type[BaseAgent]]
        The agent classes to be optimized.
    hpoc : HyperOptConfig
        The parameters optimization procedure configuration. The fields read here are ``n_samples_agents``,
        ``n_samples_mdps``, ``seed`` and ``experiment_config``.

    Returns
    -------
    List[Tuple[Dict[Type[BaseAgent], str], ColosseumBenchmark]]
        The agents configurations and the benchmarks with the MDP configs for the parameters optimization procedure.
        The episodic pair (benchmark name "hyperopt_episodic") comes first and the continuous pair (benchmark name
        "hyperopt_continuous") second; a pair is omitted when no agent class belongs to that setting.
    """

    # Sampled agent configurations, grouped by the truth value of `is_episodic()`.
    sampled_agents_by_setting = {True: {}, False: {}}
    for agent_class in agent_cls:
        sampled_agent_config = sample_agent_gin_configs_file(
            agent_class, hpoc.n_samples_agents, hpoc.seed
        )
        setting = bool(agent_class.is_episodic())
        sampled_agents_by_setting[setting][agent_class] = sampled_agent_config

    # The episodic setting is handled before the continuous one so that the order of the output is stable.
    settings = ((True, "hyperopt_episodic"), (False, "hyperopt_continuous"))

    sampled_pairs = []
    for is_episodic_setting, benchmark_name in settings:
        agents_configs = sampled_agents_by_setting[is_episodic_setting]
        if not agents_configs:
            # No agent class for this setting, so no MDP is sampled and no benchmark is created.
            continue

        mdps_configs = {
            mdp_class: sample_mdp_gin_configs_file(
                mdp_class, hpoc.n_samples_mdps, hpoc.seed
            )
            for mdp_class in get_colosseum_mdp_classes(is_episodic_setting)
        }
        benchmark = ColosseumBenchmark(
            benchmark_name, mdps_configs, hpoc.experiment_config
        )
        sampled_pairs.append((agents_configs, benchmark))

    return sampled_pairs

Samples the agent configurations from the agents' parameter sample spaces and the MDP configurations from the MDP sampling functions, to be used in the parameters optimization procedure.

Parameters
  • agent_cls (List[Type[BaseAgent]]): The agent classes to be optimized.
  • hpoc (HyperOptConfig): The parameters optimization procedure configuration.
Returns
  • List[Tuple[Dict[Type[BaseAgent], str], ColosseumBenchmark]]: The agents configurations and the benchmarks with the MDP configs for the parameters optimization procedure.