colosseum.hyperopt.selection

  1import csv
  2import os
  3from glob import glob
  4from typing import Dict, Type, Iterable, Callable, Collection
  5
  6import numpy as np
  7
  8from colosseum import config
  9from colosseum.agent.agents.base import BaseAgent
 10from colosseum.benchmark.utils import retrieve_agent_configs
 11from colosseum.utils.miscellanea import ensure_folder
 12
 13
 14def retrieve_best_agent_config_from_hp_folder(
 15    agent_classes: Iterable[Type["BaseAgent"]] = None,
 16    indicator="normalized_cumulative_regret",
 17    reduce_seeds: Callable[[Collection], float] = np.mean,
 18    folder: str = None,
 19) -> Dict[Type["BaseAgent"], str]:
 20    """
 21    retrieve the best agents configurations from a folder with the results of a hyperparameter optimization procedure.
 22    Note that. by default, the indicator is minimized. If you want to maximise the indicator you can pass a
 23    `reduce_seeds` function that inverts the sign of the indicators, e.g. `lambda x : -np.mean(x)`.
 24
 25    Parameters
 26    ----------
 27    agent_classes : Iterable[Type["BaseAgent"]]
 28        The agent classes for which the function retrieves the best config. By default, the agent classes are retrieved
 29         from the hyper_opt folder.
 30    indicator : str
 31        The code name of the performance indicator that will be used in the choice of the best parameters. Check
 32        `MDPLoop.get_indicators()` to get a list of the available indicators. By default, the indicator is the
 33        'normalized_cumulative_regret'.
 34    reduce_seeds : Callable[[Collection], float]
 35        The function that reduces the values of different seeds. By default, the average is used.
 36    folder : str
 37        The folder where the parameters optimization results are stored. By default, the one configured in the
 38        package is used.
 39
 40    Returns
 41    -------
 42    Dict[Type["BaseAgent"], str]
 43        A dictionary that associates to each agent class its best configuration.
 44    """
 45
 46    if folder is None:
 47        folder = config.get_hyperopt_folder()
 48    else:
 49        folder = ensure_folder(folder)
 50
 51    latest_hyprms_folder = folder + "latest_hyprms" + os.sep
 52
 53    # Retrive the agent classes from the folder if no agent classes is given
 54    if agent_classes is None:
 55        agent_classes = []
 56        if os.path.isdir(folder + "hyperopt_continuous"):
 57            agent_classes += list(
 58                retrieve_agent_configs(folder + "hyperopt_continuous").keys()
 59            )
 60        if os.path.isdir(folder + "hyperopt_episodic"):
 61            agent_classes += list(
 62                retrieve_agent_configs(folder + "hyperopt_episodic").keys()
 63            )
 64        assert len(agent_classes) > 0, f"No agent classes found in the {folder}"
 65
 66    agent_config = dict()
 67    for agent_class in agent_classes:
 68        current_hp_folder = (
 69            folder
 70            + "hyperopt_"
 71            + ("episodic" if agent_class.is_episodic() else "continuous")
 72            + os.sep
 73        )
 74
 75        if os.path.isfile(latest_hyprms_folder + agent_class.__name__ + ".gin"):
 76            with open(latest_hyprms_folder + agent_class.__name__ + ".gin", "r") as f:
 77                agent_config[agent_class] = f.read()
 78        elif os.path.isdir(current_hp_folder):
 79            agent_config[agent_class] = get_best_agent_gin_config(
 80                agent_class, current_hp_folder, indicator, reduce_seeds
 81            )
 82        else:
 83            raise FileNotFoundError(
 84                f"The hyperoptimization folder for {agent_class.__name__} could not be found "
 85                f"neither in the cache ({latest_hyprms_folder}) nor in the full logs ({current_hp_folder})"
 86            )
 87    return agent_config
 88
 89
 90def get_best_agent_gin_config(
 91    agent_class: Type["BaseAgent"],
 92    hp_folder: str,
 93    indicator="normalized_cumulative_regret",
 94    reduce_seeds: Callable[[Collection], float] = np.mean,
 95) -> str:
 96    """
 97    retrieve the best agents configurations from the folder with the results of a hyperparameter optimization procedure
 98    given an indicator. Note that. by default, the indicator is minimized. If you want to maximise the indicator you
 99    can pass a `reduce_seeds` function that inverts the sign of the indicators, e.g. `lambda x : -np.mean(x)`.
100
101    Parameters
102    ----------
103    agent_class : Type["BaseAgent"]
104        The agent class for which the function retrieves the best config.
105    hp_folder : str
106        The folder where the results of the parameters optimization procedure are located.
107    indicator : str
108        The code name of the performance indicator that will be used in the choice of the best parameters. Check
109        `MDPLoop.get_indicators()` to get a list of the available indicators. By default, the indicator is the
110        'normalized_cumulative_regret'.
111    reduce_seeds : Callable[[Collection], float]
112        The function that reduces the values of different seeds. By default, the average is used.
113    Returns
114    -------
115    str
116        The gin config of the best parameters.
117    """
118    agents_configs = retrieve_agent_configs(hp_folder, False)[agent_class]
119
120    prms_scores = dict()
121    for prm in agents_configs:
122        agent_prm_logs = glob(
123            hp_folder
124            + f"logs/*{prm}{config.EXPERIMENT_SEPARATOR_PRMS}{agent_class.__name__}/*.csv",
125            recursive=True,
126        )
127
128        scores = []
129        for l_f in agent_prm_logs:
130            with open(l_f) as f:
131                reader = csv.DictReader(f)
132                for p in reader:
133                    pass
134                scores.append(float(p[indicator]))
135        score = reduce_seeds(scores)
136        prms_scores[prm] = score
137    best_prms = min(prms_scores, key=lambda k: prms_scores[k])
138
139    return agents_configs[best_prms]
def retrieve_best_agent_config_from_hp_folder( agent_classes: Iterable[Type[colosseum.agent.agents.base.BaseAgent]] = None, indicator='normalized_cumulative_regret', reduce_seeds: Callable[[Collection], float] = <function mean>, folder: str = None) -> Dict[Type[colosseum.agent.agents.base.BaseAgent], str]:
def retrieve_best_agent_config_from_hp_folder(
    agent_classes: Iterable[Type["BaseAgent"]] = None,
    indicator="normalized_cumulative_regret",
    reduce_seeds: Callable[[Collection], float] = np.mean,
    folder: str = None,
) -> Dict[Type["BaseAgent"], str]:
    """
    Collect, for each agent class, the best gin configuration found by a hyperparameter optimization procedure.

    For every agent class, a `<AgentClassName>.gin` file in the `latest_hyprms` sub-folder takes precedence and is
    returned as is; in that case `indicator` and `reduce_seeds` are not used. Otherwise, the configuration is selected
    from the logs in the `hyperopt_episodic` or `hyperopt_continuous` sub-folder through `get_best_agent_gin_config`.
    Note that, by default, the indicator is minimized. If you want to maximise the indicator, you can pass a
    `reduce_seeds` function that inverts the sign of the indicators, e.g. `lambda x : -np.mean(x)`.

    Parameters
    ----------
    agent_classes : Iterable[Type["BaseAgent"]]
        The agent classes for which the function retrieves the best config. By default, the agent classes are
        retrieved from the `hyperopt_continuous` and `hyperopt_episodic` sub-folders.
    indicator : str
        The code name of the performance indicator that will be used in the choice of the best parameters. Check
        `MDPLoop.get_indicators()` to get a list of the available indicators. By default, the indicator is the
        'normalized_cumulative_regret'.
    reduce_seeds : Callable[[Collection], float]
        The function that reduces the values of different seeds. By default, the average is used.
    folder : str
        The folder where the parameters optimization results are stored. By default, the one configured in the
        package is used.

    Returns
    -------
    Dict[Type["BaseAgent"], str]
        A dictionary that associates to each agent class its best configuration.

    Raises
    ------
    FileNotFoundError
        If, for an agent class, neither the cached gin file nor the corresponding hyperopt sub-folder exists.
    """

    folder = config.get_hyperopt_folder() if folder is None else ensure_folder(folder)
    latest_hyprms_folder = folder + "latest_hyprms" + os.sep

    # When no agent classes are given, discover them from the sub-folders that exist.
    if agent_classes is None:
        agent_classes = []
        for setting_folder in (
            folder + "hyperopt_continuous",
            folder + "hyperopt_episodic",
        ):
            if os.path.isdir(setting_folder):
                agent_classes.extend(retrieve_agent_configs(setting_folder).keys())
        assert len(agent_classes) > 0, f"No agent classes found in the {folder}"

    best_configs = {}
    for agent_class in agent_classes:
        setting = "episodic" if agent_class.is_episodic() else "continuous"
        current_hp_folder = folder + "hyperopt_" + setting + os.sep
        cached_gin_file = latest_hyprms_folder + agent_class.__name__ + ".gin"

        # The cached configuration, when available, wins over the full logs.
        if os.path.isfile(cached_gin_file):
            with open(cached_gin_file, "r") as f:
                best_configs[agent_class] = f.read()
            continue

        if not os.path.isdir(current_hp_folder):
            raise FileNotFoundError(
                f"The hyperoptimization folder for {agent_class.__name__} could not be found "
                f"neither in the cache ({latest_hyprms_folder}) nor in the full logs ({current_hp_folder})"
            )

        best_configs[agent_class] = get_best_agent_gin_config(
            agent_class, current_hp_folder, indicator, reduce_seeds
        )
    return best_configs

Retrieve the best agent configurations from a folder with the results of a hyperparameter optimization procedure. Note that, by default, the indicator is minimized. If you want to maximise the indicator, you can pass a reduce_seeds function that inverts the sign of the indicators, e.g. lambda x : -np.mean(x).

Parameters
  • agent_classes (Iterable[Type["BaseAgent"]]): The agent classes for which the function retrieves the best config. By default, the agent classes are retrieved from the hyper_opt folder.
  • indicator (str): The code name of the performance indicator that will be used in the choice of the best parameters. Check MDPLoop.get_indicators() to get a list of the available indicators. By default, the indicator is the 'normalized_cumulative_regret'.
  • reduce_seeds (Callable[[Collection], float]): The function that reduces the values of different seeds. By default, the average is used.
  • folder (str): The folder where the parameters optimization results are stored. By default, the one configured in the package is used.
Returns
  • Dict[Type["BaseAgent"], str]: A dictionary that associates to each agent class its best configuration.
def get_best_agent_gin_config( agent_class: Type[colosseum.agent.agents.base.BaseAgent], hp_folder: str, indicator='normalized_cumulative_regret', reduce_seeds: Callable[[Collection], float] = <function mean>) -> str:
 91def get_best_agent_gin_config(
 92    agent_class: Type["BaseAgent"],
 93    hp_folder: str,
 94    indicator="normalized_cumulative_regret",
 95    reduce_seeds: Callable[[Collection], float] = np.mean,
 96) -> str:
 97    """
 98    retrieve the best agents configurations from the folder with the results of a hyperparameter optimization procedure
 99    given an indicator. Note that. by default, the indicator is minimized. If you want to maximise the indicator you
100    can pass a `reduce_seeds` function that inverts the sign of the indicators, e.g. `lambda x : -np.mean(x)`.
101
102    Parameters
103    ----------
104    agent_class : Type["BaseAgent"]
105        The agent class for which the function retrieves the best config.
106    hp_folder : str
107        The folder where the results of the parameters optimization procedure are located.
108    indicator : str
109        The code name of the performance indicator that will be used in the choice of the best parameters. Check
110        `MDPLoop.get_indicators()` to get a list of the available indicators. By default, the indicator is the
111        'normalized_cumulative_regret'.
112    reduce_seeds : Callable[[Collection], float]
113        The function that reduces the values of different seeds. By default, the average is used.
114    Returns
115    -------
116    str
117        The gin config of the best parameters.
118    """
119    agents_configs = retrieve_agent_configs(hp_folder, False)[agent_class]
120
121    prms_scores = dict()
122    for prm in agents_configs:
123        agent_prm_logs = glob(
124            hp_folder
125            + f"logs/*{prm}{config.EXPERIMENT_SEPARATOR_PRMS}{agent_class.__name__}/*.csv",
126            recursive=True,
127        )
128
129        scores = []
130        for l_f in agent_prm_logs:
131            with open(l_f) as f:
132                reader = csv.DictReader(f)
133                for p in reader:
134                    pass
135                scores.append(float(p[indicator]))
136        score = reduce_seeds(scores)
137        prms_scores[prm] = score
138    best_prms = min(prms_scores, key=lambda k: prms_scores[k])
139
140    return agents_configs[best_prms]

Retrieve the best agent configuration from the folder with the results of a hyperparameter optimization procedure given an indicator. Note that, by default, the indicator is minimized. If you want to maximise the indicator, you can pass a reduce_seeds function that inverts the sign of the indicators, e.g. lambda x : -np.mean(x).

Parameters
  • agent_class (Type["BaseAgent"]): The agent class for which the function retrieves the best config.
  • hp_folder (str): The folder where the results of the parameters optimization procedure are located.
  • indicator (str): The code name of the performance indicator that will be used in the choice of the best parameters. Check MDPLoop.get_indicators() to get a list of the available indicators. By default, the indicator is the 'normalized_cumulative_regret'.
  • reduce_seeds (Callable[[Collection], float]): The function that reduces the values of different seeds. By default, the average is used.
Returns
  • str: The gin config of the best parameters.