colosseum.hardness.measures.sum_reciprocals_suboptimality_gaps

View Source

 1from typing import List, Tuple
 2
 3import numpy as np
 4
 5
 6def get_sum_reciprocals_suboptimality_gaps(
 7    Q: np.ndarray,
 8    V: np.ndarray,
 9    reachable_states: List[Tuple[int, int]] = None,
10    regularization: float = 0.1,
11):
12    """
13    Returns
14    -------
15    float
16        The sum of the reciprocals of the sub-optimality gaps. The reachable_states parameter is necessary in the
17    episodic setting, and it should be a list of tuple with in episode time step and state for each feasible combination
18    of in episode time step and state.
19    """
20    is_episodic = V.ndim == 2
21    gaps = V[..., None] - Q
22    if is_episodic:
23        assert reachable_states is not None, (
24            "For the episodic setting, it is necessary to provide the set of nodes that are reachable for any given"
25            "in episode time step."
26        )
27        gaps = np.vstack([gaps[h, s] for h, s in reachable_states])
28    return (1 / (gaps + regularization)).sum()

def get_sum_reciprocals_suboptimality_gaps( Q: numpy.ndarray, V: numpy.ndarray, reachable_states: List[Tuple[int, int]] = None, regularization: float = 0.1): View Source

 7def get_sum_reciprocals_suboptimality_gaps(
 8    Q: np.ndarray,
 9    V: np.ndarray,
10    reachable_states: List[Tuple[int, int]] = None,
11    regularization: float = 0.1,
12):
13    """
14    Returns
15    -------
16    float
17        The sum of the reciprocals of the sub-optimality gaps. The reachable_states parameter is necessary in the
18    episodic setting, and it should be a list of tuple with in episode time step and state for each feasible combination
19    of in episode time step and state.
20    """
21    is_episodic = V.ndim == 2
22    gaps = V[..., None] - Q
23    if is_episodic:
24        assert reachable_states is not None, (
25            "For the episodic setting, it is necessary to provide the set of nodes that are reachable for any given"
26            "in episode time step."
27        )
28        gaps = np.vstack([gaps[h, s] for h, s in reachable_states])
29    return (1 / (gaps + regularization)).sum()

Returns

float: The sum of the reciprocals of the sub-optimality gaps. The reachable_states parameter is required in the
episodic setting; it should be a list of (in-episode time step, state) tuples, one for each feasible combination
of in-episode time step and state.