colosseum.hardness.measures.sum_reciprocals_suboptimality_gaps
from typing import List, Tuple

import numpy as np


def get_sum_reciprocals_suboptimality_gaps(
    Q: np.ndarray,
    V: np.ndarray,
    reachable_states: List[Tuple[int, int]] = None,
    regularization: float = 0.1,
) -> float:
    """
    Compute the sum of the reciprocals of the regularized sub-optimality gaps.

    The gaps are computed as ``V[..., None] - Q``, i.e. the state values are broadcast over the trailing axis of ``Q``.
    ``regularization`` is added to every gap before the reciprocal is taken.

    Parameters
    ----------
    Q : np.ndarray
        The state-action values. Expected to have one more trailing axis than ``V``.
    V : np.ndarray
        The state values. A two-dimensional array selects the episodic setting.
    reachable_states : List[Tuple[int, int]], optional
        Required in the episodic setting and ignored otherwise. A list of (in-episode time step, state) tuples, one
        for each feasible combination of in-episode time step and state. Only the gaps of these pairs are summed.
    regularization : float
        The constant added to every gap before taking the reciprocal. By default, it is set to 0.1.

    Returns
    -------
    float
        The sum of the reciprocals of the regularized sub-optimality gaps. It is 0.0 when ``reachable_states`` is
        empty in the episodic setting.

    Raises
    ------
    AssertionError
        If ``V`` is two-dimensional and ``reachable_states`` is None.
    """
    gaps = V[..., None] - Q

    if V.ndim == 2:
        # Raised explicitly rather than through ``assert`` so that the check is not stripped under ``python -O``;
        # the exception type is kept for backward compatibility.
        if reachable_states is None:
            raise AssertionError(
                "For the episodic setting, it is necessary to provide the set of nodes that are reachable for any "
                "given in episode time step."
            )

        reachable_gaps = [gaps[h, s] for h, s in reachable_states]
        if not reachable_gaps:
            # np.vstack rejects an empty sequence; the sum over no state-action pairs is zero.
            return 0.0
        gaps = np.vstack(reachable_gaps)

    return (1 / (gaps + regularization)).sum()
def
get_sum_reciprocals_suboptimality_gaps( Q: numpy.ndarray, V: numpy.ndarray, reachable_states: List[Tuple[int, int]] = None, regularization: float = 0.1):
def get_sum_reciprocals_suboptimality_gaps(
    Q: np.ndarray,
    V: np.ndarray,
    reachable_states: List[Tuple[int, int]] = None,
    regularization: float = 0.1,
) -> float:
    """
    Compute the sum of the reciprocals of the regularized sub-optimality gaps.

    The gaps are computed as ``V[..., None] - Q``, i.e. the state values are broadcast over the trailing axis of ``Q``.
    ``regularization`` is added to every gap before the reciprocal is taken.

    Parameters
    ----------
    Q : np.ndarray
        The state-action values. Expected to have one more trailing axis than ``V``.
    V : np.ndarray
        The state values. A two-dimensional array selects the episodic setting.
    reachable_states : List[Tuple[int, int]], optional
        Required in the episodic setting and ignored otherwise. A list of (in-episode time step, state) tuples, one
        for each feasible combination of in-episode time step and state. Only the gaps of these pairs are summed.
    regularization : float
        The constant added to every gap before taking the reciprocal. By default, it is set to 0.1.

    Returns
    -------
    float
        The sum of the reciprocals of the regularized sub-optimality gaps. It is 0.0 when ``reachable_states`` is
        empty in the episodic setting.

    Raises
    ------
    AssertionError
        If ``V`` is two-dimensional and ``reachable_states`` is None.
    """
    gaps = V[..., None] - Q

    if V.ndim == 2:
        # Raised explicitly rather than through ``assert`` so that the check is not stripped under ``python -O``;
        # the exception type is kept for backward compatibility.
        if reachable_states is None:
            raise AssertionError(
                "For the episodic setting, it is necessary to provide the set of nodes that are reachable for any "
                "given in episode time step."
            )

        reachable_gaps = [gaps[h, s] for h, s in reachable_states]
        if not reachable_gaps:
            # np.vstack rejects an empty sequence; the sum over no state-action pairs is zero.
            return 0.0
        gaps = np.vstack(reachable_gaps)

    return (1 / (gaps + regularization)).sum()
Returns
- float: The sum of the reciprocals of the sub-optimality gaps. The reachable_states parameter is required in the
- episodic setting, and it should be a list of (in-episode time step, state) tuples, one for each feasible combination
- of in-episode time step and state.