"""colosseum.utils.loops"""
from typing import TYPE_CHECKING, Iterable, List, Tuple, Union

import numpy as np
from dm_env import TimeStep

from colosseum import config

if TYPE_CHECKING:
    from colosseum.mdp import ContinuousMDP, EpisodicMDP


def human_loop(mdp: Union["ContinuousMDP", "EpisodicMDP"], other_policies: dict = None):
    """
    Allows a human to control an MDP from the command line.

    Parameters
    ----------
    mdp : Union["ContinuousMDP", "EpisodicMDP"]
        The MDP instance.
    other_policies : dict, optional
        Maps a policy name to a policy; for each one, the action it selects in
        the current state is printed. By default, it is set to None.
    """

    # Prints a human-readable grid representation of the current state.
    # NOTE(review): the other loops in this module call mdp.get_grid_repr() —
    # confirm both accessors exist on the MDP classes.
    verba = lambda: print(mdp.get_grid_representation(mdp.cur_node))

    print("Start calculating the optimal policy")
    optimal_policy = mdp.get_optimal_policy(False)
    print("End calculating the optimal policy")

    state = mdp.reset()
    while True:
        print("State:", state)
        verba()

        # Episodic optimal policies are indexed by (in-episode time step, state).
        if mdp.is_episodic():
            optimal_action = optimal_policy[mdp.h, mdp.node_to_index[mdp.cur_node]]
        else:
            optimal_action = optimal_policy[mdp.node_to_index[mdp.cur_node]]
        print(f"The optimal action for this state is:{optimal_action}")

        if other_policies is not None:
            for pi_name, pi in other_policies.items():
                print(
                    f"The action of policy {pi_name} for this state is:{np.argmax(pi[mdp.cur_node])}"
                )

        raw_action = input(
            "Available actions are: "
            + ",".join(map(str, range(mdp.n_actions)))
            + ".\tChoose one to act or type anything else to terminate.\n"
        )
        try:
            action = int(raw_action)
        except ValueError:
            # Bug fix: non-integer input used to raise ValueError instead of
            # terminating, contrary to what the prompt promises.
            break
        if action not in range(mdp.n_actions):
            break
        state = mdp.step(action)
        if state.last():
            print("State:", state)
            state = mdp.reset()


def random_loop(
    mdp: Union["ContinuousMDP", "EpisodicMDP"],
    N: int,
    return_actions: bool = False,
    human_readable=False,
) -> Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]:
    """
    Generates interaction data by selecting actions at random.

    Parameters
    ----------
    mdp : Union["ContinuousMDP", "EpisodicMDP"]
        The MDP instance.
    N : int
        The number of interactions.
    return_actions : bool, optional
        If True, the selected actions are returned. By default, it is set to False.
    human_readable : bool
        If True, the state information is printed in a human interpretable form.
        By default, it is set to False.

    Returns
    -------
    Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]
        The data generated from the interactions.
    """

    if human_readable:
        verba = lambda: print(mdp.get_grid_repr())
    else:
        # Late-binding closure: prints the *current* state/action at call time.
        verba = lambda: print("State:", state, "Action: ", action)

    states = []
    state = mdp.reset()
    states.append(state)
    actions = []
    action = None
    while len(states) < N:
        if config.VERBOSE_LEVEL > 0:
            verba()
        state, action = mdp.random_step()
        if return_actions:
            actions.append(action)
        states.append(state)
        if state.last():
            if config.VERBOSE_LEVEL > 0:
                print("Last state:", state)
            # Start a new trajectory; the reset state is recorded as well.
            state = mdp.reset()
            states.append(state)

    if return_actions:
        return states, actions
    return states


def prefixed_action_loop(
    mdp: Union["ContinuousMDP", "EpisodicMDP"],
    actions: Iterable[int],
    human_readable: bool = False,
) -> List[TimeStep]:
    """
    Generates interaction with the MDP according to the actions given in input.

    Parameters
    ----------
    mdp : Union["ContinuousMDP", "EpisodicMDP"]
        The MDP instance.
    actions : Iterable[int]
        The actions to be selected.
    human_readable : bool
        If True, the state information is printed in a human interpretable form.
        By default, it is set to False.

    Returns
    -------
    List[TimeStep]
        The data generated from the interactions.
    """

    if human_readable:
        verba = lambda: print(mdp.get_grid_repr())
    else:
        # Late-binding closure: prints the *current* state/action at call time.
        verba = lambda: print("State:", state, "Action: ", action)

    states = []
    state = mdp.reset()
    states.append(state)
    for action in actions:
        if config.VERBOSE_LEVEL > 0:
            verba()
        state = mdp.step(action)
        states.append(state)
        if state.last():
            if config.VERBOSE_LEVEL > 0:
                print("Last state:", state)
            # Start a new trajectory; the reset state is recorded as well.
            state = mdp.reset()
            states.append(state)
    return states
# human_loop(mdp: Union[ContinuousMDP, EpisodicMDP], other_policies: dict = None)
def human_loop(mdp: Union["ContinuousMDP", "EpisodicMDP"], other_policies: dict = None):
    """
    Allows a human to control an MDP from the command line.

    Parameters
    ----------
    mdp : Union["ContinuousMDP", "EpisodicMDP"]
        The MDP instance.
    other_policies : dict, optional
        Maps a policy name to a policy; for each one, the action it selects in
        the current state is printed. By default, it is set to None.
    """

    # Prints a human-readable grid representation of the current state.
    verba = lambda: print(mdp.get_grid_representation(mdp.cur_node))

    print("Start calculating the optimal policy")
    optimal_policy = mdp.get_optimal_policy(False)
    print("End calculating the optimal policy")

    state = mdp.reset()
    while True:
        print("State:", state)
        verba()

        # Episodic optimal policies are indexed by (in-episode time step, state).
        if mdp.is_episodic():
            optimal_action = optimal_policy[mdp.h, mdp.node_to_index[mdp.cur_node]]
        else:
            optimal_action = optimal_policy[mdp.node_to_index[mdp.cur_node]]
        print(f"The optimal action for this state is:{optimal_action}")

        if other_policies is not None:
            for pi_name, pi in other_policies.items():
                print(
                    f"The action of policy {pi_name} for this state is:{np.argmax(pi[mdp.cur_node])}"
                )

        raw_action = input(
            "Available actions are: "
            + ",".join(map(str, range(mdp.n_actions)))
            + ".\tChoose one to act or type anything else to terminate.\n"
        )
        try:
            action = int(raw_action)
        except ValueError:
            # Bug fix: non-integer input used to raise ValueError instead of
            # terminating, contrary to what the prompt promises.
            break
        if action not in range(mdp.n_actions):
            break
        state = mdp.step(action)
        if state.last():
            print("State:", state)
            state = mdp.reset()
# Allows a human to control an MDP.
# random_loop(mdp: Union[ContinuousMDP, EpisodicMDP], N: int, return_actions: bool = False,
#             human_readable=False) -> Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]
def random_loop(
    mdp: Union["ContinuousMDP", "EpisodicMDP"],
    N: int,
    return_actions: bool = False,
    human_readable=False,
) -> Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]:
    """
    Generates interaction data by selecting actions at random.

    Parameters
    ----------
    mdp : Union["ContinuousMDP", "EpisodicMDP"]
        The MDP instance.
    N : int
        The number of interactions.
    return_actions : bool, optional
        If True, the selected actions are returned. By default, it is set to False.
    human_readable : bool
        If True, the state information is printed in a human interpretable form.
        By default, it is set to False.

    Returns
    -------
    Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]
        The data generated from the interactions.
    """

    timesteps = []
    chosen_actions = []
    ts = mdp.reset()
    timesteps.append(ts)
    a = None

    def _report():
        # Describe the current time step in the requested format.
        if human_readable:
            print(mdp.get_grid_repr())
        else:
            print("State:", ts, "Action: ", a)

    # Keep sampling until N time steps have been collected.
    while len(timesteps) < N:
        if config.VERBOSE_LEVEL > 0:
            _report()
        ts, a = mdp.random_step()
        if return_actions:
            chosen_actions.append(a)
        timesteps.append(ts)
        if ts.last():
            if config.VERBOSE_LEVEL > 0:
                print("Last state:", ts)
            # A new trajectory begins; its initial time step is recorded too.
            ts = mdp.reset()
            timesteps.append(ts)

    return (timesteps, chosen_actions) if return_actions else timesteps
# Generates interaction data by selecting actions at random.
# Parameters:
# - mdp (Union["ContinuousMDP", "EpisodicMDP"]): The MDP instance.
# - N (int): The number of interactions.
# - return_actions (bool, optional): If True, the selected actions are returned. By default, it is set to False.
# - human_readable (bool): If True, the state information is printed in a human interpretable form. By default, it is set to False.
# Returns:
# - Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]: The data generated from the interactions.
# prefixed_action_loop(mdp: Union[ContinuousMDP, EpisodicMDP], actions: Iterable[int],
#                      human_readable: bool = False) -> List[TimeStep]
def prefixed_action_loop(
    mdp: Union["ContinuousMDP", "EpisodicMDP"],
    actions: Iterable[int],
    human_readable: bool = False,
) -> List[TimeStep]:
    """
    Generates interaction with the MDP according to the actions given in input.

    Parameters
    ----------
    mdp : Union["ContinuousMDP", "EpisodicMDP"]
        The MDP instance.
    actions : Iterable[int]
        The actions to be selected.
    human_readable : bool
        If True, the state information is printed in a human interpretable form.
        By default, it is set to False.

    Returns
    -------
    List[TimeStep]
        The data generated from the interactions.
    """

    def _report():
        # Describe the current time step in the requested format.
        if human_readable:
            print(mdp.get_grid_repr())
        else:
            print("State:", ts, "Action: ", act)

    ts = mdp.reset()
    collected = [ts]
    for act in actions:
        if config.VERBOSE_LEVEL > 0:
            _report()
        ts = mdp.step(act)
        collected.append(ts)
        if ts.last():
            if config.VERBOSE_LEVEL > 0:
                print("Last state:", ts)
            # A new trajectory begins; its initial time step is recorded too.
            ts = mdp.reset()
            collected.append(ts)
    return collected
# Generates interaction with the MDP according to the actions given in input.
# Parameters:
# - mdp (Union["ContinuousMDP", "EpisodicMDP"]): The MDP instance.
# - actions (Iterable[int]): The actions to be selected.
# - human_readable (bool): If True, the state information is printed in a human interpretable form. By default, it is set to False.
# Returns:
# - List[TimeStep]: The data generated from the interactions.