colosseum.utils.loops

  1from typing import TYPE_CHECKING, Iterable, List, Tuple, Union
  2
  3import numpy as np
  4from dm_env import TimeStep
  5
  6from colosseum import config
  7
  8if TYPE_CHECKING:
  9    from colosseum.mdp import ContinuousMDP, EpisodicMDP
 10
 11
 12def human_loop(mdp: Union["ContinuousMDP", "EpisodicMDP"], other_policies: dict = None):
 13    """
 14    allows a human to control an MDP.
 15    """
 16
 17    verba = lambda: print(mdp.get_grid_representation(mdp.cur_node))
 18
 19    print("Start calculating the optimal policy")
 20    optimal_policy = mdp.get_optimal_policy(False)
 21    print("End calculating the optimal policy")
 22
 23    state = mdp.reset()
 24    while True:
 25        print("State:", state)
 26        verba()
 27
 28        if mdp.is_episodic():
 29            optimal_action = optimal_policy[mdp.h, mdp.node_to_index[mdp.cur_node]]
 30        else:
 31            optimal_action = optimal_policy[mdp.node_to_index[mdp.cur_node]]
 32        print(f"The optimal action for this state is:{optimal_action}")
 33
 34        if other_policies is not None:
 35            for pi_name, pi in other_policies.items():
 36                print(
 37                    f"The action of policy {pi_name} for this state is:{np.argmax(pi[mdp.cur_node])}"
 38                )
 39
 40        action = int(
 41            input(
 42                "Available actions are: "
 43                + ",".join(map(str, range(mdp.n_actions)))
 44                + ".\tChoose one to act or type anything else to terminate.\n"
 45            )
 46        )
 47        if action not in range(mdp.n_actions):
 48            break
 49        state = mdp.step(action)
 50        if state.last():
 51            print("State:", state)
 52            state = mdp.reset()
 53
 54
 55def random_loop(
 56    mdp: Union["ContinuousMDP", "EpisodicMDP"],
 57    N: int,
 58    return_actions: bool = False,
 59    human_readable=False,
 60) -> Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]:
 61    """
 62    generates interactions data by selecting actions a random.
 63
 64    Parameters
 65    ----------
 66    mdp: Union["ContinuousMDP", "EpisodicMDP"]
 67        The MDP instance.
 68    N : int
 69        The number of interactions.
 70    return_actions: bool, optional
 71        If True, the selected actions are returned. By default, it is set to False,
 72    human_readable: bool
 73        If True, the state information is printed in a human interpretable form. By default, it is set to False.
 74
 75    Returns
 76    -------
 77    Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]
 78        The data generated from the interactions.
 79    """
 80
 81    if human_readable:
 82        verba = lambda: print(mdp.get_grid_repr())
 83    else:
 84        verba = lambda: print("State:", state, "Action: ", action)
 85
 86    states = []
 87    state = mdp.reset()
 88    states.append(state)
 89    actions = []
 90    action = None
 91    while len(states) < N:
 92        if config.VERBOSE_LEVEL > 0:
 93            verba()
 94        state, action = mdp.random_step()
 95        if return_actions:
 96            actions.append(action)
 97        states.append(state)
 98        if state.last():
 99            if config.VERBOSE_LEVEL > 0:
100                print("Last state:", state)
101            state = mdp.reset()
102            states.append(state)
103
104    if return_actions:
105        return states, actions
106    return states
107
108
def prefixed_action_loop(
    mdp: Union["ContinuousMDP", "EpisodicMDP"],
    actions: Iterable[int],
    human_readable: bool = False,
) -> List[TimeStep]:
    """
    Generates interactions with the MDP following the given sequence of actions.

    Parameters
    ----------
    mdp : Union["ContinuousMDP", "EpisodicMDP"]
        The MDP instance.
    actions : Iterable[int]
        The actions to be selected, in order.
    human_readable : bool
        If True, the state information is printed in a human interpretable
        form. By default, it is set to False.

    Returns
    -------
    List[TimeStep]
        The time steps generated from the interactions.
    """

    # The non-human-readable printer closes over `state` and `action`, so it
    # reports the most recent transition whenever verbosity is enabled.
    if human_readable:
        verba = lambda: print(mdp.get_grid_repr())
    else:
        verba = lambda: print("State:", state, "Action: ", action)

    state = mdp.reset()
    states = [state]
    for action in actions:
        if config.VERBOSE_LEVEL > 0:
            verba()
        state = mdp.step(action)
        states.append(state)
        if state.last():
            if config.VERBOSE_LEVEL > 0:
                print("Last state:", state)
            state = mdp.reset()
            states.append(state)
    return states
def human_loop( mdp: Union[colosseum.mdp.base_infinite.ContinuousMDP, colosseum.mdp.base_finite.EpisodicMDP], other_policies: dict = None):
13def human_loop(mdp: Union["ContinuousMDP", "EpisodicMDP"], other_policies: dict = None):
14    """
15    allows a human to control an MDP.
16    """
17
18    verba = lambda: print(mdp.get_grid_representation(mdp.cur_node))
19
20    print("Start calculating the optimal policy")
21    optimal_policy = mdp.get_optimal_policy(False)
22    print("End calculating the optimal policy")
23
24    state = mdp.reset()
25    while True:
26        print("State:", state)
27        verba()
28
29        if mdp.is_episodic():
30            optimal_action = optimal_policy[mdp.h, mdp.node_to_index[mdp.cur_node]]
31        else:
32            optimal_action = optimal_policy[mdp.node_to_index[mdp.cur_node]]
33        print(f"The optimal action for this state is:{optimal_action}")
34
35        if other_policies is not None:
36            for pi_name, pi in other_policies.items():
37                print(
38                    f"The action of policy {pi_name} for this state is:{np.argmax(pi[mdp.cur_node])}"
39                )
40
41        action = int(
42            input(
43                "Available actions are: "
44                + ",".join(map(str, range(mdp.n_actions)))
45                + ".\tChoose one to act or type anything else to terminate.\n"
46            )
47        )
48        if action not in range(mdp.n_actions):
49            break
50        state = mdp.step(action)
51        if state.last():
52            print("State:", state)
53            state = mdp.reset()

allows a human to control an MDP.

def random_loop( mdp: Union[colosseum.mdp.base_infinite.ContinuousMDP, colosseum.mdp.base_finite.EpisodicMDP], N: int, return_actions: bool = False, human_readable=False) -> Union[Tuple[List[dm_env._environment.TimeStep], List[int]], List[dm_env._environment.TimeStep]]:
 56def random_loop(
 57    mdp: Union["ContinuousMDP", "EpisodicMDP"],
 58    N: int,
 59    return_actions: bool = False,
 60    human_readable=False,
 61) -> Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]:
 62    """
 63    generates interactions data by selecting actions at random.
 64
 65    Parameters
 66    ----------
 67    mdp: Union["ContinuousMDP", "EpisodicMDP"]
 68        The MDP instance.
 69    N : int
 70        The number of interactions.
 71    return_actions: bool, optional
 72        If True, the selected actions are returned. By default, it is set to False,
 73    human_readable: bool
 74        If True, the state information is printed in a human interpretable form. By default, it is set to False.
 75
 76    Returns
 77    -------
 78    Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]
 79        The data generated from the interactions.
 80    """
 81
 82    if human_readable:
 83        verba = lambda: print(mdp.get_grid_repr())
 84    else:
 85        verba = lambda: print("State:", state, "Action: ", action)
 86
 87    states = []
 88    state = mdp.reset()
 89    states.append(state)
 90    actions = []
 91    action = None
 92    while len(states) < N:
 93        if config.VERBOSE_LEVEL > 0:
 94            verba()
 95        state, action = mdp.random_step()
 96        if return_actions:
 97            actions.append(action)
 98        states.append(state)
 99        if state.last():
100            if config.VERBOSE_LEVEL > 0:
101                print("Last state:", state)
102            state = mdp.reset()
103            states.append(state)
104
105    if return_actions:
106        return states, actions
107    return states

generates interactions data by selecting actions at random.

Parameters
  • mdp (Union["ContinuousMDP", "EpisodicMDP"]): The MDP instance.
  • N (int): The number of interactions.
  • return_actions (bool, optional): If True, the selected actions are returned. By default, it is set to False,
  • human_readable (bool): If True, the state information is printed in a human interpretable form. By default, it is set to False.
Returns
  • Union[Tuple[List[TimeStep], List[int]], List[TimeStep]]: The data generated from the interactions.
def prefixed_action_loop( mdp: Union[colosseum.mdp.base_infinite.ContinuousMDP, colosseum.mdp.base_finite.EpisodicMDP], actions: Iterable[int], human_readable: bool = False) -> List[dm_env._environment.TimeStep]:
110def prefixed_action_loop(
111    mdp: Union["ContinuousMDP", "EpisodicMDP"],
112    actions: Iterable[int],
113    human_readable: bool = False,
114) -> List[TimeStep]:
115    """
116    generates interaction with the MDP according to the actions given in input.
117
118    Parameters
119    ----------
120    mdp: Union["ContinuousMDP", "EpisodicMDP"]
121        The MDP instance.
122    actions : Iterable[int]
123        The actions to be selected.
124    human_readable: bool
125        If True, the state information is printed in a human interpretable form. By default, it is set to False.
126
127    Returns
128    -------
129    List[TimeStep]
130        The data generated from the interactions.
131    """
132
133    if human_readable:
134        verba = lambda: print(mdp.get_grid_repr())
135    else:
136        verba = lambda: print("State:", state, "Action: ", action)
137
138    states = []
139    state = mdp.reset()
140    states.append(state)
141    for action in actions:
142        if config.VERBOSE_LEVEL > 0:
143            verba()
144        state = mdp.step(action)
145        states.append(state)
146        if state.last():
147            if config.VERBOSE_LEVEL > 0:
148                print("Last state:", state)
149            state = mdp.reset()
150            states.append(state)
151    return states

generates interaction with the MDP according to the actions given in input.

Parameters
  • mdp (Union["ContinuousMDP", "EpisodicMDP"]): The MDP instance.
  • actions (Iterable[int]): The actions to be selected.
  • human_readable (bool): If True, the state information is printed in a human interpretable form. By default, it is set to False.
Returns
  • List[TimeStep]: The data generated from the interactions.