Skip to content

velora.models.sac

Documentation

Customization: Modules

Soft Actor-Critic (SAC) network modules built using PyTorch.

SACActor

Bases: LiquidNCPModule

A Liquid NCP Actor Network for the SAC algorithm. Outputs a Gaussian distribution over actions.

Usable with continuous action spaces.

Source code in velora/models/sac/continuous.py
Python
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class SACActor(LiquidNCPModule):
    """
    Stochastic actor for the SAC algorithm on top of a Liquid NCP backbone.

    The backbone output is split in half along the last dimension into a
    mean and a log standard deviation of a Gaussian. Samples are squashed
    with `tanh` and rescaled with `action_scale` and `action_bias`.

    Usable with continuous action spaces.
    """

    action_scale: torch.Tensor
    action_bias: torch.Tensor

    def __init__(
        self,
        num_obs: int,
        n_neurons: int,
        num_actions: int,
        action_scale: torch.Tensor,
        action_bias: torch.Tensor,
        *,
        log_std_min: float = -5,
        log_std_max: float = 2,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            num_obs (int): the number of input observations
            n_neurons (int): the number of hidden neurons
            num_actions (int): the number of actions
            action_scale (torch.Tensor): multiplier applied to the
                `tanh`-squashed actions to reach the environment's action range
            action_bias (torch.Tensor): offset added to the scaled actions to
                centre them on the environment's action range
            log_std_min (float, optional): lower clamp applied to the predicted
                log standard deviation
            log_std_max (float, optional): upper clamp applied to the predicted
                log standard deviation
            device (torch.device, optional): forwarded to the base class
                constructor
        """
        # forward() splits the backbone output into a mean half and a
        # log-std half, hence `num_actions * 2`
        split_width = num_actions * 2
        super().__init__(num_obs, n_neurons, split_width, device=device)

        self.log_std_min, self.log_std_max = log_std_min, log_std_max

        self.register_buffer("action_scale", action_scale)
        self.register_buffer("action_bias", action_bias)

    @torch.jit.ignore
    def get_sample(
        self, mean: torch.Tensor, std: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Draws a reparameterized sample from `Normal(mean, std)` and evaluates
        its log probability under that same distribution.

        Parameters:
            mean (torch.Tensor): means of the Gaussian.
            std (torch.Tensor): standard deviations of the Gaussian.

        Returns:
            actions (torch.Tensor): the drawn (unsquashed) samples.
            log_probs (torch.Tensor): element-wise log probabilities of the
                samples.
        """
        gaussian = Normal(loc=mean, scale=std)

        # rsample() keeps the draw differentiable w.r.t. mean and std
        draw = gaussian.rsample()

        return draw, gaussian.log_prob(draw)

    @torch.jit.ignore
    def predict(
        self, obs: torch.Tensor, hidden: torch.Tensor | None = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Performs a deterministic prediction: the mean half of the backbone
        output is squashed and rescaled, and no sampling takes place.

        Parameters:
            obs (torch.Tensor): the batch of state observations
            hidden (torch.Tensor, optional): the hidden state

        Returns:
            actions (torch.Tensor): deterministic actions in the environment's
                action range
            hidden (torch.Tensor): the new hidden state
        """
        features, next_hidden = self.ncp(obs, hidden)

        # Only the mean half is needed; the log-std half is discarded
        mu, _ = features.chunk(2, dim=-1)

        # tanh squashes into [-1, 1]; scale and bias map to the env range
        squashed = torch.tanh(mu)
        actions = squashed * self.action_scale + self.action_bias

        return actions, next_hidden

    def forward(
        self, obs: torch.Tensor, hidden: torch.Tensor | None = None
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Performs a forward pass through the network, sampling one action per
        observation.

        Parameters:
            obs (torch.Tensor): the batch of state observations
            hidden (torch.Tensor, optional): the hidden state

        Returns:
            actions (torch.Tensor): sampled actions in the environment's
                action range.
            log_prob (torch.Tensor): log probabilities of the sampled actions,
                summed over the last dimension (which is kept with size 1).
            hidden (torch.Tensor): the new hidden state.
        """
        features, next_hidden = self.ncp(obs, hidden)

        # First half of the last dimension is the mean, second half the log-std
        mu, log_sigma = features.chunk(2, dim=-1)

        # Clamp the log-std to [log_std_min, log_std_max] before exponentiating
        log_sigma = log_sigma.clamp(self.log_std_min, self.log_std_max)
        sigma = torch.exp(log_sigma)

        # Draw from the Gaussian, then squash into [-1, 1]
        raw_sample, gaussian_log_prob = self.get_sample(mu, sigma)
        squashed = torch.tanh(raw_sample)

        # Map the squashed sample onto the environment's action range
        actions = squashed * self.action_scale + self.action_bias

        # Correct the Gaussian log-density for the tanh squashing and the
        # rescaling; the 1e-6 keeps the logarithm finite as |squashed| -> 1
        jacobian = self.action_scale * (1 - squashed.pow(2))
        log_prob = gaussian_log_prob - torch.log(jacobian + 1e-6)
        log_prob = torch.sum(log_prob, dim=-1, keepdim=True)

        return actions, log_prob, next_hidden

__init__(num_obs, n_neurons, num_actions, action_scale, action_bias, *, log_std_min=-5, log_std_max=2, device=None)

Parameters:

Name Type Description Default
num_obs int

the number of input observations

required
n_neurons int

the number of hidden neurons

required
num_actions int

the number of actions

required
action_scale torch.Tensor

scale factor to map normalized actions to environment's action range

required
action_bias torch.Tensor

bias/offset to center normalized actions to environment's action range

required
log_std_min float

lower bound for the log standard deviation of the action distribution. Controls the minimum variance of actions

-5
log_std_max float

upper bound for the log standard deviation of the action distribution. Controls the maximum variance of actions

2
device torch.device

the device to perform computations on

None
Source code in velora/models/sac/continuous.py
Python
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def __init__(
    self,
    num_obs: int,
    n_neurons: int,
    num_actions: int,
    action_scale: torch.Tensor,
    action_bias: torch.Tensor,
    *,
    log_std_min: float = -5,
    log_std_max: float = 2,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        num_obs (int): the number of input observations
        n_neurons (int): the number of hidden neurons
        num_actions (int): the number of actions
        action_scale (torch.Tensor): multiplier applied to the
            `tanh`-squashed actions to reach the environment's action range
        action_bias (torch.Tensor): offset added to the scaled actions to
            centre them on the environment's action range
        log_std_min (float, optional): lower clamp applied to the predicted
            log standard deviation
        log_std_max (float, optional): upper clamp applied to the predicted
            log standard deviation
        device (torch.device, optional): forwarded to the base class
            constructor
    """
    # The backbone output is later split into a mean half and a log-std
    # half, hence `num_actions * 2`
    split_width = num_actions * 2
    super().__init__(num_obs, n_neurons, split_width, device=device)

    self.log_std_min, self.log_std_max = log_std_min, log_std_max

    self.register_buffer("action_scale", action_scale)
    self.register_buffer("action_bias", action_bias)

forward(obs, hidden=None)

Performs a forward pass through the network.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required
hidden torch.Tensor

the hidden state

None

Returns:

Name Type Description
actions torch.Tensor

the action predictions.

log_prob torch.Tensor

log probabilities of actions.

hidden torch.Tensor

the new hidden state.

Source code in velora/models/sac/continuous.py
Python
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def forward(
    self, obs: torch.Tensor, hidden: torch.Tensor | None = None
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Performs a forward pass through the network.

    Parameters:
        obs (torch.Tensor): the batch of state observations
        hidden (torch.Tensor, optional): the hidden state

    Returns:
        actions (torch.Tensor): the action predictions.
        log_prob (torch.Tensor): log probabilities of actions.
        hidden (torch.Tensor): the new hidden state.
    """
    x, new_hidden = self.ncp(obs, hidden)

    # Split output into mean and log_std
    mean, log_std = torch.chunk(x, 2, dim=-1)

    # Bound between [-20, 2]
    log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
    std = log_std.exp()

    # Sample from normal distribution
    x_t, dist_log_probs = self.get_sample(mean, std)
    actions_normalized = torch.tanh(x_t)  # Bounded: [-1, 1]

    # Scale back to environment's action space
    actions = actions_normalized * self.action_scale + self.action_bias

    # Calculate log probability, accounting for tanh
    log_prob = dist_log_probs - torch.log(
        self.action_scale * (1 - actions_normalized.pow(2)) + 1e-6
    )
    log_prob = log_prob.sum(dim=-1, keepdim=True)

    return actions, log_prob, new_hidden

get_sample(mean, std)

Computes a set of action samples and log probabilities using the reparameterization trick from a Gaussian distribution.

Parameters:

Name Type Description Default
mean torch.Tensor

network prediction means.

required
std torch.Tensor

network standard deviation predictions.

required

Returns:

Name Type Description
actions torch.Tensor

action samples.

log_probs torch.Tensor

log probabilities.

Source code in velora/models/sac/continuous.py
Python
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
@torch.jit.ignore
def get_sample(
    self, mean: torch.Tensor, std: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Draws a reparameterized sample from `Normal(mean, std)` and evaluates
    its log probability under that same distribution.

    Parameters:
        mean (torch.Tensor): means of the Gaussian.
        std (torch.Tensor): standard deviations of the Gaussian.

    Returns:
        actions (torch.Tensor): the drawn (unsquashed) samples.
        log_probs (torch.Tensor): element-wise log probabilities of the
            samples.
    """
    gaussian = Normal(loc=mean, scale=std)

    # rsample() keeps the draw differentiable w.r.t. mean and std
    draw = gaussian.rsample()

    return draw, gaussian.log_prob(draw)

predict(obs, hidden=None)

Performs a deterministic prediction.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required
hidden torch.Tensor

the hidden state

None

Returns:

Name Type Description
actions torch.Tensor

deterministic actions (tanh of the predicted mean, rescaled to the environment's action range)

hidden torch.Tensor

the new hidden state

Source code in velora/models/sac/continuous.py
Python
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
@torch.jit.ignore
def predict(
    self, obs: torch.Tensor, hidden: torch.Tensor | None = None
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Performs a deterministic prediction.

    Parameters:
        obs (torch.Tensor): the batch of state observations
        hidden (torch.Tensor, optional): the hidden state

    Returns:
        actions (torch.Tensor): sampled actions
        hidden (torch.Tensor): the new hidden state
    """
    x, new_hidden = self.ncp(obs, hidden)

    mean, _ = torch.chunk(x, 2, dim=-1)

    # Bound actions between [-1, 1]
    actions_normalized = torch.tanh(mean)

    # Scale back to env action space
    actions = actions_normalized * self.action_scale + self.action_bias

    return actions, new_hidden

SACActorDiscrete

Bases: LiquidNCPModule

A Liquid NCP Actor Network for the SAC algorithm. Outputs a categorical distribution over actions.

Usable with discrete action spaces.

Source code in velora/models/sac/discrete.py
Python
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class SACActorDiscrete(LiquidNCPModule):
    """
    Stochastic actor for the SAC algorithm on top of a Liquid NCP backbone.

    The backbone output is treated as logits, turned into probabilities with
    a softmax over the last dimension and used as a categorical distribution
    over actions.

    Usable with discrete action spaces.
    """

    def __init__(
        self,
        num_obs: int,
        n_neurons: int,
        num_actions: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            num_obs (int): the number of input observations
            n_neurons (int): the number of hidden neurons
            num_actions (int): the number of actions
            device (torch.device, optional): forwarded to the base class
                constructor
        """
        super().__init__(num_obs, n_neurons, num_actions, device=device)

        # Normalizes the backbone logits over the last dimension
        self.softmax = nn.Softmax(dim=-1)

        self.num_actions = num_actions

    @torch.jit.ignore
    def get_sample(self, probs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Samples actions from a Categorical distribution built from `probs`
        and evaluates their log probabilities.

        Parameters:
            probs (torch.Tensor): Softmax probabilities for each action

        Returns:
            actions (torch.Tensor): sampled action indices.
            log_probs (torch.Tensor): log probabilities of the sampled
                actions, with a trailing dimension of size 1 appended.
        """
        policy = Categorical(probs=probs)
        picked = policy.sample()

        # Append a trailing axis to the per-sample log probabilities
        picked_log_probs = torch.unsqueeze(policy.log_prob(picked), -1)

        return picked, picked_log_probs

    @torch.jit.ignore
    def predict(
        self, obs: torch.Tensor, hidden: torch.Tensor | None = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Performs a deterministic prediction by picking the most probable
        action; no sampling takes place.

        Parameters:
            obs (torch.Tensor): the batch of state observations
            hidden (torch.Tensor, optional): the hidden state

        Returns:
            actions (torch.Tensor): indices of the highest-probability actions
            hidden (torch.Tensor): the new hidden state
        """
        logits, next_hidden = self.ncp(obs, hidden)
        action_probs = self.softmax(logits)

        return action_probs.argmax(dim=-1), next_hidden

    def forward(
        self, obs: torch.Tensor, hidden: torch.Tensor | None = None
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Performs a forward pass through the network, sampling one action per
        observation.

        Parameters:
            obs (torch.Tensor): the batch of state observations
            hidden (torch.Tensor, optional): the hidden state

        Returns:
            actions (torch.Tensor): the sampled actions.
            probs (torch.Tensor): softmax probabilities for each action.
            log_prob (torch.Tensor): log probabilities of the sampled actions.
            hidden (torch.Tensor): the new hidden state.
        """
        logits, next_hidden = self.ncp(obs, hidden)
        action_probs = self.softmax(logits)

        sampled, sampled_log_prob = self.get_sample(action_probs)

        return sampled, action_probs, sampled_log_prob, next_hidden

__init__(num_obs, n_neurons, num_actions, *, device=None)

Parameters:

Name Type Description Default
num_obs int

the number of input observations

required
n_neurons int

the number of hidden neurons

required
num_actions int

the number of actions

required
device torch.device

the device to perform computations on

None
Source code in velora/models/sac/discrete.py
Python
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def __init__(
    self,
    num_obs: int,
    n_neurons: int,
    num_actions: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        num_obs (int): the number of input observations
        n_neurons (int): the number of hidden neurons
        num_actions (int): the number of actions
        device (torch.device, optional): forwarded to the base class
            constructor
    """
    super().__init__(num_obs, n_neurons, num_actions, device=device)

    # Normalizes the backbone logits over the last dimension
    self.softmax = nn.Softmax(dim=-1)

    self.num_actions = num_actions

forward(obs, hidden=None)

Performs a forward pass through the network.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required
hidden torch.Tensor

the hidden state

None

Returns:

Name Type Description
actions torch.Tensor

the action predictions.

probs torch.Tensor

softmax probabilities for each action.

log_prob torch.Tensor

log probabilities of actions.

hidden torch.Tensor

the new hidden state.

Source code in velora/models/sac/discrete.py
Python
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def forward(
    self, obs: torch.Tensor, hidden: torch.Tensor | None = None
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Performs a forward pass through the network.

    Parameters:
        obs (torch.Tensor): the batch of state observations
        hidden (torch.Tensor, optional): the hidden state

    Returns:
        actions (torch.Tensor): the action predictions.
        probs (torch.Tensor): softmax probabilities for each action.
        log_prob (torch.Tensor): log probabilities of actions.
        hidden (torch.Tensor): the new hidden state.
    """
    logits, new_hidden = self.ncp(obs, hidden)
    probs = self.softmax(logits)

    actions, log_prob = self.get_sample(probs)
    return actions, probs, log_prob, new_hidden

get_sample(probs)

Computes a set of action samples and log probabilities using a Categorical distribution.

Parameters:

Name Type Description Default
probs torch.Tensor

Softmax probabilities for each action

required

Returns:

Name Type Description
actions torch.Tensor

action samples.

log_probs torch.Tensor

action log probabilities.

Source code in velora/models/sac/discrete.py
Python
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@torch.jit.ignore
def get_sample(self, probs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Samples actions from a Categorical distribution built from `probs`
    and evaluates their log probabilities.

    Parameters:
        probs (torch.Tensor): Softmax probabilities for each action

    Returns:
        actions (torch.Tensor): sampled action indices.
        log_probs (torch.Tensor): log probabilities of the sampled
            actions, with a trailing dimension of size 1 appended.
    """
    policy = Categorical(probs=probs)
    picked = policy.sample()

    # Append a trailing axis to the per-sample log probabilities
    picked_log_probs = torch.unsqueeze(policy.log_prob(picked), -1)

    return picked, picked_log_probs

predict(obs, hidden=None)

Performs a deterministic prediction.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required
hidden torch.Tensor

the hidden state

None

Returns:

Name Type Description
actions torch.Tensor

greedy actions (argmax over the softmax action probabilities)

hidden torch.Tensor

the new hidden state

Source code in velora/models/sac/discrete.py
Python
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
@torch.jit.ignore
def predict(
    self, obs: torch.Tensor, hidden: torch.Tensor | None = None
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Performs a deterministic prediction.

    Parameters:
        obs (torch.Tensor): the batch of state observations
        hidden (torch.Tensor, optional): the hidden state

    Returns:
        actions (torch.Tensor): sampled actions
        hidden (torch.Tensor): the new hidden state
    """
    logits, new_hidden = self.ncp(obs, hidden)
    x = self.softmax(logits)
    actions = torch.argmax(x, dim=-1)

    return actions, new_hidden

SACCritic

Bases: LiquidNCPModule

A Liquid NCP Critic Network for the SAC algorithm. Estimates Q-values given states and actions.

Usable with continuous action spaces.

Source code in velora/models/sac/continuous.py
Python
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class SACCritic(LiquidNCPModule):
    """
    Critic for the SAC algorithm on top of a Liquid NCP backbone. Predicts
    Q-values from observations concatenated with actions.

    Usable with continuous action spaces.
    """

    def __init__(
        self,
        num_obs: int,
        n_neurons: int,
        num_actions: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            num_obs (int): the number of input observations
            n_neurons (int): the number of hidden neurons
            num_actions (int): the number of actions
            device (torch.device, optional): forwarded to the base class
                constructor
        """
        # forward() feeds the backbone observations joined with actions
        joint_width = num_obs + num_actions
        super().__init__(joint_width, n_neurons, 1, device=device)

    def forward(
        self,
        obs: torch.Tensor,
        actions: torch.Tensor,
        hidden: torch.Tensor | None = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Performs a forward pass through the network.

        Parameters:
            obs (torch.Tensor): the batch of state observations
            actions (torch.Tensor): the batch of actions
            hidden (torch.Tensor, optional): the hidden state

        Returns:
            q_values (torch.Tensor): the Q-Value predictions.
            hidden (torch.Tensor): the new hidden state.
        """
        # Observations first, actions second, joined on the last dimension
        state_action = torch.cat([obs, actions], dim=-1)

        q_estimate, next_hidden = self.ncp(state_action, hidden)
        return q_estimate, next_hidden

__init__(num_obs, n_neurons, num_actions, *, device=None)

Parameters:

Name Type Description Default
num_obs int

the number of input observations

required
n_neurons int

the number of hidden neurons

required
num_actions int

the number of actions

required
device torch.device

the device to perform computations on

None
Source code in velora/models/sac/continuous.py
Python
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def __init__(
    self,
    num_obs: int,
    n_neurons: int,
    num_actions: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        num_obs (int): the number of input observations
        n_neurons (int): the number of hidden neurons
        num_actions (int): the number of actions
        device (torch.device, optional): forwarded to the base class
            constructor
    """
    # The backbone is fed observations joined with actions
    joint_width = num_obs + num_actions
    super().__init__(joint_width, n_neurons, 1, device=device)

forward(obs, actions, hidden=None)

Performs a forward pass through the network.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required
actions torch.Tensor

the batch of actions

required
hidden torch.Tensor

the hidden state

None

Returns:

Name Type Description
q_values torch.Tensor

the Q-Value predictions.

hidden torch.Tensor

the new hidden state.

Source code in velora/models/sac/continuous.py
Python
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def forward(
    self,
    obs: torch.Tensor,
    actions: torch.Tensor,
    hidden: torch.Tensor | None = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Performs a forward pass through the network.

    Parameters:
        obs (torch.Tensor): the batch of state observations
        actions (torch.Tensor): the batch of actions
        hidden (torch.Tensor, optional): the hidden state

    Returns:
        q_values (torch.Tensor): the Q-Value predictions.
        hidden (torch.Tensor): the new hidden state.
    """
    inputs = torch.cat([obs, actions], dim=-1)
    q_values, new_hidden = self.ncp(inputs, hidden)
    return q_values, new_hidden

SACCriticDiscrete

Bases: LiquidNCPModule

A Liquid NCP Critic Network for the SAC algorithm. Estimates Q-values from state observations (its forward pass takes no action input).

Usable with discrete action spaces.

Source code in velora/models/sac/discrete.py
Python
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
class SACCriticDiscrete(LiquidNCPModule):
    """
    Critic for the SAC algorithm on top of a Liquid NCP backbone. Predicts
    Q-values from observations alone; the forward pass takes no action input.

    Usable with discrete action spaces.
    """

    def __init__(
        self,
        num_obs: int,
        n_neurons: int,
        num_actions: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            num_obs (int): the number of input observations
            n_neurons (int): the number of hidden neurons
            num_actions (int): the number of actions
            device (torch.device, optional): forwarded to the base class
                constructor
        """
        # NOTE(review): `num_actions` is passed as the third base-class
        # argument, presumably giving one Q-value per action; confirm
        # against LiquidNCPModule
        super().__init__(num_obs, n_neurons, num_actions, device=device)

    def forward(
        self,
        obs: torch.Tensor,
        hidden: torch.Tensor | None = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Performs a forward pass through the network.

        Parameters:
            obs (torch.Tensor): the batch of state observations
            hidden (torch.Tensor, optional): the hidden state

        Returns:
            q_values (torch.Tensor): the Q-Value predictions.
            hidden (torch.Tensor): the new hidden state.
        """
        q_estimate, next_hidden = self.ncp(obs, hidden)

        return q_estimate, next_hidden

__init__(num_obs, n_neurons, num_actions, *, device=None)

Parameters:

Name Type Description Default
num_obs int

the number of input observations

required
n_neurons int

the number of hidden neurons

required
num_actions int

the number of actions

required
device torch.device

the device to perform computations on

None
Source code in velora/models/sac/discrete.py
Python
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def __init__(
    self,
    num_obs: int,
    n_neurons: int,
    num_actions: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Builds the critic by forwarding every argument, unchanged, to the base
    class constructor.

    Parameters:
        num_obs (int): the number of input observations
        n_neurons (int): the number of hidden neurons
        num_actions (int): the number of actions
        device (torch.device, optional): the device to perform computations on
    """
    # NOTE(review): `num_actions` is the third base-class argument,
    # presumably giving one Q-value per action; confirm against the base class
    super().__init__(num_obs, n_neurons, num_actions, device=device)

forward(obs, hidden=None)

Performs a forward pass through the network.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required
hidden torch.Tensor

the hidden state

None

Returns:

Name Type Description
q_values torch.Tensor

the Q-Value predictions.

hidden torch.Tensor

the new hidden state.

Source code in velora/models/sac/discrete.py
Python
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def forward(
    self,
    obs: torch.Tensor,
    hidden: torch.Tensor | None = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Performs a forward pass through the network.

    Parameters:
        obs (torch.Tensor): the batch of state observations
        hidden (torch.Tensor, optional): the hidden state

    Returns:
        q_values (torch.Tensor): the Q-Value predictions.
        hidden (torch.Tensor): the new hidden state.
    """
    q_values, new_hidden = self.ncp(obs, hidden)
    return q_values, new_hidden

SACCriticNCP

Bases: NCPModule

An NCP Critic Network for the SAC algorithm. Estimates Q-values given states and actions.

Usable with continuous action spaces.

Source code in velora/models/sac/continuous.py
Python
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
class SACCriticNCP(NCPModule):
    """
    Critic for the SAC algorithm on top of an NCP backbone. Predicts
    Q-values from observations concatenated with actions; no hidden state is
    passed in or returned.

    Usable with continuous action spaces.
    """

    def __init__(
        self,
        num_obs: int,
        n_neurons: int,
        num_actions: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            num_obs (int): the number of input observations
            n_neurons (int): the number of hidden neurons
            num_actions (int): the number of actions
            device (torch.device, optional): forwarded to the base class
                constructor
        """
        # forward() feeds the backbone observations joined with actions
        joint_width = num_obs + num_actions
        super().__init__(joint_width, n_neurons, 1, device=device)

    def forward(self, obs: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
        """
        Performs a forward pass through the network.

        Parameters:
            obs (torch.Tensor): the batch of state observations
            actions (torch.Tensor): the batch of actions

        Returns:
            q_values (torch.Tensor): the Q-Value predictions.
        """
        # Observations first, actions second, joined on the last dimension
        state_action = torch.cat([obs, actions], dim=-1)

        return self.ncp(state_action)

__init__(num_obs, n_neurons, num_actions, *, device=None)

Parameters:

Name Type Description Default
num_obs int

the number of input observations

required
n_neurons int

the number of hidden neurons

required
num_actions int

the number of actions

required
device torch.device

the device to perform computations on

None
Source code in velora/models/sac/continuous.py
Python
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def __init__(
    self,
    num_obs: int,
    n_neurons: int,
    num_actions: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        num_obs (int): the number of input observations
        n_neurons (int): the number of hidden neurons
        num_actions (int): the number of actions
        device (torch.device, optional): forwarded to the base class
            constructor
    """
    # The backbone is fed observations joined with actions
    joint_width = num_obs + num_actions
    super().__init__(joint_width, n_neurons, 1, device=device)

forward(obs, actions)

Performs a forward pass through the network.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required
actions torch.Tensor

the batch of actions

required

Returns:

Name Type Description
q_values torch.Tensor

the Q-Value predictions.

Source code in velora/models/sac/continuous.py
Python
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def forward(self, obs: torch.Tensor, actions: torch.Tensor) -> torch.Tensor:
    """
    Performs a forward pass through the network.

    Parameters:
        obs (torch.Tensor): the batch of state observations
        actions (torch.Tensor): the batch of actions

    Returns:
        q_values (torch.Tensor): the Q-Value predictions.
    """
    # Observations first, actions second, joined on the last dimension
    state_action = torch.cat([obs, actions], dim=-1)

    return self.ncp(state_action)

SACCriticNCPDiscrete

Bases: NCPModule

An NCP Critic Network for the SAC algorithm. Estimates Q-values from state observations (its forward pass takes no action input).

Usable with discrete action spaces.

Source code in velora/models/sac/discrete.py
Python
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
class SACCriticNCPDiscrete(NCPModule):
    """
    Critic for the SAC algorithm on top of an NCP backbone. Predicts
    Q-values from observations alone; the forward pass takes no action input
    and no hidden state.

    Usable with discrete action spaces.
    """

    def __init__(
        self,
        num_obs: int,
        n_neurons: int,
        num_actions: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            num_obs (int): the number of input observations
            n_neurons (int): the number of hidden neurons
            num_actions (int): the number of actions
            device (torch.device, optional): forwarded to the base class
                constructor
        """
        # NOTE(review): `num_actions` is passed as the third base-class
        # argument, presumably giving one Q-value per action; confirm
        # against NCPModule
        super().__init__(num_obs, n_neurons, num_actions, device=device)

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """
        Performs a forward pass through the network.

        Parameters:
            obs (torch.Tensor): the batch of state observations

        Returns:
            q_values (torch.Tensor): the Q-Value predictions.
        """
        return self.ncp(obs)

__init__(num_obs, n_neurons, num_actions, *, device=None)

Parameters:

Name Type Description Default
num_obs int

the number of input observations

required
n_neurons int

the number of hidden neurons

required
num_actions int

the number of actions

required
device torch.device

the device to perform computations on

None
Source code in velora/models/sac/discrete.py
Python
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
def __init__(
    self,
    num_obs: int,
    n_neurons: int,
    num_actions: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Builds the critic by forwarding every argument, unchanged, to the base
    class constructor.

    Parameters:
        num_obs (int): the number of input observations
        n_neurons (int): the number of hidden neurons
        num_actions (int): the number of actions
        device (torch.device, optional): the device to perform computations on
    """
    # NOTE(review): `num_actions` is the third base-class argument,
    # presumably giving one Q-value per action; confirm against the base class
    super().__init__(num_obs, n_neurons, num_actions, device=device)

forward(obs)

Performs a forward pass through the network.

Parameters:

Name Type Description Default
obs torch.Tensor

the batch of state observations

required

Returns:

Name Type Description
q_values torch.Tensor

the Q-Value predictions.

Source code in velora/models/sac/discrete.py
Python
173
174
175
176
177
178
179
180
181
182
183
184
def forward(self, obs: torch.Tensor) -> torch.Tensor:
    """
    Performs a forward pass through the network.

    Parameters:
        obs (torch.Tensor): the batch of state observations

    Returns:
        q_values (torch.Tensor): the Q-Value predictions.
    """
    return self.ncp(obs)