
velora.models.lnn


Liquid Neural Network building blocks.

NCPLiquidCell

Bases: nn.Module

A Liquid Time-Constant (LTC) cell using a Closed-form (CfC) approach.

The LTC cell follows the closed-form continuous-depth (CFC; Equation 10) solution from the paper: Closed-form Continuous-time Neural Models.

Equation: $$ x(t) = \sigma\left(-f(x, I, θ_f)\, t\right) \; g(x, I, θ_g) + \left[ 1 - \sigma\left(-f(x, I, θ_f)\, t\right) \right] \; h(x, I, θ_h) $$
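
A minimal usage sketch (not from the library docs): constructing a cell and stepping it once. The import path follows the `velora/models/lnn/cell.py` location listed below, and the mask shape `(in_features, n_hidden)` is inferred from `_prep_mask`; both are assumptions rather than documented API.

```python
import torch

# Assumed import path, based on the "Source code in velora/models/lnn/cell.py" note below.
from velora.models.lnn.cell import NCPLiquidCell

in_features, n_hidden, batch_size = 4, 8, 2

# Hypothetical sparse wiring: values in {-1, 0, 1}; the cell stores |mask| internally.
mask = torch.randint(-1, 2, (in_features, n_hidden)).float()

cell = NCPLiquidCell(in_features, n_hidden, mask)

x = torch.randn(batch_size, in_features)    # input at one timestep
hidden = torch.zeros(batch_size, n_hidden)  # initial hidden state

y_pred, new_hidden = cell(x, hidden)        # both: (batch_size, n_hidden)
```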

Source code in velora/models/lnn/cell.py
Python
class NCPLiquidCell(nn.Module):
    """
    A Liquid Time-Constant (LTC) cell using a Closed-form (CfC) approach.

    The LTC cell follows the closed-form continuous-depth
    (CFC; Equation 10) solution from the paper:
    [Closed-form Continuous-time Neural Models](https://arxiv.org/abs/2106.13898).

    Equation:
    $$
    x(t) =
        \\sigma(-f(x, I, θ_f), t) \\; g(x, I, θ_g)
        + \\left[ 1 - \\sigma(-[\\;f(x, I, θ_f)\\;]\\;t) \\right] \\; h(x, I, θ_h)
    $$
    """

    sparsity_mask: torch.Tensor

    def __init__(
        self,
        in_features: int,
        n_hidden: int,
        mask: torch.Tensor,
        *,
        init_type: str | WeightInitType = "kaiming_uniform",
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            in_features (int): number of input nodes.
            n_hidden (int): number of hidden nodes.
            mask (torch.Tensor): a matrix of sparse connections
                usually containing a combination of `[-1, 1, 0]`.
            init_type (str, optional): the type of weight initialization
            device (torch.device, optional): the device to load tensors on.
        """

        super().__init__()

        self.in_features = in_features
        self.n_hidden = n_hidden
        self.head_size = n_hidden + in_features
        self.init_type = init_type
        self.device = device

        # Absolute to maintain masking (-1 -> 1)
        self.register_buffer("sparsity_mask", self._prep_mask(mask.to(device)))

        self.tanh = nn.Tanh()  # Bounded: [-1, 1]
        self.sigmoid = nn.Sigmoid()  # Bounded: [0, 1]

        self.g_head = self._make_layer()
        self.h_head = self._make_layer()

        # LTC heads (f)
        self.f_head_to_g = self._make_layer()
        self.f_head_to_h = self._make_layer()

        # Hidden state projection
        self.proj = self._make_layer()

    def _make_layer(self) -> SparseLinear:
        """
        Helper method. Creates a new `SparseLinear` layer with the following values:

        - `in_features` - `self.n_hidden + self.in_features`.
        - `out_features` - `self.n_hidden`.
        - `mask` - `self.sparsity_mask`.
        - `device` - `self.device`.

        Returns:
            layer (SparseLinear): a `SparseLinear` layer.
        """
        return SparseLinear(
            self.head_size,
            self.n_hidden,
            self.sparsity_mask,
            init_type=self.init_type,
            device=self.device,
        )

    def _prep_mask(self, mask: torch.Tensor) -> torch.Tensor:
        """
        Utility method. Preprocesses mask to match head size.

        !!! note "Performs three operations"

            1. Adds a padded matrix of 1s to end of mask in shape
                `(n_extras, n_extras)` where `n_extras=mask.shape[1]`
            2. Transposes mask from col matrix -> row matrix
            3. Gets the absolute values of the mask (swapping `-1 -> 1`)

        Parameters:
            mask (torch.Tensor): weight sparsity mask.

        Returns:
            mask (torch.Tensor): an updated mask.
        """
        n_extras = mask.shape[1]
        extra_nodes = torch.ones((n_extras, n_extras), device=self.device)
        mask = torch.concatenate([mask.detach(), extra_nodes])
        return torch.abs(mask.T).to(self.device)

    def _new_hidden(
        self, x: torch.Tensor, g_out: torch.Tensor, h_out: torch.Tensor
    ) -> torch.Tensor:
        """
        Helper method. Computes the new hidden state.

        Parameters:
            x (torch.Tensor): input values.
            g_out (torch.Tensor): g_head output.
            h_out (torch.Tensor): h_head output.

        Returns:
            hidden (torch.Tensor): a new hidden state
        """
        g_head = self.tanh(g_out)  # g(x, I, θ_g)
        h_head = self.tanh(h_out)  # h(x, I, θ_h)

        fh_g = self.f_head_to_g(x)
        fh_h = self.f_head_to_h(x)

        gate_out = self.sigmoid(fh_g + fh_h)  # [1 - σ(-[f(x, I, θf)], t)]
        f_head = 1.0 - gate_out  # σ(-f(x, I, θf), t)

        return g_head * f_head + gate_out * h_head

    def update_mask(self, mask: torch.Tensor) -> None:
        """
        Updates the sparsity mask with a new one.

        Parameters:
            mask (torch.Tensor): new mask
        """
        self.sparsity_mask = self._prep_mask(mask.to(self.device))

    def forward(
        self, x: torch.Tensor, hidden: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Performs a forward pass through the cell.

        Parameters:
            x (torch.Tensor): input values.
            hidden (torch.Tensor): current hidden state.

        Returns:
            y_pred (torch.Tensor): the cell prediction.
            h_state (torch.Tensor): the hidden state.
        """
        x, hidden = x.to(self.device), hidden.to(self.device)
        x = torch.cat([x, hidden], dim=1)

        g_out = self.g_head(x)
        h_out = self.h_head(x)

        new_hidden = self._new_hidden(x, g_out, h_out)
        y_pred = self.proj(x) + new_hidden
        return y_pred, new_hidden

__init__(in_features, n_hidden, mask, *, init_type='kaiming_uniform', device=None)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `in_features` | `int` | number of input nodes. | *required* |
| `n_hidden` | `int` | number of hidden nodes. | *required* |
| `mask` | `torch.Tensor` | a matrix of sparse connections, usually containing a combination of `[-1, 1, 0]`. | *required* |
| `init_type` | `str` | the type of weight initialization. | `'kaiming_uniform'` |
| `device` | `torch.device` | the device to load tensors on. | `None` |
Source code in velora/models/lnn/cell.py
Python
def __init__(
    self,
    in_features: int,
    n_hidden: int,
    mask: torch.Tensor,
    *,
    init_type: str | WeightInitType = "kaiming_uniform",
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        in_features (int): number of input nodes.
        n_hidden (int): number of hidden nodes.
        mask (torch.Tensor): a matrix of sparse connections
            usually containing a combination of `[-1, 1, 0]`.
        init_type (str, optional): the type of weight initialization
        device (torch.device, optional): the device to load tensors on.
    """

    super().__init__()

    self.in_features = in_features
    self.n_hidden = n_hidden
    self.head_size = n_hidden + in_features
    self.init_type = init_type
    self.device = device

    # Absolute to maintain masking (-1 -> 1)
    self.register_buffer("sparsity_mask", self._prep_mask(mask.to(device)))

    self.tanh = nn.Tanh()  # Bounded: [-1, 1]
    self.sigmoid = nn.Sigmoid()  # Bounded: [0, 1]

    self.g_head = self._make_layer()
    self.h_head = self._make_layer()

    # LTC heads (f)
    self.f_head_to_g = self._make_layer()
    self.f_head_to_h = self._make_layer()

    # Hidden state projection
    self.proj = self._make_layer()

forward(x, hidden)

Performs a forward pass through the cell.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `torch.Tensor` | input values. | *required* |
| `hidden` | `torch.Tensor` | current hidden state. | *required* |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `y_pred` | `torch.Tensor` | the cell prediction. |
| `h_state` | `torch.Tensor` | the hidden state. |
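
As a rough illustration of how the hidden state is threaded between calls, here is a sketch that steps a cell over a toy sequence (the dense all-ones mask and the shapes are made up for the example):

```python
import torch

from velora.models.lnn.cell import NCPLiquidCell  # assumed import path

in_features, n_hidden, batch_size, seq_len = 3, 6, 5, 10

cell = NCPLiquidCell(in_features, n_hidden, torch.ones(in_features, n_hidden))

hidden = torch.zeros(batch_size, n_hidden)
sequence = torch.randn(seq_len, batch_size, in_features)

outputs = []
for x_t in sequence:                 # x_t: (batch_size, in_features)
    y_t, hidden = cell(x_t, hidden)  # y_t and hidden: (batch_size, n_hidden)
    outputs.append(y_t)

outputs = torch.stack(outputs)       # (seq_len, batch_size, n_hidden)
```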

Source code in velora/models/lnn/cell.py
Python
def forward(
    self, x: torch.Tensor, hidden: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Performs a forward pass through the cell.

    Parameters:
        x (torch.Tensor): input values.
        hidden (torch.Tensor): current hidden state.

    Returns:
        y_pred (torch.Tensor): the cell prediction.
        h_state (torch.Tensor): the hidden state.
    """
    x, hidden = x.to(self.device), hidden.to(self.device)
    x = torch.cat([x, hidden], dim=1)

    g_out = self.g_head(x)
    h_out = self.h_head(x)

    new_hidden = self._new_hidden(x, g_out, h_out)
    y_pred = self.proj(x) + new_hidden
    return y_pred, new_hidden

update_mask(mask)

Updates the sparsity mask with a new one.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `mask` | `torch.Tensor` | new mask. | *required* |
Source code in velora/models/lnn/cell.py
Python
def update_mask(self, mask: torch.Tensor) -> None:
    """
    Updates the sparsity mask with a new one.

    Parameters:
        mask (torch.Tensor): new mask
    """
    self.sparsity_mask = self._prep_mask(mask.to(self.device))

LiquidNCPNetwork

Bases: nn.Module

A CfC Liquid Neural Circuit Policy (NCP) Network with three layers:

  1. Inter (input) - a SparseLinear layer
  2. Command (hidden) - a NCPLiquidCell layer
  3. Motor (output) - a SparseLinear layer
Decision nodes

inter and command neurons are automatically calculated using:

Python
command_neurons = max(int(0.4 * n_neurons), 1)
inter_neurons = n_neurons - command_neurons

Combines a Liquid Time-Constant (LTC) cell with Ordinary Neural Circuits (ONCs). Paper references:

  • Closed-form Continuous-time Neural Models: https://arxiv.org/abs/2106.13898
  • Reinforcement Learning with Ordinary Neural Circuits: https://proceedings.mlr.press/v119/hasani20a.html
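
A minimal construction sketch, assuming the import path matches the `velora/models/lnn/ncp.py` location listed below (the sizes are arbitrary):

```python
import torch

from velora.models.lnn.ncp import LiquidNCPNetwork  # assumed import path

net = LiquidNCPNetwork(
    in_features=8,     # sensory nodes
    n_neurons=10,      # decision nodes, split into inter + command
    out_features=2,    # motor nodes
    sparsity_level=0.5,
)

x = torch.randn(4, 8)     # (batch_size, features)
y_pred, h_state = net(x)  # hidden state starts at zeros when omitted
print(y_pred.shape)       # (4, 2) -> (batch_size, out_features)
print(net.total_params, net.active_params)
```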

Source code in velora/models/lnn/ncp.py
Python
class LiquidNCPNetwork(nn.Module):
    """
    A CfC Liquid Neural Circuit Policy (NCP) Network with three layers:

    1. Inter (input) - a `SparseLinear` layer
    2. Command (hidden) - a `NCPLiquidCell` layer
    3. Motor (output) - a `SparseLinear` layer

    ??? note "Decision nodes"

        `inter` and `command` neurons are automatically calculated using:

        ```python
        command_neurons = max(int(0.4 * n_neurons), 1)
        inter_neurons = n_neurons - command_neurons
        ```

    Combines a Liquid Time-Constant (LTC) cell with Ordinary Neural Circuits (ONCs). Paper references:

    - [Closed-form Continuous-time Neural Models](https://arxiv.org/abs/2106.13898)
    - [Reinforcement Learning with Ordinary Neural Circuits](https://proceedings.mlr.press/v119/hasani20a.html)
    """

    def __init__(
        self,
        in_features: int,
        n_neurons: int,
        out_features: int,
        *,
        sparsity_level: float = 0.5,
        init_type: str | WeightInitType = "kaiming_uniform",
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            in_features (int): number of inputs (sensory nodes)
            n_neurons (int): number of decision nodes (inter and command nodes)
            out_features (int): number of out features (motor nodes)
            sparsity_level (float, optional): controls the connection sparsity
                between neurons.

                Must be a value between `[0.1, 0.9]` -

                - When `0.1` neurons are very dense.
                - When `0.9` they are very sparse.

            init_type (str, optional): the type of weight initialization
            device (torch.device, optional): the device to load tensors on
        """
        super().__init__()

        self.in_features = in_features
        self.n_neurons = n_neurons
        self.out_features = out_features
        self.device = device

        self.n_units = n_neurons + out_features  # inter + command + motor

        self.wiring = Wiring(
            in_features,
            n_neurons,
            out_features,
            sparsity_level=sparsity_level,
        )
        self.masks, self.counts = self.wiring.data()

        self.inter = SparseLinear(
            in_features,
            self.counts.inter,
            torch.abs(self.masks.inter.T),
            init_type=init_type,
            device=device,
        ).to(device)

        self.command = NCPLiquidCell(
            self.counts.inter,
            self.counts.command,
            self.masks.command,
            init_type=init_type,
            device=device,
        ).to(device)
        self.hidden_size = self.counts.command

        self.motor = SparseLinear(
            self.counts.command,
            self.counts.motor,
            torch.abs(self.masks.motor.T),
            init_type=init_type,
            device=device,
        ).to(device)

        self.act = nn.Mish()

        self._total_params = total_parameters(self)
        self._active_params = active_parameters(self)

    @property
    def total_params(self) -> int:
        """
        Gets the network's total parameter count.

        Returns:
            count (int): the total parameter count.
        """
        return self._total_params

    @property
    def active_params(self) -> int:
        """
        Gets the network's active parameter count.

        Returns:
            count (int): the active parameter count.
        """
        return self._active_params

    def forward(
        self, x: torch.Tensor, h_state: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Performs a forward pass through the network.

        Parameters:
            x (torch.Tensor): an input tensor of shape: `(batch_size, features)`.

                - `batch_size` the number of samples per timestep.
                - `features` the features at each timestep (e.g.,
                image features, joint coordinates, word embeddings, raw amplitude
                values).
            h_state (torch.Tensor, optional): initial hidden state of the RNN with
                shape: `(batch_size, n_units)`.

                - `batch_size` the number of samples.
                - `n_units` the total number of hidden neurons
                    (`n_neurons + out_features`).

        Returns:
            y_pred (torch.Tensor): the network prediction. When `batch_size=1`. Out shape is `(out_features)`. Otherwise, `(batch_size, out_features)`.
            h_state (torch.Tensor): the final hidden state. Output shape is `(batch_size, n_units)`.
        """
        if x.dim() != 2:
            raise ValueError(
                f"Unsupported dimensionality: '{x.shape}'. Should be 2 dimensional with: '(batch_size, features)'."
            )

        x = x.to(dtype=torch.float32, device=self.device)

        batch_size, features = x.size()

        if h_state is None:
            h_state = torch.zeros(
                (batch_size, self.hidden_size),
                device=self.device,
            )

        # Batch -> (batch_size, out_features)
        x = self.act(self.inter(x))
        x, h_state = self.command(x, h_state.to(self.device))
        y_pred: torch.Tensor = self.motor(self.act(x))

        # Single item -> (out_features)
        if y_pred.shape[0] == 1:
            y_pred = y_pred.squeeze(0)

        # h_state -> (batch_size, n_units)
        return y_pred, h_state

active_params property

Gets the network's active parameter count.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `count` | `int` | the active parameter count. |

total_params property

Gets the network's total parameter count.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `count` | `int` | the total parameter count. |

__init__(in_features, n_neurons, out_features, *, sparsity_level=0.5, init_type='kaiming_uniform', device=None)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `in_features` | `int` | number of inputs (sensory nodes). | *required* |
| `n_neurons` | `int` | number of decision nodes (inter and command nodes). | *required* |
| `out_features` | `int` | number of output features (motor nodes). | *required* |
| `sparsity_level` | `float` | controls the connection sparsity between neurons. Must be a value in `[0.1, 0.9]`: at `0.1` connections are very dense, at `0.9` very sparse. | `0.5` |
| `init_type` | `str` | the type of weight initialization. | `'kaiming_uniform'` |
| `device` | `torch.device` | the device to load tensors on. | `None` |
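
To see how `n_neurons` is split between inter and command nodes, the wiring counts can be inspected; a hedged sketch, assuming `counts` exposes the `inter`, `command`, and `motor` fields used in `__init__` above:

```python
from velora.models.lnn.ncp import LiquidNCPNetwork  # assumed import path

net = LiquidNCPNetwork(in_features=8, n_neurons=10, out_features=2)

# Per the "Decision nodes" note: command = max(int(0.4 * 10), 1) = 4, inter = 10 - 4 = 6.
print(net.counts.inter, net.counts.command, net.counts.motor)
```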
Source code in velora/models/lnn/ncp.py
Python
def __init__(
    self,
    in_features: int,
    n_neurons: int,
    out_features: int,
    *,
    sparsity_level: float = 0.5,
    init_type: str | WeightInitType = "kaiming_uniform",
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        in_features (int): number of inputs (sensory nodes)
        n_neurons (int): number of decision nodes (inter and command nodes)
        out_features (int): number of out features (motor nodes)
        sparsity_level (float, optional): controls the connection sparsity
            between neurons.

            Must be a value between `[0.1, 0.9]` -

            - When `0.1` neurons are very dense.
            - When `0.9` they are very sparse.

        init_type (str, optional): the type of weight initialization
        device (torch.device, optional): the device to load tensors on
    """
    super().__init__()

    self.in_features = in_features
    self.n_neurons = n_neurons
    self.out_features = out_features
    self.device = device

    self.n_units = n_neurons + out_features  # inter + command + motor

    self.wiring = Wiring(
        in_features,
        n_neurons,
        out_features,
        sparsity_level=sparsity_level,
    )
    self.masks, self.counts = self.wiring.data()

    self.inter = SparseLinear(
        in_features,
        self.counts.inter,
        torch.abs(self.masks.inter.T),
        init_type=init_type,
        device=device,
    ).to(device)

    self.command = NCPLiquidCell(
        self.counts.inter,
        self.counts.command,
        self.masks.command,
        init_type=init_type,
        device=device,
    ).to(device)
    self.hidden_size = self.counts.command

    self.motor = SparseLinear(
        self.counts.command,
        self.counts.motor,
        torch.abs(self.masks.motor.T),
        init_type=init_type,
        device=device,
    ).to(device)

    self.act = nn.Mish()

    self._total_params = total_parameters(self)
    self._active_params = active_parameters(self)

forward(x, h_state=None)

Performs a forward pass through the network.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `torch.Tensor` | an input tensor of shape `(batch_size, features)`, where `batch_size` is the number of samples per timestep and `features` are the features at each timestep (e.g., image features, joint coordinates, word embeddings, raw amplitude values). | *required* |
| `h_state` | `torch.Tensor` | initial hidden state of the RNN with shape `(batch_size, n_units)`, where `batch_size` is the number of samples and `n_units` is the total number of hidden neurons (`n_neurons + out_features`). | `None` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `y_pred` | `torch.Tensor` | the network prediction. When `batch_size=1` the output shape is `(out_features)`; otherwise `(batch_size, out_features)`. |
| `h_state` | `torch.Tensor` | the final hidden state. Output shape is `(batch_size, n_units)`. |
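
For sequential inputs, `h_state` from one call can be passed into the next; a short sketch with made-up shapes:

```python
import torch

from velora.models.lnn.ncp import LiquidNCPNetwork  # assumed import path

net = LiquidNCPNetwork(in_features=6, n_neurons=12, out_features=3)

batch_size, seq_len = 4, 20
sequence = torch.randn(seq_len, batch_size, 6)

h_state = None
for x_t in sequence:                    # x_t: (batch_size, features)
    y_pred, h_state = net(x_t, h_state)

print(y_pred.shape)   # (batch_size, out_features)
print(h_state.shape)  # hidden state carried between calls
```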

Source code in velora/models/lnn/ncp.py
Python
def forward(
    self, x: torch.Tensor, h_state: Optional[torch.Tensor] = None
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Performs a forward pass through the network.

    Parameters:
        x (torch.Tensor): an input tensor of shape: `(batch_size, features)`.

            - `batch_size` the number of samples per timestep.
            - `features` the features at each timestep (e.g.,
            image features, joint coordinates, word embeddings, raw amplitude
            values).
        h_state (torch.Tensor, optional): initial hidden state of the RNN with
            shape: `(batch_size, n_units)`.

            - `batch_size` the number of samples.
            - `n_units` the total number of hidden neurons
                (`n_neurons + out_features`).

    Returns:
        y_pred (torch.Tensor): the network prediction. When `batch_size=1`. Out shape is `(out_features)`. Otherwise, `(batch_size, out_features)`.
        h_state (torch.Tensor): the final hidden state. Output shape is `(batch_size, n_units)`.
    """
    if x.dim() != 2:
        raise ValueError(
            f"Unsupported dimensionality: '{x.shape}'. Should be 2 dimensional with: '(batch_size, features)'."
        )

    x = x.to(dtype=torch.float32, device=self.device)

    batch_size, features = x.size()

    if h_state is None:
        h_state = torch.zeros(
            (batch_size, self.hidden_size),
            device=self.device,
        )

    # Batch -> (batch_size, out_features)
    x = self.act(self.inter(x))
    x, h_state = self.command(x, h_state.to(self.device))
    y_pred: torch.Tensor = self.motor(self.act(x))

    # Single item -> (out_features)
    if y_pred.shape[0] == 1:
        y_pred = y_pred.squeeze(0)

    # h_state -> (batch_size, n_units)
    return y_pred, h_state

NCPNetwork

Bases: nn.Module

A Neural Circuit Policy (NCP) Network with three layers:

  1. Inter (input) - a SparseLinear layer
  2. Command (hidden) - a SparseLinear layer
  3. Motor (output) - a SparseLinear layer

Uses the Mish activation function between each layer.

Decision nodes

inter and command neurons are automatically calculated using:

Python
command_neurons = max(int(0.4 * n_neurons), 1)
inter_neurons = n_neurons - command_neurons

Uses an Ordinary Neural Circuit (ONC) architecture without Liquid dynamics. Paper references:

  • Reinforcement Learning with Ordinary Neural Circuits: https://proceedings.mlr.press/v119/hasani20a.html
  • Mish: A Self Regularized Non-Monotonic Activation Function: https://arxiv.org/abs/1908.08681
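
A minimal usage sketch, assuming the same `velora/models/lnn/ncp.py` import path as above (sizes are arbitrary); note there is no hidden state to manage:

```python
import torch

from velora.models.lnn.ncp import NCPNetwork  # assumed import path

net = NCPNetwork(in_features=8, n_neurons=10, out_features=2, sparsity_level=0.5)

x = torch.randn(16, 8)  # (batch_size, features)
y_pred = net(x)         # (16, 2) -> (batch_size, out_features)
```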

Source code in velora/models/lnn/ncp.py
Python
class NCPNetwork(nn.Module):
    """
    A Neural Circuit Policy (NCP) Network with three layers:

    1. Inter (input) - a `SparseLinear` layer
    2. Command (hidden) - a `SparseLinear` layer
    3. Motor (output) - a `SparseLinear` layer

    Uses the Mish activation function between each layer.

    ??? note "Decision nodes"

        `inter` and `command` neurons are automatically calculated using:

        ```python
        command_neurons = max(int(0.4 * n_neurons), 1)
        inter_neurons = n_neurons - command_neurons
        ```

    Uses an Ordinary Neural Circuit (ONC) architecture without Liquid dynamics.
    Paper references:

    - [Reinforcement Learning with Ordinary Neural Circuits](https://proceedings.mlr.press/v119/hasani20a.html)
    - [Mish: A Self Regularized Non-Monotonic Activation Function](https://arxiv.org/abs/1908.08681)
    """

    def __init__(
        self,
        in_features: int,
        n_neurons: int,
        out_features: int,
        *,
        sparsity_level: float = 0.5,
        init_type: str | WeightInitType = "kaiming_uniform",
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            in_features (int): number of inputs (sensory nodes)
            n_neurons (int): number of decision nodes (inter and command nodes)
            out_features (int): number of out features (motor nodes)
            sparsity_level (float, optional): controls the connection sparsity
                between neurons.

                Must be a value between `[0.1, 0.9]` -

                - When `0.1` neurons are very dense.
                - When `0.9` they are very sparse.

            init_type (str, optional): the type of weight initialization
            device (torch.device, optional): the device to load tensors on
        """
        super().__init__()

        self.in_features = in_features
        self.n_neurons = n_neurons
        self.out_features = out_features
        self.device = device

        self.n_units = n_neurons + out_features  # inter + command + motor

        self.wiring = Wiring(
            in_features,
            n_neurons,
            out_features,
            sparsity_level=sparsity_level,
        )
        self.masks, self.counts = self.wiring.data()

        self.ncp = nn.Sequential(
            SparseLinear(
                in_features,
                self.counts.inter,
                torch.abs(self.masks.inter.T),
                init_type=init_type,
                device=device,
            ),
            nn.Mish(),
            SparseLinear(
                self.counts.inter,
                self.counts.command,
                torch.abs(self.masks.command.T),
                init_type=init_type,
                device=device,
            ),
            nn.Mish(),
            SparseLinear(
                self.counts.command,
                self.counts.motor,
                torch.abs(self.masks.motor.T),
                init_type=init_type,
                device=device,
            ),
        ).to(device)

        self._total_params = total_parameters(self)
        self._active_params = active_parameters(self)

    @property
    def total_params(self) -> int:
        """
        Gets the network's total parameter count.

        Returns:
            count (int): the total parameter count.
        """
        return self._total_params

    @property
    def active_params(self) -> int:
        """
        Gets the network's active parameter count.

        Returns:
            count (int): the active parameter count.
        """
        return self._active_params

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Performs a forward pass through the network.

        Parameters:
            x (torch.Tensor): an input tensor of shape: `(batch_size, features)`.

                - `batch_size` the number of samples per timestep.
                - `features` the features at each timestep (e.g.,
                image features, joint coordinates, word embeddings, raw amplitude
                values).

        Returns:
            y_pred (torch.Tensor): the network prediction. When `batch_size=1`. Out shape is `(out_features)`. Otherwise, `(batch_size, out_features)`.
        """
        if x.dim() != 2:
            raise ValueError(
                f"Unsupported dimensionality: '{x.shape}'. Should be 2 dimensional with: '(batch_size, features)'."
            )

        x = x.to(dtype=torch.float32, device=self.device)

        # Batch -> (batch_size, out_features)
        y_pred: torch.Tensor = self.ncp(x)

        # Single item -> (out_features)
        if y_pred.shape[0] == 1:
            y_pred = y_pred.squeeze(0)

        return y_pred

active_params property

Gets the network's active parameter count.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `count` | `int` | the active parameter count. |

total_params property

Gets the network's total parameter count.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `count` | `int` | the total parameter count. |

__init__(in_features, n_neurons, out_features, *, sparsity_level=0.5, init_type='kaiming_uniform', device=None)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `in_features` | `int` | number of inputs (sensory nodes). | *required* |
| `n_neurons` | `int` | number of decision nodes (inter and command nodes). | *required* |
| `out_features` | `int` | number of output features (motor nodes). | *required* |
| `sparsity_level` | `float` | controls the connection sparsity between neurons. Must be a value in `[0.1, 0.9]`: at `0.1` connections are very dense, at `0.9` very sparse. | `0.5` |
| `init_type` | `str` | the type of weight initialization. | `'kaiming_uniform'` |
| `device` | `torch.device` | the device to load tensors on. | `None` |
Source code in velora/models/lnn/ncp.py
Python
def __init__(
    self,
    in_features: int,
    n_neurons: int,
    out_features: int,
    *,
    sparsity_level: float = 0.5,
    init_type: str | WeightInitType = "kaiming_uniform",
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        in_features (int): number of inputs (sensory nodes)
        n_neurons (int): number of decision nodes (inter and command nodes)
        out_features (int): number of out features (motor nodes)
        sparsity_level (float, optional): controls the connection sparsity
            between neurons.

            Must be a value between `[0.1, 0.9]` -

            - When `0.1` neurons are very dense.
            - When `0.9` they are very sparse.

        init_type (str, optional): the type of weight initialization
        device (torch.device, optional): the device to load tensors on
    """
    super().__init__()

    self.in_features = in_features
    self.n_neurons = n_neurons
    self.out_features = out_features
    self.device = device

    self.n_units = n_neurons + out_features  # inter + command + motor

    self.wiring = Wiring(
        in_features,
        n_neurons,
        out_features,
        sparsity_level=sparsity_level,
    )
    self.masks, self.counts = self.wiring.data()

    self.ncp = nn.Sequential(
        SparseLinear(
            in_features,
            self.counts.inter,
            torch.abs(self.masks.inter.T),
            init_type=init_type,
            device=device,
        ),
        nn.Mish(),
        SparseLinear(
            self.counts.inter,
            self.counts.command,
            torch.abs(self.masks.command.T),
            init_type=init_type,
            device=device,
        ),
        nn.Mish(),
        SparseLinear(
            self.counts.command,
            self.counts.motor,
            torch.abs(self.masks.motor.T),
            init_type=init_type,
            device=device,
        ),
    ).to(device)

    self._total_params = total_parameters(self)
    self._active_params = active_parameters(self)

forward(x)

Performs a forward pass through the network.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `torch.Tensor` | an input tensor of shape `(batch_size, features)`, where `batch_size` is the number of samples per timestep and `features` are the features at each timestep (e.g., image features, joint coordinates, word embeddings, raw amplitude values). | *required* |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `y_pred` | `torch.Tensor` | the network prediction. When `batch_size=1` the output shape is `(out_features)`; otherwise `(batch_size, out_features)`. |

Source code in velora/models/lnn/ncp.py
Python
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Performs a forward pass through the network.

    Parameters:
        x (torch.Tensor): an input tensor of shape: `(batch_size, features)`.

            - `batch_size` the number of samples per timestep.
            - `features` the features at each timestep (e.g.,
            image features, joint coordinates, word embeddings, raw amplitude
            values).

    Returns:
        y_pred (torch.Tensor): the network prediction. When `batch_size=1`. Out shape is `(out_features)`. Otherwise, `(batch_size, out_features)`.
    """
    if x.dim() != 2:
        raise ValueError(
            f"Unsupported dimensionality: '{x.shape}'. Should be 2 dimensional with: '(batch_size, features)'."
        )

    x = x.to(dtype=torch.float32, device=self.device)

    # Batch -> (batch_size, out_features)
    y_pred: torch.Tensor = self.ncp(x)

    # Single item -> (out_features)
    if y_pred.shape[0] == 1:
        y_pred = y_pred.squeeze(0)

    return y_pred

SparseLinear

Bases: nn.Module

A torch.nn.Linear layer with sparsely weighted connections.
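
A small sketch of the layer with an explicit, hand-written mask (the mask values and shapes are made up; a `1` keeps a connection and a `0` removes it). The import path follows the `velora/models/lnn/sparse.py` location listed below:

```python
import torch

from velora.models.lnn.sparse import SparseLinear  # assumed import path

# Hypothetical mask of shape (out_features, in_features).
mask = torch.tensor(
    [
        [1.0, 0.0, 1.0, 0.0],
        [0.0, 1.0, 0.0, 1.0],
        [1.0, 1.0, 0.0, 0.0],
    ]
)

layer = SparseLinear(in_features=4, out_features=3, mask=mask)

x = torch.randn(2, 4)
y = layer(x)  # (2, 3); masked-out weights contribute nothing
print(layer)  # extra_repr shows in/out features and bias
```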

Source code in velora/models/lnn/sparse.py
Python
class SparseLinear(nn.Module):
    """A `torch.nn.Linear` layer with sparsely weighted connections."""

    bias: torch.Tensor
    mask: torch.Tensor

    def __init__(
        self,
        in_features: int,
        out_features: int,
        mask: torch.Tensor,
        *,
        init_type: str | WeightInitType = "kaiming_uniform",
        bias: bool = True,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            in_features (int): number of input features
            out_features (int): number of output features
            mask (torch.Tensor): sparsity mask tensor of shape
                `(out_features, in_features)`
            init_type (str, optional): the type of weight initialization
            bias (bool, optional): a flag to enable additive bias
            device (torch.device, optional): device to perform computations on
        """
        super().__init__()

        self.in_features = in_features
        self.out_features = out_features
        self.device = device

        self.register_buffer("mask", mask.to(device).detach())

        weight = torch.empty((out_features, in_features), device=device)
        self.weight = nn.Parameter(weight)

        if bias:
            self.bias = nn.Parameter(torch.empty(out_features, device=device))
        else:
            self.register_parameter("bias", None)

        self.reset_parameters(init_type)

        with torch.no_grad():
            self.weight.data.mul_(self.mask)

    def reset_parameters(self, style: str | WeightInitType) -> None:
        """
        Initializes weights and biases using an initialization method.
        """
        weight_fn = get_init_fn(style)
        weight_fn(self)

    def update_mask(self, mask: torch.Tensor) -> None:
        """
        Updates the sparsity mask with a new one.

        Parameters:
            mask (torch.Tensor): new mask
        """
        self.mask = mask.to(self.device).detach()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Perform a forward pass through the layer.

        Parameters:
            x (torch.Tensor): input tensor with shape `(..., in_features)`

        Returns:
            y_pred (torch.Tensor): layer prediction with sparsity applied with shape `(..., out_features)`.
        """
        return F.linear(x, self.weight * self.mask, self.bias)

    def extra_repr(self) -> str:
        """String representation of layer parameters."""
        return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}"

__init__(in_features, out_features, mask, *, init_type='kaiming_uniform', bias=True, device=None)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `in_features` | `int` | number of input features. | *required* |
| `out_features` | `int` | number of output features. | *required* |
| `mask` | `torch.Tensor` | sparsity mask tensor of shape `(out_features, in_features)`. | *required* |
| `init_type` | `str` | the type of weight initialization. | `'kaiming_uniform'` |
| `bias` | `bool` | a flag to enable additive bias. | `True` |
| `device` | `torch.device` | device to perform computations on. | `None` |
Source code in velora/models/lnn/sparse.py
Python
def __init__(
    self,
    in_features: int,
    out_features: int,
    mask: torch.Tensor,
    *,
    init_type: str | WeightInitType = "kaiming_uniform",
    bias: bool = True,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        in_features (int): number of input features
        out_features (int): number of output features
        mask (torch.Tensor): sparsity mask tensor of shape
            `(out_features, in_features)`
        init_type (str, optional): the type of weight initialization
        bias (bool, optional): a flag to enable additive bias
        device (torch.device, optional): device to perform computations on
    """
    super().__init__()

    self.in_features = in_features
    self.out_features = out_features
    self.device = device

    self.register_buffer("mask", mask.to(device).detach())

    weight = torch.empty((out_features, in_features), device=device)
    self.weight = nn.Parameter(weight)

    if bias:
        self.bias = nn.Parameter(torch.empty(out_features, device=device))
    else:
        self.register_parameter("bias", None)

    self.reset_parameters(init_type)

    with torch.no_grad():
        self.weight.data.mul_(self.mask)

extra_repr()

String representation of layer parameters.

Source code in velora/models/lnn/sparse.py
Python
def extra_repr(self) -> str:
    """String representation of layer parameters."""
    return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}"

forward(x)

Perform a forward pass through the layer.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `torch.Tensor` | input tensor with shape `(..., in_features)`. | *required* |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `y_pred` | `torch.Tensor` | layer prediction with sparsity applied, with shape `(..., out_features)`. |
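
The forward pass behaves like a standard linear layer whose weight matrix is element-wise multiplied by the mask; a quick sketch checking that equivalence (the all-ones mask and shapes are made up):

```python
import torch
import torch.nn.functional as F

from velora.models.lnn.sparse import SparseLinear  # assumed import path

layer = SparseLinear(4, 3, torch.ones(3, 4))
x = torch.randn(5, 4)

# Same result as applying the masked weights directly.
expected = F.linear(x, layer.weight * layer.mask, layer.bias)
assert torch.allclose(layer(x), expected)
```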

Source code in velora/models/lnn/sparse.py
Python
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Perform a forward pass through the layer.

    Parameters:
        x (torch.Tensor): input tensor with shape `(..., in_features)`

    Returns:
        y_pred (torch.Tensor): layer prediction with sparsity applied with shape `(..., out_features)`.
    """
    return F.linear(x, self.weight * self.mask, self.bias)

reset_parameters(style)

Initializes weights and biases using an initialization method.

Source code in velora/models/lnn/sparse.py
Python
def reset_parameters(self, style: str | WeightInitType) -> None:
    """
    Initializes weights and biases using an initialization method.
    """
    weight_fn = get_init_fn(style)
    weight_fn(self)

update_mask(mask)

Updates the sparsity mask with a new one.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `mask` | `torch.Tensor` | new mask. | *required* |
Source code in velora/models/lnn/sparse.py
Python
def update_mask(self, mask: torch.Tensor) -> None:
    """
    Updates the sparsity mask with a new one.

    Parameters:
        mask (torch.Tensor): new mask
    """
    self.mask = mask.to(self.device).detach()