Skip to content

velora.models.base

Home to the agent base class for all pre-built agents.

LiquidNCPModule

Bases: nn.Module

A base class for Liquid NCP modules.

Useful for Actor-Critic modules.

Source code in velora/models/base.py
Python
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
class LiquidNCPModule(nn.Module):
    """
    Base `nn.Module` that owns a `LiquidNCPNetwork` (stored as `self.ncp`).

    Intended as a parent class for Actor-Critic modules.
    """

    def __init__(
        self,
        in_features: int,
        n_neurons: int,
        out_features: int,
        *,
        init_type: str | WeightInitType = "kaiming_uniform",
        device: torch.device | None = None,
    ):
        """
        Parameters:
            in_features (int): the number of input nodes
            n_neurons (int): the number of hidden neurons
            out_features (int): the number of output nodes
            init_type (str | WeightInitType, optional): the type of weight
                initialization
            device (torch.device, optional): the device to perform computations on
        """
        super().__init__()

        self.in_features, self.n_neurons, self.out_features = (
            in_features,
            n_neurons,
            out_features,
        )
        self.device = device

        # Build the network, then move it onto the requested device
        network = LiquidNCPNetwork(
            in_features=in_features,
            n_neurons=n_neurons,
            out_features=out_features,
            init_type=init_type,
            device=device,
        )
        self.ncp = network.to(device)

    def config(self) -> ModuleConfig:
        """
        Collects the module's parameter counts and an architecture summary.

        Returns:
            config (ModuleConfig): a config model holding the network's
                `active_params` and `total_params`, plus `summary(self)` as the
                architecture.
        """
        network = self.ncp
        return ModuleConfig(
            active_params=network.active_params,
            total_params=network.total_params,
            architecture=summary(self),
        )

__init__(in_features, n_neurons, out_features, *, init_type='kaiming_uniform', device=None)

Parameters:

Name Type Description Default
in_features int

the number of input nodes

required
n_neurons int

the number of hidden neurons

required
out_features int

the number of output nodes

required
init_type str | WeightInitType

the type of weight initialization

'kaiming_uniform'
device torch.device

the device to perform computations on

None
Source code in velora/models/base.py
Python
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def __init__(
    self,
    in_features: int,
    n_neurons: int,
    out_features: int,
    *,
    init_type: str | WeightInitType = "kaiming_uniform",
    device: torch.device | None = None,
):
    """
    Stores the layer sizes and builds the underlying `LiquidNCPNetwork`.

    Parameters:
        in_features (int): the number of input nodes
        n_neurons (int): the number of hidden neurons
        out_features (int): the number of output nodes
        init_type (str | WeightInitType, optional): the type of weight
            initialization
        device (torch.device, optional): the device to perform computations on
    """
    super().__init__()

    self.in_features, self.n_neurons, self.out_features = (
        in_features,
        n_neurons,
        out_features,
    )
    self.device = device

    # Build the network, then move it onto the requested device
    network = LiquidNCPNetwork(
        in_features=in_features,
        n_neurons=n_neurons,
        out_features=out_features,
        init_type=init_type,
        device=device,
    )
    self.ncp = network.to(device)

config()

Gets details about the module.

Returns:

Name Type Description
config ModuleConfig

a config model containing module details.

Source code in velora/models/base.py
Python
121
122
123
124
125
126
127
128
129
130
131
132
def config(self) -> ModuleConfig:
    """
    Collects the module's parameter counts and an architecture summary.

    Returns:
        config (ModuleConfig): a config model holding the network's
            `active_params` and `total_params`, plus `summary(self)` as the
            architecture.
    """
    network = self.ncp
    return ModuleConfig(
        active_params=network.active_params,
        total_params=network.total_params,
        architecture=summary(self),
    )

NCPModule

Bases: nn.Module

A base class for NCP modules.

Useful for Actor-Critic modules.

Source code in velora/models/base.py
Python
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
class NCPModule(nn.Module):
    """
    Base `nn.Module` that owns an `NCPNetwork` (stored as `self.ncp`).

    Intended as a parent class for Actor-Critic modules.
    """

    def __init__(
        self,
        in_features: int,
        n_neurons: int,
        out_features: int,
        *,
        init_type: str | WeightInitType = "kaiming_uniform",
        device: torch.device | None = None,
    ):
        """
        Parameters:
            in_features (int): the number of input nodes
            n_neurons (int): the number of hidden neurons
            out_features (int): the number of output nodes
            init_type (str | WeightInitType, optional): the type of weight
                initialization
            device (torch.device, optional): the device to perform computations on
        """
        super().__init__()

        self.in_features, self.n_neurons, self.out_features = (
            in_features,
            n_neurons,
            out_features,
        )
        self.device = device

        # Build the network, then move it onto the requested device
        network = NCPNetwork(
            in_features=in_features,
            n_neurons=n_neurons,
            out_features=out_features,
            init_type=init_type,
            device=device,
        )
        self.ncp = network.to(device)

    def config(self) -> ModuleConfig:
        """
        Collects the module's parameter counts and an architecture summary.

        Returns:
            config (ModuleConfig): a config model holding the network's
                `active_params` and `total_params`, plus `summary(self)` as the
                architecture.
        """
        network = self.ncp
        return ModuleConfig(
            active_params=network.active_params,
            total_params=network.total_params,
            architecture=summary(self),
        )

__init__(in_features, n_neurons, out_features, *, init_type='kaiming_uniform', device=None)

Parameters:

Name Type Description Default
in_features int

the number of input nodes

required
n_neurons int

the number of hidden neurons

required
out_features int

the number of output nodes

required
init_type str | WeightInitType

the type of weight initialization

'kaiming_uniform'
device torch.device

the device to perform computations on

None
Source code in velora/models/base.py
Python
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    in_features: int,
    n_neurons: int,
    out_features: int,
    *,
    init_type: str | WeightInitType = "kaiming_uniform",
    device: torch.device | None = None,
):
    """
    Stores the layer sizes and builds the underlying `NCPNetwork`.

    Parameters:
        in_features (int): the number of input nodes
        n_neurons (int): the number of hidden neurons
        out_features (int): the number of output nodes
        init_type (str | WeightInitType, optional): the type of weight
            initialization
        device (torch.device, optional): the device to perform computations on
    """
    super().__init__()

    self.in_features, self.n_neurons, self.out_features = (
        in_features,
        n_neurons,
        out_features,
    )
    self.device = device

    # Build the network, then move it onto the requested device
    network = NCPNetwork(
        in_features=in_features,
        n_neurons=n_neurons,
        out_features=out_features,
        init_type=init_type,
        device=device,
    )
    self.ncp = network.to(device)

config()

Gets details about the module.

Returns:

Name Type Description
config ModuleConfig

a config model containing module details.

Source code in velora/models/base.py
Python
68
69
70
71
72
73
74
75
76
77
78
79
def config(self) -> ModuleConfig:
    """
    Collects the module's parameter counts and an architecture summary.

    Returns:
        config (ModuleConfig): a config model holding the network's
            `active_params` and `total_params`, plus `summary(self)` as the
            architecture.
    """
    network = self.ncp
    return ModuleConfig(
        active_params=network.active_params,
        total_params=network.total_params,
        architecture=summary(self),
    )

RLModuleAgent

A base class for RL agents that use modules.

Provides a blueprint describing the core methods that agents must have and includes useful utility methods.

Source code in velora/models/base.py
Python
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
class RLModuleAgent:
    """
    A base class for RL agents that use modules.

    Provides a blueprint describing the core methods that agents *must* have and
    includes useful utility methods.
    """

    # NOTE(review): `@abstractmethod` is only enforced on classes whose metaclass
    # derives from `ABCMeta`. No such base is declared here - confirm whether
    # enforcement of the blueprint methods is expected.

    def __init__(
        self,
        env: gym.Env,
        actor_neurons: int,
        critic_neurons: int,
        buffer_size: int,
        optim: Type[optim.Optimizer],
        device: torch.device | None,
        seed: int | None,
    ) -> None:
        """
        Parameters:
            env (gym.Env): Gymnasium environment to train on
            actor_neurons (int): number of decision nodes (inter and command nodes)
                for the actor
            critic_neurons (int): number of decision nodes (inter and command nodes)
                for the critic
            buffer_size (int): buffer capacity
            optim (Type[optim.Optimizer]): the PyTorch optimizer class (not an
                instance) used by the agent
            device (torch.device | None): the device to perform computations on
            seed (int | None): random number seed
        """
        self.env = env
        # Evaluation environment: the same `env` passed through the core wrappers
        self.eval_env = add_core_env_wrappers(env, device)
        self.actor_neurons = actor_neurons
        self.critic_neurons = critic_neurons
        self.buffer_size = buffer_size
        self.optim = optim
        self.device = device
        # Keep whatever `set_seed` returns, not the raw argument
        self.seed = set_seed(seed)

        action_space = self.env.action_space

        # Discrete spaces expose their size as `n`; `int()` accepts both NumPy
        # integer scalars and plain Python ints. Otherwise use the size of the
        # last axis of the space's shape.
        self.action_dim: int = (
            int(action_space.n)
            if isinstance(action_space, gym.spaces.Discrete)
            else action_space.shape[-1]
        )
        self.state_dim: int = self.env.observation_space.shape[-1]

        # Only populated for `Box` action spaces
        self.action_scale = None
        self.action_bias = None

        if isinstance(action_space, gym.spaces.Box):
            low, high = action_space.low, action_space.high
            # Half-range and midpoint of the action bounds
            self.action_scale = torch.tensor(high - low, device=device) / 2.0
            self.action_bias = torch.tensor(high + low, device=device) / 2.0

        # Placeholders; nothing in this class assigns them after construction
        self.config: "RLAgentConfig | None" = None
        self.buffer: "BufferBase | None" = None

        self.actor: "ActorModule | None" = None
        self.critic: "CriticModule | None" = None

        self.entropy: "EntropyModule | None" = None

        self.active_params = 0
        self.total_params = 0

        self.metadata: Dict[str, Any] = {}

    @abstractmethod
    def train(
        self,
        n_episodes: int,
        max_steps: int,
        window_size: int,
        *args,
        **kwargs,
    ) -> Any:
        """
        Blueprint for the agent's training routine. No behaviour is defined here.
        """
        pass  # pragma: no cover

    @abstractmethod
    def predict(
        self,
        state: torch.Tensor,
        hidden: torch.Tensor,
        train_mode: bool = False,
        *args,
        **kwargs,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Blueprint for making a prediction from a `state` and `hidden` tensor.
        No behaviour is defined here; the signature returns a pair of tensors.
        """
        pass  # pragma: no cover

    @abstractmethod
    def save(
        self,
        dirpath: str | Path,
        *,
        buffer: bool = False,
        config: bool = False,
    ) -> None:
        """
        Saves the current model state into `safetensors` and `json` files.

        !!! warning

            `model_config.json` is stored in the `dirpath.parent`.

        Includes:

        - `model_config.json` - contains the core details of the agent (optional)
        - `metadata.json` - contains the model, optimizer and buffer (optional) metadata
        - `model_state.safetensors` - contains the model weights and biases
        - `optim_state.safetensors` - contains the optimizer states (actor and critic)
        - `buffer_state.safetensors` - contains the buffer state (only if `buffer=True`)

        Parameters:
            dirpath (str | Path): the location to store the model state. Should only
                consist of `folder` names. E.g., `<folder>/<folder>`
            buffer (bool, optional): a flag for storing the buffer state
            config (bool, optional): a flag for storing the model's config
        """
        pass  # pragma: no cover

    @classmethod
    @abstractmethod
    def load(cls, dirpath: str | Path, *, buffer: bool = False) -> Self:
        """
        Creates a new agent instance by loading a saved one from the `dirpath`.
        Also, loads the original training buffer if `buffer=True`.

        These files must exist in the `dirpath`:

        - `metadata.json` - contains the model, optimizer and buffer (optional) metadata
        - `model_state.safetensors` - contains the model weights and biases
        - `optim_state.safetensors` - contains the optimizer states (actor and critic)
        - `buffer_state.safetensors` - contains the buffer state (only if `buffer=True`)

        Parameters:
            dirpath (str | Path): the location of the saved model state. Should
                only consist of `folder` names. E.g., `<folder>/<folder>`
            buffer (bool, optional): a flag for loading the buffer state

        Returns:
            agent (Self): a new agent instance with the saved state
        """
        pass  # pragma: no cover

    def state_dict(self) -> Dict[StateDictKeys, Dict[str, Any]]:
        """
        Retrieves the agent's module state dictionaries and splits them into
        categories.

        The `actor`, `critic` and `entropy` attributes are visited in that order;
        any that are `None` are skipped. Entries whose key contains `"optim"` go
        under `"optimizers"`, everything else goes under `"modules"`.

        Returns:
            state_dict (Dict[Literal["modules", "optimizers"], Dict[str, Any]]): the agent's module state dicts categorized.
        """
        module_states: Dict[str, Any] = {}
        optimizer_states: Dict[str, Any] = {}

        for module in (self.actor, self.critic, self.entropy):
            if module is None:
                continue

            for key, val in module.state_dict().items():
                target = optimizer_states if "optim" in key else module_states
                target[key] = val

        return {"modules": module_states, "optimizers": optimizer_states}

    def set_metadata(self, values: Dict[str, Any], seed: int) -> Dict[str, Any]:
        """
        Creates the agent's metadata based on a given set of local variables.

        The `self`, `__class__` and `env` entries are dropped, then `device`,
        `optim` and `seed` are added (overwriting entries of the same name).
        The result is returned; `self.metadata` is not modified here.

        Parameters:
            values (Dict[str, Any]): local variables
            seed (int): randomly generated seed

        Returns:
            metadata (Dict[str, Any]): an updated dictionary of agent metadata.
        """
        excluded = ("self", "__class__", "env")
        metadata = {k: v for k, v in values.items() if k not in excluded}

        # A missing device is recorded as "cpu"
        metadata["device"] = "cpu" if self.device is None else str(self.device)
        metadata["optim"] = f"torch.optim.{self.optim.__name__}"
        metadata["seed"] = seed

        return metadata

    def _set_train_params(self, params: Dict[str, Any]) -> TrainConfig:
        """
        Helper method. Builds a training config from a dictionary of training
        parameters.

        `params` must contain a `callbacks` entry. The `self` entry, if present,
        is dropped.

        Parameters:
            params (Dict[str, Any]): a dictionary of training parameters

        Returns:
            config (TrainConfig): a training config model
        """
        callbacks = params["callbacks"]
        remaining = {
            k: v for k, v in params.items() if k not in ("self", "callbacks")
        }

        return TrainConfig(
            # assumes each `cb.config()` yields a (key, value) pair - TODO confirm
            callbacks=dict(cb.config() for cb in callbacks) if callbacks else None,
            **remaining,
        )

__init__(env, actor_neurons, critic_neurons, buffer_size, optim, device, seed)

Parameters:

Name Type Description Default
env gym.Env

Gymnasium environment to train on

required
actor_neurons int

number of decision nodes (inter and command nodes) for the actor

required
critic_neurons int

number of decision nodes (inter and command nodes) for the critic

required
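buffer_size int

buffer capacity

required
optim Type[optim.Optimizer]

the PyTorch optimizer class (not an instance) used by the agent

required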
device torch.device

the device to perform computations on

required
seed int

random number seed

required
Source code in velora/models/base.py
Python
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def __init__(
    self,
    env: gym.Env,
    actor_neurons: int,
    critic_neurons: int,
    buffer_size: int,
    optim: Type[optim.Optimizer],
    device: torch.device | None,
    seed: int | None,
) -> None:
    """
    Parameters:
        env (gym.Env): Gymnasium environment to train on
        actor_neurons (int): number of decision nodes (inter and command nodes)
            for the actor
        critic_neurons (int): number of decision nodes (inter and command nodes)
            for the critic
        buffer_size (int): buffer capacity
        optim (Type[optim.Optimizer]): the PyTorch optimizer class (not an
            instance) used by the agent
        device (torch.device | None): the device to perform computations on
        seed (int | None): random number seed
    """
    self.env = env
    # Evaluation environment: the same `env` passed through the core wrappers
    self.eval_env = add_core_env_wrappers(env, device)
    self.actor_neurons = actor_neurons
    self.critic_neurons = critic_neurons
    self.buffer_size = buffer_size
    self.optim = optim
    self.device = device
    # Keep whatever `set_seed` returns, not the raw argument
    self.seed = set_seed(seed)

    action_space = self.env.action_space

    # Discrete spaces expose their size as `n`; `int()` accepts both NumPy
    # integer scalars and plain Python ints. Otherwise use the size of the
    # last axis of the space's shape.
    self.action_dim: int = (
        int(action_space.n)
        if isinstance(action_space, gym.spaces.Discrete)
        else action_space.shape[-1]
    )
    self.state_dim: int = self.env.observation_space.shape[-1]

    # Only populated for `Box` action spaces
    self.action_scale = None
    self.action_bias = None

    if isinstance(action_space, gym.spaces.Box):
        low, high = action_space.low, action_space.high
        # Half-range and midpoint of the action bounds
        self.action_scale = torch.tensor(high - low, device=device) / 2.0
        self.action_bias = torch.tensor(high + low, device=device) / 2.0

    # Placeholders; nothing in this method assigns them a real value
    self.config: "RLAgentConfig | None" = None
    self.buffer: "BufferBase | None" = None

    self.actor: "ActorModule | None" = None
    self.critic: "CriticModule | None" = None

    self.entropy: "EntropyModule | None" = None

    self.active_params = 0
    self.total_params = 0

    self.metadata: Dict[str, Any] = {}

load(dirpath, *, buffer=False) abstractmethod classmethod

Creates a new agent instance by loading a saved one from the dirpath. Also, loads the original training buffer if buffer=True.

These files must exist in the dirpath:

  • metadata.json - contains the model, optimizer and buffer (optional) metadata
  • model_state.safetensors - contains the model weights and biases
  • optim_state.safetensors - contains the optimizer states (actor and critic)
  • buffer_state.safetensors - contains the buffer state (only if buffer=True)

Parameters:

Name Type Description Default
dirpath str | Path

the location of the saved model state. Should only consist of folder names. E.g., <folder>/<folder>

required
buffer bool

a flag for loading the buffer state

False

Returns:

Name Type Description
agent Self

a new agent instance with the saved state

Source code in velora/models/base.py
Python
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
@classmethod
@abstractmethod
def load(cls, dirpath: str | Path, *, buffer: bool = False) -> Self:
    """
    Creates a new agent instance by loading a saved one from the `dirpath`.
    Also, loads the original training buffer if `buffer=True`.

    This is a blueprint method: the body here is empty.

    These files must exist in the `dirpath`:

    - `metadata.json` - contains the model, optimizer and buffer (optional) metadata
    - `model_state.safetensors` - contains the model weights and biases
    - `optim_state.safetensors` - contains the optimizer states (actor and critic)
    - `buffer_state.safetensors` - contains the buffer state (only if `buffer=True`)

    Parameters:
        dirpath (str | Path): the location of the saved model state. Should
            only consist of `folder` names. E.g., `<folder>/<folder>`
        buffer (bool, optional): a flag for loading the buffer state

    Returns:
        agent (Self): a new agent instance with the saved state
    """
    pass  # pragma: no cover

save(dirpath, *, buffer=False, config=False) abstractmethod

Saves the current model state into safetensors and json files.

Warning

model_config.json is stored in the dirpath.parent.

Includes:

  • model_config.json - contains the core details of the agent (optional)
  • metadata.json - contains the model, optimizer and buffer (optional) metadata
  • model_state.safetensors - contains the model weights and biases
  • optim_state.safetensors - contains the optimizer states (actor and critic)
  • buffer_state.safetensors - contains the buffer state (only if buffer=True)

Parameters:

Name Type Description Default
dirpath str | Path

the location to store the model state. Should only consist of folder names. E.g., <folder>/<folder>

required
buffer bool

a flag for storing the buffer state

False
config bool

a flag for storing the model's config

False
Source code in velora/models/base.py
Python
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
@abstractmethod
def save(
    self,
    dirpath: str | Path,
    *,
    buffer: bool = False,
    config: bool = False,
) -> None:
    """
    Saves the current model state into `safetensors` and `json` files.

    This is a blueprint method: the body here is empty.

    !!! warning

        `model_config.json` is stored in the `dirpath.parent`.

    Includes:

    - `model_config.json` - contains the core details of the agent (only if
      `config=True`)
    - `metadata.json` - contains the model, optimizer and buffer (optional) metadata
    - `model_state.safetensors` - contains the model weights and biases
    - `optim_state.safetensors` - contains the optimizer states (actor and critic)
    - `buffer_state.safetensors` - contains the buffer state (only if `buffer=True`)

    Parameters:
        dirpath (str | Path): the location to store the model state. Should only
            consist of `folder` names. E.g., `<folder>/<folder>`
        buffer (bool, optional): a flag for storing the buffer state
        config (bool, optional): a flag for storing the model's config
    """
    pass  # pragma: no cover

set_metadata(values, seed)

Creates the agent's metadata based on a given set of local variables.

Parameters:

Name Type Description Default
values Dict[str, Any]

local variables

required
seed int

randomly generated seed

required

Returns:

Name Type Description
metadata Dict[str, Any]

an updated dictionary of agent metadata.

Source code in velora/models/base.py
Python
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
def set_metadata(self, values: Dict[str, Any], seed: int) -> Dict[str, Any]:
    """
    Creates the agent's metadata based on a given set of local variables.

    The `self`, `__class__` and `env` entries are dropped, then `device`,
    `optim` and `seed` are added (overwriting entries of the same name).

    Parameters:
        values (Dict[str, Any]): local variables
        seed (int): randomly generated seed

    Returns:
        metadata (Dict[str, Any]): an updated dictionary of agent metadata.
    """
    excluded = ("self", "__class__", "env")

    metadata: Dict[str, Any] = {}
    for name, value in values.items():
        if name in excluded:
            continue
        metadata[name] = value

    # A missing device is recorded as "cpu"
    device_name = "cpu" if self.device is None else str(self.device)
    metadata.update(
        device=device_name,
        optim=f"torch.optim.{self.optim.__name__}",
        seed=seed,
    )
    return metadata

state_dict()

Retrieves the agent's module state dictionaries and splits them into categories.

Returns:

Name Type Description
state_dict Dict[Literal['modules', 'optimizers'], Dict[str, Any]]

the agent's module state dicts categorized.

Source code in velora/models/base.py
Python
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
def state_dict(self) -> Dict[StateDictKeys, Dict[str, Any]]:
    """
    Gathers the state dictionaries of the agent's modules and sorts their
    entries into categories.

    The `actor`, `critic` and `entropy` attributes are visited in that order;
    any that are `None` are skipped. Entries whose key contains `"optim"` go
    under `"optimizers"`, everything else goes under `"modules"`.

    Returns:
        state_dict (Dict[Literal["modules", "optimizers"], Dict[str, Any]]): the agent's module state dicts categorized.
    """
    module_states: Dict[str, Any] = {}
    optimizer_states: Dict[str, Any] = {}

    for component in (self.actor, self.critic, self.entropy):
        if component is None:
            continue

        for name, entry in component.state_dict().items():
            bucket = optimizer_states if "optim" in name else module_states
            bucket[name] = entry

    return {"modules": module_states, "optimizers": optimizer_states}