velora.buffer

Customization: Buffers

Storage buffers for all algorithms.

BatchExperience dataclass

Storage container for a batch of agent experiences.

Attributes:

Name          Type            Description
states        torch.Tensor    a batch of environment observations
actions       torch.Tensor    a batch of agent actions taken in the states
rewards       torch.Tensor    a batch of rewards obtained for taking the actions
next_states   torch.Tensor    a batch of newly generated environment observations following the actions taken
dones         torch.Tensor    a batch of environment completion statuses
hiddens       torch.Tensor    a batch of prediction network hidden states (e.g., Actor)

Source code in velora/buffer/experience.py
Python
@dataclass
class BatchExperience:
    """
    Storage container for a batch of agent experiences.

    Attributes:
        states (torch.Tensor): a batch of environment observations
        actions (torch.Tensor): a batch of agent actions taken in the states
        rewards (torch.Tensor): a batch of rewards obtained for taking the actions
        next_states (torch.Tensor): a batch of newly generated environment
            observations following the actions taken
        dones (torch.Tensor): a batch of environment completion statuses
        hiddens (torch.Tensor): a batch of prediction network hidden states
            (e.g., Actor)
    """

    states: torch.Tensor
    actions: torch.Tensor
    rewards: torch.Tensor
    next_states: torch.Tensor
    dones: torch.Tensor
    hiddens: torch.Tensor
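
A minimal usage sketch (illustrative only; buffer is assumed to be an existing ReplayBuffer instance, documented below):

Python
# Sample a batch from an existing buffer and read the dataclass fields.
batch = buffer.sample(batch_size=64)

# Every attribute shares the leading batch dimension: (batch_size, features).
states, actions = batch.states, batch.actions
assert states.shape[0] == actions.shape[0] == 64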

BufferBase

A base class for all buffers.

Stores experiences (states, actions, rewards, next_states, dones) as individual items in tensors.

Source code in velora/buffer/base.py
Python
class BufferBase:
    """
    A base class for all buffers.

    Stores experiences `(states, actions, rewards, next_states, dones)` as
    individual items in tensors.
    """

    def __init__(
        self,
        capacity: int,
        state_dim: int,
        action_dim: int,
        hidden_dim: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            capacity (int): the total capacity of the buffer
            state_dim (int): dimension of state observations
            action_dim (int): dimension of actions
            hidden_dim (int): dimension of hidden state
            device (torch.device, optional): the device to perform computations on
        """
        self.capacity = capacity
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim
        self.device = device

        # Position indicators
        self.position = 0
        self.size = 0

        # Pre-allocate storage
        self.states = torch.zeros((capacity, state_dim), device=device)
        self.actions = torch.zeros((capacity, action_dim), device=device)
        self.rewards = torch.zeros((capacity, 1), device=device)
        self.next_states = torch.zeros((capacity, state_dim), device=device)
        self.dones = torch.zeros((capacity, 1), device=device)
        self.hiddens = torch.zeros((capacity, hidden_dim), device=device)

    def add(
        self,
        state: torch.Tensor,
        action: torch.Tensor,
        reward: float,
        next_state: torch.Tensor,
        done: bool,
        hidden: torch.Tensor,
    ) -> None:
        """
        Adds a single experience to the buffer.

        Parameters:
            state (torch.Tensor): current state observation
            action (torch.Tensor): action taken
            reward (float): reward received
            next_state (torch.Tensor): next state observation
            done (bool): whether the episode ended
            hidden (torch.Tensor): Actor hidden state (prediction network)
        """
        self.states[self.position] = state.to(torch.float32)
        self.actions[self.position] = action
        self.rewards[self.position] = reward
        self.next_states[self.position] = next_state.to(torch.float32)
        self.dones[self.position] = done
        self.hiddens[self.position] = hidden

        # Update position - deque style
        self.position = (self.position + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def add_multi(
        self,
        states: torch.Tensor,
        actions: torch.Tensor,
        rewards: torch.Tensor,
        next_states: torch.Tensor,
        dones: torch.Tensor,
        hiddens: torch.Tensor,
    ) -> None:
        """
        Adds a set of experiences to the buffer.

        Parameters:
            states (torch.Tensor): current state observations
            actions (torch.Tensor): actions taken
            rewards (torch.Tensor): rewards received
            next_states (torch.Tensor): next state observations
            dones (torch.Tensor): whether each episode ended
            hiddens (torch.Tensor): Actor hidden states (prediction network)
        """
        batch_size = states.shape[0]

        new_position = self.position + batch_size
        indices = torch.arange(self.position, new_position) % self.capacity

        dtype = torch.float32
        rewards = rewards.unsqueeze(-1) if rewards.dim() == 1 else rewards
        dones = dones.unsqueeze(-1) if dones.dim() == 1 else dones
        actions = actions.unsqueeze(-1) if actions.dim() == 1 else actions

        self.states[indices] = states.to(dtype)
        self.actions[indices] = actions.to(dtype)
        self.rewards[indices] = rewards.to(dtype)
        self.next_states[indices] = next_states.to(dtype)
        self.dones[indices] = dones.to(dtype)
        self.hiddens[indices] = hiddens

        # Update position - deque style
        self.position = (new_position) % self.capacity
        self.size = min(self.size + batch_size, self.capacity)

    @abstractmethod
    def sample(self) -> BatchExperience:
        """
        Samples experience from the buffer.

        Returns:
            batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

                All items have the same shape `(batch_size, features)`.
        """
        pass  # pragma: no cover

    def __len__(self) -> int:
        """
        Gets the current size of the buffer.

        Returns:
            size (int): the current size of the buffer.
        """
        return self.size

    def metadata(self) -> Dict[MetaDataKeys, Any]:
        """
        Gets the metadata of the buffer.

        Includes:

        - `capacity` - the maximum capacity of the buffer.
        - `state_dim` - state dimension.
        - `action_dim` - action dimension.
        - `hidden_dim` - hidden state dimension.
        - `position` - current buffer position.
        - `size` - current size of buffer.
        - `device` - the device used for computations.

        Returns:
            metadata (Dict[str, Any]): the buffer's metadata
        """
        return {
            "capacity": self.capacity,
            "state_dim": self.state_dim,
            "action_dim": self.action_dim,
            "hidden_dim": self.hidden_dim,
            "position": self.position,
            "size": self.size,
            "device": str(self.device) if self.device else None,
        }

    def state_dict(self) -> Dict[BufferKeys, torch.Tensor]:
        """
        Return a dictionary containing the buffer's state.

        Includes:

        - `states` - tensor of states.
        - `actions` - tensor of actions.
        - `rewards` - tensor of rewards.
        - `next_states` - tensor of next states.
        - `dones` - tensor of dones.
        - `hiddens` - tensor of Actor hidden states (prediction network).

        Returns:
            state_dict (Dict[str, torch.Tensor]): the current state of the buffer
        """
        return {
            "states": self.states,
            "actions": self.actions,
            "rewards": self.rewards,
            "next_states": self.next_states,
            "dones": self.dones,
            "hiddens": self.hiddens,
        }

    def save(self, dirpath: str | Path, prefix: str = "buffer_") -> None:
        """
        Saves a buffer's `state_dict()` to a `safetensors` file.

        Includes:

        - `<prefix>metadata.json` - the buffer's metadata
        - `<prefix>state.safetensors` - the buffer state

        Parameters:
            dirpath (str | Path): the folder path to save the buffer state
            prefix (str, optional): a name prefix for the files
        """
        save_path = Path(dirpath)
        save_path.mkdir(parents=True, exist_ok=True)

        metadata_path = Path(save_path, f"{prefix}metadata").with_suffix(".json")
        buffer_path = Path(save_path, f"{prefix}state").with_suffix(".safetensors")

        save_file(self.state_dict(), buffer_path)

        with metadata_path.open("w") as f:
            f.write(json.dumps(self.metadata(), indent=2))

    @classmethod
    def load(cls, state_path: str | Path, metadata: Dict[MetaDataKeys, Any]) -> Self:
        """
        Restores the buffer from a saved state.

        Parameters:
            state_path (str | Path): the filepath to the buffer state
            metadata (Dict[str, Any]): a dictionary of metadata already
                loaded from a `metadata.json` file

        Returns:
            buffer (Self): a new buffer instance with the saved state restored
        """
        buffer_path = Path(state_path).with_suffix(".safetensors")
        device = metadata["device"] or "cpu"

        # Create new buffer instance
        buffer = cls(
            capacity=metadata["capacity"],
            state_dim=metadata["state_dim"],
            action_dim=metadata["action_dim"],
            hidden_dim=metadata["hidden_dim"],
            device=torch.device(device) if device else None,
        )
        buffer.position = metadata["position"]
        buffer.size = metadata["size"]

        # Load buffer state
        data: Dict[BufferKeys, torch.Tensor] = load_file(buffer_path, device)

        buffer.states = data["states"]
        buffer.actions = data["actions"]
        buffer.rewards = data["rewards"]
        buffer.next_states = data["next_states"]
        buffer.dones = data["dones"]
        buffer.hiddens = data["hiddens"]

        return buffer

__init__(capacity, state_dim, action_dim, hidden_dim, *, device=None)

Parameters:

Name         Type           Description                              Default
capacity     int            the total capacity of the buffer         required
state_dim    int            dimension of state observations          required
action_dim   int            dimension of actions                     required
hidden_dim   int            dimension of hidden state                required
device       torch.device   the device to perform computations on    None

Source code in velora/buffer/base.py
Python
def __init__(
    self,
    capacity: int,
    state_dim: int,
    action_dim: int,
    hidden_dim: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        capacity (int): the total capacity of the buffer
        state_dim (int): dimension of state observations
        action_dim (int): dimension of actions
        hidden_dim (int): dimension of hidden state
        device (torch.device, optional): the device to perform computations on
    """
    self.capacity = capacity
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.hidden_dim = hidden_dim
    self.device = device

    # Position indicators
    self.position = 0
    self.size = 0

    # Pre-allocate storage
    self.states = torch.zeros((capacity, state_dim), device=device)
    self.actions = torch.zeros((capacity, action_dim), device=device)
    self.rewards = torch.zeros((capacity, 1), device=device)
    self.next_states = torch.zeros((capacity, state_dim), device=device)
    self.dones = torch.zeros((capacity, 1), device=device)
    self.hiddens = torch.zeros((capacity, hidden_dim), device=device)
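
For example, a buffer sized for a 4-dimensional observation space, 1-dimensional action space, and a 64-unit hidden state (values and import path are illustrative; the ReplayBuffer subclass documented below is used because BufferBase.sample() is abstract):

Python
import torch

from velora.buffer.replay import ReplayBuffer

buffer = ReplayBuffer(
    capacity=100_000,
    state_dim=4,
    action_dim=1,
    hidden_dim=64,
    device=torch.device("cpu"),
)
print(len(buffer))  # 0 - the buffer starts empty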

__len__()

Gets the current size of the buffer.

Returns:

Name   Type   Description
size   int    the current size of the buffer.

Source code in velora/buffer/base.py
Python
def __len__(self) -> int:
    """
    Gets the current size of the buffer.

    Returns:
        size (int): the current size of the buffer.
    """
    return self.size

add(state, action, reward, next_state, done, hidden)

Adds a single experience to the buffer.

Parameters:

Name         Type           Description                                Default
state        torch.Tensor   current state observation                  required
action       torch.Tensor   action taken                               required
reward       float          reward received                            required
next_state   torch.Tensor   next state observation                     required
done         bool           whether the episode ended                  required
hidden       torch.Tensor   Actor hidden state (prediction network)    required

Source code in velora/buffer/base.py
Python
def add(
    self,
    state: torch.Tensor,
    action: torch.Tensor,
    reward: float,
    next_state: torch.Tensor,
    done: bool,
    hidden: torch.Tensor,
) -> None:
    """
    Adds a single experience to the buffer.

    Parameters:
        state (torch.Tensor): current state observation
        action (torch.Tensor): action taken
        reward (float): reward received
        next_state (torch.Tensor): next state observation
        done (bool): whether the episode ended
        hidden (torch.Tensor): Actor hidden state (prediction network)
    """
    self.states[self.position] = state.to(torch.float32)
    self.actions[self.position] = action
    self.rewards[self.position] = reward
    self.next_states[self.position] = next_state.to(torch.float32)
    self.dones[self.position] = done
    self.hiddens[self.position] = hidden

    # Update position - deque style
    self.position = (self.position + 1) % self.capacity
    self.size = min(self.size + 1, self.capacity)
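
An illustrative call, assuming the dimensions from the earlier ReplayBuffer example (state_dim=4, action_dim=1, hidden_dim=64); the zero tensors are placeholders:

Python
state = torch.zeros(4)        # shape (state_dim,)
action = torch.zeros(1)       # shape (action_dim,)
next_state = torch.zeros(4)   # shape (state_dim,)
hidden = torch.zeros(64)      # shape (hidden_dim,)

buffer.add(state, action, reward=1.0, next_state=next_state, done=False, hidden=hidden)
print(len(buffer))  # 1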

add_multi(states, actions, rewards, next_states, dones, hiddens)

Adds a set of experiences to the buffer.

Parameters:

Name          Type           Description                                 Default
states        torch.Tensor   current state observations                  required
actions       torch.Tensor   actions taken                               required
rewards       torch.Tensor   rewards received                            required
next_states   torch.Tensor   next state observations                     required
dones         torch.Tensor   whether each episode ended                  required
hiddens       torch.Tensor   Actor hidden states (prediction network)    required

Source code in velora/buffer/base.py
Python
def add_multi(
    self,
    states: torch.Tensor,
    actions: torch.Tensor,
    rewards: torch.Tensor,
    next_states: torch.Tensor,
    dones: torch.Tensor,
    hiddens: torch.Tensor,
) -> None:
    """
    Adds a set of experiences to the buffer.

    Parameters:
        states (torch.Tensor): current state observations
        actions (torch.Tensor): actions taken
        rewards (torch.Tensor): rewards received
        next_states (torch.Tensor): next state observations
        dones (torch.Tensor): whether each episode ended
        hiddens (torch.Tensor): Actor hidden states (prediction network)
    """
    batch_size = states.shape[0]

    new_position = self.position + batch_size
    indices = torch.arange(self.position, new_position) % self.capacity

    dtype = torch.float32
    rewards = rewards.unsqueeze(-1) if rewards.dim() == 1 else rewards
    dones = dones.unsqueeze(-1) if dones.dim() == 1 else dones
    actions = actions.unsqueeze(-1) if actions.dim() == 1 else actions

    self.states[indices] = states.to(dtype)
    self.actions[indices] = actions.to(dtype)
    self.rewards[indices] = rewards.to(dtype)
    self.next_states[indices] = next_states.to(dtype)
    self.dones[indices] = dones.to(dtype)
    self.hiddens[indices] = hiddens

    # Update position - deque style
    self.position = (new_position) % self.capacity
    self.size = min(self.size + batch_size, self.capacity)
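
A sketch adding a batch of 32 transitions at once (shapes again assume state_dim=4, action_dim=1, hidden_dim=64; the zero tensors are placeholders):

Python
states = torch.zeros(32, 4)
actions = torch.zeros(32, 1)
rewards = torch.zeros(32)        # 1D rewards and dones are unsqueezed to (32, 1) internally
next_states = torch.zeros(32, 4)
dones = torch.zeros(32)
hiddens = torch.zeros(32, 64)

buffer.add_multi(states, actions, rewards, next_states, dones, hiddens)
print(len(buffer))  # grows by 32, capped at capacity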

load(state_path, metadata) classmethod

Restores the buffer from a saved state.

Parameters:

Name         Type              Description                                                           Default
state_path   str | Path        the filepath to the buffer state                                      required
metadata     Dict[str, Any]    a dictionary of metadata already loaded from a metadata.json file     required

Returns:

Name     Type   Description
buffer   Self   a new buffer instance with the saved state restored

Source code in velora/buffer/base.py
Python
@classmethod
def load(cls, state_path: str | Path, metadata: Dict[MetaDataKeys, Any]) -> Self:
    """
    Restores the buffer from a saved state.

    Parameters:
        state_path (str | Path): the filepath to the buffer state
        metadata (Dict[str, Any]): a dictionary of metadata already
            loaded from a `metadata.json` file

    Returns:
        buffer (Self): a new buffer instance with the saved state restored
    """
    buffer_path = Path(state_path).with_suffix(".safetensors")
    device = metadata["device"] or "cpu"

    # Create new buffer instance
    buffer = cls(
        capacity=metadata["capacity"],
        state_dim=metadata["state_dim"],
        action_dim=metadata["action_dim"],
        hidden_dim=metadata["hidden_dim"],
        device=torch.device(device) if device else None,
    )
    buffer.position = metadata["position"]
    buffer.size = metadata["size"]

    # Load buffer state
    data: Dict[BufferKeys, torch.Tensor] = load_file(buffer_path, device)

    buffer.states = data["states"]
    buffer.actions = data["actions"]
    buffer.rewards = data["rewards"]
    buffer.next_states = data["next_states"]
    buffer.dones = data["dones"]
    buffer.hiddens = data["hiddens"]

    return buffer
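
A hedged restore sketch (file names follow the defaults written by save() below; the folder path is illustrative):

Python
import json
from pathlib import Path

# Metadata is loaded from the JSON file written alongside the buffer state.
metadata = json.loads(Path("checkpoints/buffer_metadata.json").read_text())
buffer = ReplayBuffer.load("checkpoints/buffer_state.safetensors", metadata)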

metadata()

Gets the metadata of the buffer.

Includes:

  • capacity - the maximum capacity of the buffer.
  • state_dim - state dimension.
  • action_dim - action dimension.
  • hidden_dim - hidden state dimension.
  • position - current buffer position.
  • size - current size of buffer.
  • device - the device used for computations.

Returns:

Name       Type              Description
metadata   Dict[str, Any]    the buffer's metadata

Source code in velora/buffer/base.py
Python
def metadata(self) -> Dict[MetaDataKeys, Any]:
    """
    Gets the metadata of the buffer.

    Includes:

    - `capacity` - the maximum capacity of the buffer.
    - `state_dim` - state dimension.
    - `action_dim` - action dimension.
    - `hidden_dim` - hidden state dimension.
    - `position` - current buffer position.
    - `size` - current size of buffer.
    - `device` - the device used for computations.

    Returns:
        metadata (Dict[str, Any]): the buffer's metadata
    """
    return {
        "capacity": self.capacity,
        "state_dim": self.state_dim,
        "action_dim": self.action_dim,
        "hidden_dim": self.hidden_dim,
        "position": self.position,
        "size": self.size,
        "device": str(self.device) if self.device else None,
    }
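
For example (values are illustrative and follow the earlier examples):

Python
meta = buffer.metadata()
# e.g. {'capacity': 100000, 'state_dim': 4, 'action_dim': 1,
#       'hidden_dim': 64, 'position': 33, 'size': 33, 'device': 'cpu'}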

sample() abstractmethod

Samples experience from the buffer.

Returns:

Name    Type              Description
batch   BatchExperience   an object of samples with the attributes (states, actions, rewards, next_states, dones, hiddens).
                          All items have the same shape (batch_size, features).

Source code in velora/buffer/base.py
Python
@abstractmethod
def sample(self) -> BatchExperience:
    """
    Samples experience from the buffer.

    Returns:
        batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

            All items have the same shape `(batch_size, features)`.
    """
    pass  # pragma: no cover

save(dirpath, prefix='buffer_')

Saves a buffer's state_dict() to a safetensors file.

Includes:

  • <prefix>metadata.json - the buffer's metadata
  • <prefix>state.safetensors - the buffer state

Parameters:

Name      Type         Description                                 Default
dirpath   str | Path   the folder path to save the buffer state    required
prefix    str          a name prefix for the files                 'buffer_'

Source code in velora/buffer/base.py
Python
def save(self, dirpath: str | Path, prefix: str = "buffer_") -> None:
    """
    Saves a buffer's `state_dict()` to a `safetensors` file.

    Includes:

    - `<prefix>metadata.json` - the buffer's metadata
    - `<prefix>state.safetensors` - the buffer state

    Parameters:
        dirpath (str | Path): the folder path to save the buffer state
        prefix (str, optional): a name prefix for the files
    """
    save_path = Path(dirpath)
    save_path.mkdir(parents=True, exist_ok=True)

    metadata_path = Path(save_path, f"{prefix}metadata").with_suffix(".json")
    buffer_path = Path(save_path, f"{prefix}state").with_suffix(".safetensors")

    save_file(self.state_dict(), buffer_path)

    with metadata_path.open("w") as f:
        f.write(json.dumps(self.metadata(), indent=2))
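
A minimal sketch (the folder name is illustrative):

Python
# Writes checkpoints/buffer_metadata.json and checkpoints/buffer_state.safetensors.
buffer.save("checkpoints", prefix="buffer_")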

state_dict()

Return a dictionary containing the buffer's state.

Includes:

  • states - tensor of states.
  • actions - tensor of actions.
  • rewards - tensor of rewards.
  • next_states - tensor of next states.
  • dones - tensor of dones.
  • hiddens - tensor of Actor hidden states (prediction network).

Returns:

Name         Type                       Description
state_dict   Dict[str, torch.Tensor]    the current state of the buffer

Source code in velora/buffer/base.py
Python
def state_dict(self) -> Dict[BufferKeys, torch.Tensor]:
    """
    Return a dictionary containing the buffer's state.

    Includes:

    - `states` - tensor of states.
    - `actions` - tensor of actions.
    - `rewards` - tensor of rewards.
    - `next_states` - tensor of next states.
    - `dones` - tensor of dones.
    - `hiddens` - tensor of Actor hidden states (prediction network).

    Returns:
        state_dict (Dict[str, torch.Tensor]): the current state of the buffer
    """
    return {
        "states": self.states,
        "actions": self.actions,
        "rewards": self.rewards,
        "next_states": self.next_states,
        "dones": self.dones,
        "hiddens": self.hiddens,
    }

ReplayBuffer

Bases: BufferBase

A Buffer for storing agent experiences. Used for Off-Policy agents.

First introduced to Deep RL in the Deep Q-Network (DQN) paper: Playing Atari with Deep Reinforcement Learning.

Source code in velora/buffer/replay.py
Python
class ReplayBuffer(BufferBase):
    """
    A Buffer for storing agent experiences. Used for Off-Policy agents.

    First introduced in Deep RL in the Deep Q-Network paper:
    [Playing Atari with Deep Reinforcement Learning](https://arxiv.org/abs/1312.5602).
    """

    def __init__(
        self,
        capacity: int,
        state_dim: int,
        action_dim: int,
        hidden_dim: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            capacity (int): the total capacity of the buffer
            state_dim (int): dimension of state observations
            action_dim (int): dimension of actions
            hidden_dim (int): dimension of hidden state
            device (torch.device, optional): the device to perform computations on
        """
        super().__init__(capacity, state_dim, action_dim, hidden_dim, device=device)

    def config(self) -> BufferConfig:
        """
        Creates a buffer config model.

        Returns:
            config (BufferConfig): a config model with buffer details.
        """
        return BufferConfig(
            type="ReplayBuffer",
            capacity=self.capacity,
            state_dim=self.state_dim,
            action_dim=self.action_dim,
            hidden_dim=self.hidden_dim,
        )

    @override
    def sample(self, batch_size: int) -> BatchExperience:
        """
        Samples a random batch of experiences from the buffer.

        Parameters:
            batch_size (int): the number of items to sample

        Returns:
            batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

                All items have the same shape `(batch_size, features)`.
        """
        if len(self) < batch_size:
            raise ValueError(
                f"Buffer does not contain enough experiences. Available: {len(self)}, Requested: {batch_size}"
            )

        indices = torch.randint(0, self.size, (batch_size,), device=self.device)

        return BatchExperience(
            states=self.states[indices],
            actions=self.actions[indices],
            rewards=self.rewards[indices],
            next_states=self.next_states[indices],
            dones=self.dones[indices],
            hiddens=self.hiddens[indices],
        )

    def warm(self, agent: "RLModuleAgent", n_samples: int, num_envs: int = 8) -> None:
        """
        Warms the buffer by generating samples from an agent acting in a
        `vectorized` copy of the environment until the buffer holds at least
        `n_samples` experiences.

        Parameters:
            agent (RLModuleAgent): the agent to generate samples with
            n_samples (int): the target number of samples to generate
            num_envs (int, optional): number of vectorized environments. Cannot
                be smaller than `2`
        """
        if num_envs < 2:
            raise ValueError(f"'{num_envs=}' cannot be smaller than 2.")

        envs = gym.make_vec(
            agent.env.spec.id,
            num_envs=num_envs,
            vectorization_mode="sync",
        )
        envs: gym.vector.SyncVectorEnv = gym.wrappers.vector.NumpyToTorch(
            envs, agent.device
        )

        hidden = None
        states, _ = envs.reset()

        while not len(self) >= n_samples:
            actions, hidden = agent.predict(states, hidden, train_mode=True)
            next_states, rewards, terminated, truncated, _ = envs.step(actions)
            dones = terminated | truncated

            self.add_multi(states, actions, rewards, next_states, dones, hidden)

            states = next_states

        envs.close()

__init__(capacity, state_dim, action_dim, hidden_dim, *, device=None)

Parameters:

Name         Type           Description                              Default
capacity     int            the total capacity of the buffer         required
state_dim    int            dimension of state observations          required
action_dim   int            dimension of actions                     required
hidden_dim   int            dimension of hidden state                required
device       torch.device   the device to perform computations on    None

Source code in velora/buffer/replay.py
Python
def __init__(
    self,
    capacity: int,
    state_dim: int,
    action_dim: int,
    hidden_dim: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        capacity (int): the total capacity of the buffer
        state_dim (int): dimension of state observations
        action_dim (int): dimension of actions
        hidden_dim (int): dimension of hidden state
        device (torch.device, optional): the device to perform computations on
    """
    super().__init__(capacity, state_dim, action_dim, hidden_dim, device=device)

config()

Creates a buffer config model.

Returns:

Name     Type           Description
config   BufferConfig   a config model with buffer details.

Source code in velora/buffer/replay.py
Python
def config(self) -> BufferConfig:
    """
    Creates a buffer config model.

    Returns:
        config (BufferConfig): a config model with buffer details.
    """
    return BufferConfig(
        type="ReplayBuffer",
        capacity=self.capacity,
        state_dim=self.state_dim,
        action_dim=self.action_dim,
        hidden_dim=self.hidden_dim,
    )

sample(batch_size)

Samples a random batch of experiences from the buffer.

Parameters:

Name         Type   Description                      Default
batch_size   int    the number of items to sample    required

Returns:

Name    Type              Description
batch   BatchExperience   an object of samples with the attributes (states, actions, rewards, next_states, dones, hiddens).
                          All items have the same shape (batch_size, features).

Source code in velora/buffer/replay.py
Python
@override
def sample(self, batch_size: int) -> BatchExperience:
    """
    Samples a random batch of experiences from the buffer.

    Parameters:
        batch_size (int): the number of items to sample

    Returns:
        batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

            All items have the same shape `(batch_size, features)`.
    """
    if len(self) < batch_size:
        raise ValueError(
            f"Buffer does not contain enough experiences. Available: {len(self)}, Requested: {batch_size}"
        )

    indices = torch.randint(0, self.size, (batch_size,), device=self.device)

    return BatchExperience(
        states=self.states[indices],
        actions=self.actions[indices],
        rewards=self.rewards[indices],
        next_states=self.next_states[indices],
        dones=self.dones[indices],
        hiddens=self.hiddens[indices],
    )
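
A short sketch of sampling once the buffer holds enough experiences (the batch size is illustrative):

Python
batch = buffer.sample(batch_size=128)
assert batch.states.shape == (128, buffer.state_dim)

# Requesting more samples than are currently stored raises a ValueError.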

warm(agent, n_samples, num_envs=8)

Warms the buffer by generating samples from an agent acting in a vectorized copy of the environment until the buffer holds at least n_samples experiences.

Parameters:

Name        Type            Description                                                     Default
agent       RLModuleAgent   the agent to generate samples with                              required
n_samples   int             the target number of samples to generate                       required
num_envs    int             number of vectorized environments. Cannot be smaller than 2    8

Source code in velora/buffer/replay.py
Python
def warm(self, agent: "RLModuleAgent", n_samples: int, num_envs: int = 8) -> None:
    """
    Warms the buffer by generating samples from an agent acting in a
    `vectorized` copy of the environment until the buffer holds at least
    `n_samples` experiences.

    Parameters:
        agent (RLModuleAgent): the agent to generate samples with
        n_samples (int): the target number of samples to generate
        num_envs (int, optional): number of vectorized environments. Cannot
            be smaller than `2`
    """
    if num_envs < 2:
        raise ValueError(f"'{num_envs=}' cannot be smaller than 2.")

    envs = gym.make_vec(
        agent.env.spec.id,
        num_envs=num_envs,
        vectorization_mode="sync",
    )
    envs: gym.vector.SyncVectorEnv = gym.wrappers.vector.NumpyToTorch(
        envs, agent.device
    )

    hidden = None
    states, _ = envs.reset()

    while not len(self) >= n_samples:
        actions, hidden = agent.predict(states, hidden, train_mode=True)
        next_states, rewards, terminated, truncated, _ = envs.step(actions)
        dones = terminated | truncated

        self.add_multi(states, actions, rewards, next_states, dones, hidden)

        states = next_states

    envs.close()
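
A hedged usage sketch (assumes agent is an RLModuleAgent with a Gymnasium environment attached, as the method expects):

Python
buffer.warm(agent, n_samples=1_000, num_envs=8)
print(len(buffer))  # at least 1_000 once warming completes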