velora.buffer

Customization: Buffers

Storage buffers for all algorithms.

BatchExperience dataclass

Storage container for a batch of agent experiences.

Attributes:

Name          Type            Description
states        torch.Tensor    a batch of environment observations
actions       torch.Tensor    a batch of agent actions taken in the states
rewards       torch.Tensor    a batch of rewards obtained for taking the actions
next_states   torch.Tensor    a batch of newly generated environment observations following the actions taken
dones         torch.Tensor    a batch of environment completion statuses
hiddens       torch.Tensor    a batch of prediction network hidden states (e.g., Actor)

Source code in velora/buffer/experience.py
Python
@dataclass
class BatchExperience:
    """
    Storage container for a batch of agent experiences.

    Attributes:
        states (torch.Tensor): a batch of environment observations
        actions (torch.Tensor): a batch of agent actions taken in the states
        rewards (torch.Tensor): a batch of rewards obtained for taking the actions
        next_states (torch.Tensor): a batch of newly generated environment
            observations following the actions taken
        dones (torch.Tensor): a batch of environment completion statuses
        hiddens (torch.Tensor): a batch of prediction network hidden states
            (e.g., Actor)
    """

    states: torch.Tensor
    actions: torch.Tensor
    rewards: torch.Tensor
    next_states: torch.Tensor
    dones: torch.Tensor
    hiddens: torch.Tensor
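
A minimal usage sketch (illustrative only; buffer is assumed to be an existing ReplayBuffer instance, documented below):

Python
# Sample a batch from an existing buffer and read the dataclass fields.
batch = buffer.sample(batch_size=64)

# Every attribute shares the leading batch dimension: (batch_size, features).
states, actions = batch.states, batch.actions
assert states.shape[0] == actions.shape[0] == 64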

BufferBase

A base class for all buffers.

Stores experiences (states, actions, rewards, next_states, dones) as individual items in tensors.

Source code in velora/buffer/base.py
Python
class BufferBase:
    """
    A base class for all buffers.

    Stores experiences `(states, actions, rewards, next_states, dones)` as
    individual items in tensors.
    """

    def __init__(
        self,
        capacity: int,
        state_dim: int,
        action_dim: int,
        hidden_dim: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            capacity (int): the total capacity of the buffer
            state_dim (int): dimension of state observations
            action_dim (int): dimension of actions
            hidden_dim (int): dimension of hidden state
            device (torch.device, optional): the device to perform computations on
        """
        self.capacity = capacity
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim
        self.device = device

        # Position indicators
        self.position = 0
        self.size = 0

        # Pre-allocate storage
        self.states = torch.zeros((capacity, state_dim), device=device)
        self.actions = torch.zeros((capacity, action_dim), device=device)
        self.rewards = torch.zeros((capacity, 1), device=device)
        self.next_states = torch.zeros((capacity, state_dim), device=device)
        self.dones = torch.zeros((capacity, 1), device=device)
        self.hiddens = torch.zeros((capacity, hidden_dim), device=device)

    def add(
        self,
        state: torch.Tensor,
        action: torch.Tensor,
        reward: float,
        next_state: torch.Tensor,
        done: bool,
        hidden: torch.Tensor,
    ) -> None:
        """
        Adds a single experience to the buffer.

        Parameters:
            state (torch.Tensor): current state observation
            action (torch.Tensor): action taken
            reward (float): reward received
            next_state (torch.Tensor): next state observation
            done (bool): whether the episode ended
            hidden (torch.Tensor): Actor hidden state (prediction network)
        """
        self.states[self.position] = state.to(torch.float32)
        self.actions[self.position] = action
        self.rewards[self.position] = reward
        self.next_states[self.position] = next_state.to(torch.float32)
        self.dones[self.position] = done
        self.hiddens[self.position] = hidden

        # Update position - deque style
        self.position = (self.position + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def add_multi(
        self,
        states: torch.Tensor,
        actions: torch.Tensor,
        rewards: torch.Tensor,
        next_states: torch.Tensor,
        dones: torch.Tensor,
        hiddens: torch.Tensor,
    ) -> None:
        """
        Adds a set of experiences to the buffer.

        Parameters:
            states (torch.Tensor): current state observations
            actions (torch.Tensor): actions taken
            rewards (torch.Tensor): rewards received
            next_states (torch.Tensor): next state observations
            dones (torch.Tensor): whether each episode ended
            hiddens (torch.Tensor): Actor hidden states (prediction network)
        """
        batch_size = states.shape[0]

        new_position = self.position + batch_size
        indices = torch.arange(self.position, new_position) % self.capacity

        dtype = torch.float32
        rewards = rewards.unsqueeze(-1) if rewards.dim() == 1 else rewards
        dones = dones.unsqueeze(-1) if dones.dim() == 1 else dones
        actions = actions.unsqueeze(-1) if actions.dim() == 1 else actions

        self.states[indices] = states.to(dtype)
        self.actions[indices] = actions.to(dtype)
        self.rewards[indices] = rewards.to(dtype)
        self.next_states[indices] = next_states.to(dtype)
        self.dones[indices] = dones.to(dtype)
        self.hiddens[indices] = hiddens

        # Update position - deque style
        self.position = (new_position) % self.capacity
        self.size = min(self.size + batch_size, self.capacity)

    @abstractmethod
    def sample(self) -> BatchExperience:
        """
        Samples experience from the buffer.

        Returns:
            batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

                All items have the same shape `(batch_size, features)`.
        """
        pass  # pragma: no cover

    def __len__(self) -> int:
        """
        Gets the current size of the buffer.

        Returns:
            size (int): the current size of the buffer.
        """
        return self.size

    def metadata(self) -> Dict[MetaDataKeys, Any]:
        """
        Gets the metadata of the buffer.

        Includes:

        - `capacity` - the maximum capacity of the buffer.
        - `state_dim` - state dimension.
        - `action_dim` - action dimension.
        - `hidden_dim` - hidden state dimension.
        - `position` - current buffer position.
        - `size` - current size of buffer.
        - `device` - the device used for computations.

        Returns:
            metadata (Dict[str, Any]): the buffer's metadata
        """
        return {
            "capacity": self.capacity,
            "state_dim": self.state_dim,
            "action_dim": self.action_dim,
            "hidden_dim": self.hidden_dim,
            "position": self.position,
            "size": self.size,
            "device": str(self.device) if self.device else None,
        }

    def state_dict(self) -> Dict[BufferKeys, torch.Tensor]:
        """
        Return a dictionary containing the buffer's state.

        Includes:

        - `states` - tensor of states.
        - `actions` - tensor of actions.
        - `rewards` - tensor of rewards.
        - `next_states` - tensor of next states.
        - `dones` - tensor of dones.
        - `hiddens` - tensor of Actor hidden states (prediction network).

        Returns:
            state_dict (Dict[str, torch.Tensor]): the current state of the buffer
        """
        return {
            "states": self.states,
            "actions": self.actions,
            "rewards": self.rewards,
            "next_states": self.next_states,
            "dones": self.dones,
            "hiddens": self.hiddens,
        }

    def save(self, dirpath: str | Path, prefix: str = "buffer_") -> None:
        """
        Saves a buffer's `state_dict()` to a `safetensors` file.

        Includes:

        - `<prefix>metadata.json` - the buffer's metadata
        - `<prefix>state.safetensors` - the buffer state

        Parameters:
            dirpath (str | Path): the folder path to save the buffer state
            prefix (str, optional): a name prefix for the files
        """
        save_path = Path(dirpath)
        save_path.mkdir(parents=True, exist_ok=True)

        metadata_path = Path(save_path, f"{prefix}metadata").with_suffix(".json")
        buffer_path = Path(save_path, f"{prefix}state").with_suffix(".safetensors")

        save_file(self.state_dict(), buffer_path)

        with metadata_path.open("w") as f:
            f.write(json.dumps(self.metadata(), indent=2))

    @classmethod
    def load(cls, state_path: str | Path, metadata: Dict[MetaDataKeys, Any]) -> Self:
        """
        Restores the buffer from a saved state.

        Parameters:
            state_path (str | Path): the filepath to the buffer state
            metadata (Dict[str, Any]): a dictionary of metadata already
                loaded from a `metadata.json` file

        Returns:
            buffer (Self): a new buffer instance with the saved state restored
        """
        buffer_path = Path(state_path).with_suffix(".safetensors")
        device = metadata["device"] or "cpu"

        # Create new buffer instance
        buffer = cls(
            capacity=metadata["capacity"],
            state_dim=metadata["state_dim"],
            action_dim=metadata["action_dim"],
            hidden_dim=metadata["hidden_dim"],
            device=torch.device(device) if device else None,
        )
        buffer.position = metadata["position"]
        buffer.size = metadata["size"]

        # Load buffer state
        data: Dict[BufferKeys, torch.Tensor] = load_file(buffer_path, device)

        buffer.states = data["states"]
        buffer.actions = data["actions"]
        buffer.rewards = data["rewards"]
        buffer.next_states = data["next_states"]
        buffer.dones = data["dones"]
        buffer.hiddens = data["hiddens"]

        return buffer

__init__(capacity, state_dim, action_dim, hidden_dim, *, device=None)

Parameters:

Name         Type           Description                              Default
capacity     int            the total capacity of the buffer         required
state_dim    int            dimension of state observations          required
action_dim   int            dimension of actions                     required
hidden_dim   int            dimension of hidden state                required
device       torch.device   the device to perform computations on    None

Source code in velora/buffer/base.py
Python
def __init__(
    self,
    capacity: int,
    state_dim: int,
    action_dim: int,
    hidden_dim: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        capacity (int): the total capacity of the buffer
        state_dim (int): dimension of state observations
        action_dim (int): dimension of actions
        hidden_dim (int): dimension of hidden state
        device (torch.device, optional): the device to perform computations on
    """
    self.capacity = capacity
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.hidden_dim = hidden_dim
    self.device = device

    # Position indicators
    self.position = 0
    self.size = 0

    # Pre-allocate storage
    self.states = torch.zeros((capacity, state_dim), device=device)
    self.actions = torch.zeros((capacity, action_dim), device=device)
    self.rewards = torch.zeros((capacity, 1), device=device)
    self.next_states = torch.zeros((capacity, state_dim), device=device)
    self.dones = torch.zeros((capacity, 1), device=device)
    self.hiddens = torch.zeros((capacity, hidden_dim), device=device)
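
For example, a buffer sized for a 4-dimensional observation space, 1-dimensional action space, and a 64-unit hidden state (values and import path are illustrative; the ReplayBuffer subclass documented below is used because BufferBase.sample() is abstract):

Python
import torch

from velora.buffer.replay import ReplayBuffer

buffer = ReplayBuffer(
    capacity=100_000,
    state_dim=4,
    action_dim=1,
    hidden_dim=64,
    device=torch.device("cpu"),
)
print(len(buffer))  # 0 - the buffer starts empty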

__len__()

Gets the current size of the buffer.

Returns:

Name   Type   Description
size   int    the current size of the buffer.

Source code in velora/buffer/base.py
Python
def __len__(self) -> int:
    """
    Gets the current size of the buffer.

    Returns:
        size (int): the current size of the buffer.
    """
    return self.size

add(state, action, reward, next_state, done, hidden)

Adds a single experience to the buffer.

Parameters:

Name         Type           Description                                Default
state        torch.Tensor   current state observation                  required
action       torch.Tensor   action taken                               required
reward       float          reward received                            required
next_state   torch.Tensor   next state observation                     required
done         bool           whether the episode ended                  required
hidden       torch.Tensor   Actor hidden state (prediction network)    required

Source code in velora/buffer/base.py
Python
def add(
    self,
    state: torch.Tensor,
    action: torch.Tensor,
    reward: float,
    next_state: torch.Tensor,
    done: bool,
    hidden: torch.Tensor,
) -> None:
    """
    Adds a single experience to the buffer.

    Parameters:
        state (torch.Tensor): current state observation
        action (torch.Tensor): action taken
        reward (float): reward received
        next_state (torch.Tensor): next state observation
        done (bool): whether the episode ended
        hidden (torch.Tensor): Actor hidden state (prediction network)
    """
    self.states[self.position] = state.to(torch.float32)
    self.actions[self.position] = action
    self.rewards[self.position] = reward
    self.next_states[self.position] = next_state.to(torch.float32)
    self.dones[self.position] = done
    self.hiddens[self.position] = hidden

    # Update position - deque style
    self.position = (self.position + 1) % self.capacity
    self.size = min(self.size + 1, self.capacity)
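
An illustrative call, assuming the dimensions from the earlier ReplayBuffer example (state_dim=4, action_dim=1, hidden_dim=64); the zero tensors are placeholders:

Python
state = torch.zeros(4)        # shape (state_dim,)
action = torch.zeros(1)       # shape (action_dim,)
next_state = torch.zeros(4)   # shape (state_dim,)
hidden = torch.zeros(64)      # shape (hidden_dim,)

buffer.add(state, action, reward=1.0, next_state=next_state, done=False, hidden=hidden)
print(len(buffer))  # 1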

add_multi(states, actions, rewards, next_states, dones, hiddens)

Adds a set of experiences to the buffer.

Parameters:

Name          Type           Description                                 Default
states        torch.Tensor   current state observations                  required
actions       torch.Tensor   actions taken                               required
rewards       torch.Tensor   rewards received                            required
next_states   torch.Tensor   next state observations                     required
dones         torch.Tensor   whether each episode ended                  required
hiddens       torch.Tensor   Actor hidden states (prediction network)    required

Source code in velora/buffer/base.py
Python
def add_multi(
    self,
    states: torch.Tensor,
    actions: torch.Tensor,
    rewards: torch.Tensor,
    next_states: torch.Tensor,
    dones: torch.Tensor,
    hiddens: torch.Tensor,
) -> None:
    """
    Adds a set of experiences to the buffer.

    Parameters:
        states (torch.Tensor): current state observations
        actions (torch.Tensor): actions taken
        rewards (torch.Tensor): rewards received
        next_states (torch.Tensor): next state observations
        dones (torch.Tensor): whether each episode ended
        hiddens (torch.Tensor): Actor hidden states (prediction network)
    """
    batch_size = states.shape[0]

    new_position = self.position + batch_size
    indices = torch.arange(self.position, new_position) % self.capacity

    dtype = torch.float32
    rewards = rewards.unsqueeze(-1) if rewards.dim() == 1 else rewards
    dones = dones.unsqueeze(-1) if dones.dim() == 1 else dones
    actions = actions.unsqueeze(-1) if actions.dim() == 1 else actions

    self.states[indices] = states.to(dtype)
    self.actions[indices] = actions.to(dtype)
    self.rewards[indices] = rewards.to(dtype)
    self.next_states[indices] = next_states.to(dtype)
    self.dones[indices] = dones.to(dtype)
    self.hiddens[indices] = hiddens

    # Update position - deque style
    self.position = (new_position) % self.capacity
    self.size = min(self.size + batch_size, self.capacity)
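
A sketch adding a batch of 32 transitions at once (shapes again assume state_dim=4, action_dim=1, hidden_dim=64; the zero tensors are placeholders):

Python
states = torch.zeros(32, 4)
actions = torch.zeros(32, 1)
rewards = torch.zeros(32)        # 1D rewards and dones are unsqueezed to (32, 1) internally
next_states = torch.zeros(32, 4)
dones = torch.zeros(32)
hiddens = torch.zeros(32, 64)

buffer.add_multi(states, actions, rewards, next_states, dones, hiddens)
print(len(buffer))  # grows by 32, capped at capacity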

load(state_path, metadata) classmethod

Restores the buffer from a saved state.

Parameters:

Name         Type              Description                                                           Default
state_path   str | Path        the filepath to the buffer state                                      required
metadata     Dict[str, Any]    a dictionary of metadata already loaded from a metadata.json file     required

Returns:

Name     Type   Description
buffer   Self   a new buffer instance with the saved state restored

Source code in velora/buffer/base.py
Python
@classmethod
def load(cls, state_path: str | Path, metadata: Dict[MetaDataKeys, Any]) -> Self:
    """
    Restores the buffer from a saved state.

    Parameters:
        state_path (str | Path): the filepath to the buffer state
        metadata (Dict[str, Any]): a dictionary of metadata already
            loaded from a `metadata.json` file

    Returns:
        buffer (Self): a new buffer instance with the saved state restored
    """
    buffer_path = Path(state_path).with_suffix(".safetensors")
    device = metadata["device"] or "cpu"

    # Create new buffer instance
    buffer = cls(
        capacity=metadata["capacity"],
        state_dim=metadata["state_dim"],
        action_dim=metadata["action_dim"],
        hidden_dim=metadata["hidden_dim"],
        device=torch.device(device) if device else None,
    )
    buffer.position = metadata["position"]
    buffer.size = metadata["size"]

    # Load buffer state
    data: Dict[BufferKeys, torch.Tensor] = load_file(buffer_path, device)

    buffer.states = data["states"]
    buffer.actions = data["actions"]
    buffer.rewards = data["rewards"]
    buffer.next_states = data["next_states"]
    buffer.dones = data["dones"]
    buffer.hiddens = data["hiddens"]

    return buffer
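
A hedged restore sketch (file names follow the defaults written by save() below; the folder path is illustrative):

Python
import json
from pathlib import Path

# Metadata is loaded from the JSON file written alongside the buffer state.
metadata = json.loads(Path("checkpoints/buffer_metadata.json").read_text())
buffer = ReplayBuffer.load("checkpoints/buffer_state.safetensors", metadata)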

metadata()

Gets the metadata of the buffer.

Includes:

  • capacity - the maximum capacity of the buffer.
  • state_dim - state dimension.
  • action_dim - action dimension.
  • hidden_dim - hidden state dimension.
  • position - current buffer position.
  • size - current size of buffer.
  • device - the device used for computations.

Returns:

Name       Type              Description
metadata   Dict[str, Any]    the buffer's metadata

Source code in velora/buffer/base.py
Python
def metadata(self) -> Dict[MetaDataKeys, Any]:
    """
    Gets the metadata of the buffer.

    Includes:

    - `capacity` - the maximum capacity of the buffer.
    - `state_dim` - state dimension.
    - `action_dim` - action dimension.
    - `hidden_dim` - hidden state dimension.
    - `position` - current buffer position.
    - `size` - current size of buffer.
    - `device` - the device used for computations.

    Returns:
        metadata (Dict[str, Any]): the buffer's metadata
    """
    return {
        "capacity": self.capacity,
        "state_dim": self.state_dim,
        "action_dim": self.action_dim,
        "hidden_dim": self.hidden_dim,
        "position": self.position,
        "size": self.size,
        "device": str(self.device) if self.device else None,
    }
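
For example (values are illustrative and follow the earlier examples):

Python
meta = buffer.metadata()
# e.g. {'capacity': 100000, 'state_dim': 4, 'action_dim': 1,
#       'hidden_dim': 64, 'position': 33, 'size': 33, 'device': 'cpu'}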

sample() abstractmethod

Samples experience from the buffer.

Returns:

Name    Type              Description
batch   BatchExperience   an object of samples with the attributes (states, actions, rewards, next_states, dones, hiddens).
                          All items have the same shape (batch_size, features).

Source code in velora/buffer/base.py
Python
@abstractmethod
def sample(self) -> BatchExperience:
    """
    Samples experience from the buffer.

    Returns:
        batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

            All items have the same shape `(batch_size, features)`.
    """
    pass  # pragma: no cover

save(dirpath, prefix='buffer_')

Saves a buffer's state_dict() to a safetensors file.

Includes:

  • <prefix>metadata.json - the buffer's metadata
  • <prefix>state.safetensors - the buffer state

Parameters:

Name      Type         Description                                 Default
dirpath   str | Path   the folder path to save the buffer state    required
prefix    str          a name prefix for the files                 'buffer_'

Source code in velora/buffer/base.py
Python
def save(self, dirpath: str | Path, prefix: str = "buffer_") -> None:
    """
    Saves a buffer's `state_dict()` to a `safetensors` file.

    Includes:

    - `<prefix>metadata.json` - the buffer's metadata
    - `<prefix>state.safetensors` - the buffer state

    Parameters:
        dirpath (str | Path): the folder path to save the buffer state
        prefix (str, optional): a name prefix for the files
    """
    save_path = Path(dirpath)
    save_path.mkdir(parents=True, exist_ok=True)

    metadata_path = Path(save_path, f"{prefix}metadata").with_suffix(".json")
    buffer_path = Path(save_path, f"{prefix}state").with_suffix(".safetensors")

    save_file(self.state_dict(), buffer_path)

    with metadata_path.open("w") as f:
        f.write(json.dumps(self.metadata(), indent=2))
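
A minimal sketch (the folder name is illustrative):

Python
# Writes checkpoints/buffer_metadata.json and checkpoints/buffer_state.safetensors.
buffer.save("checkpoints", prefix="buffer_")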

state_dict()

Return a dictionary containing the buffer's state.

Includes:

  • states - tensor of states.
  • actions - tensor of actions.
  • rewards - tensor of rewards.
  • next_states - tensor of next states.
  • dones - tensor of dones.
  • hiddens - tensor of Actor hidden states (prediction network).

Returns:

Name         Type                       Description
state_dict   Dict[str, torch.Tensor]    the current state of the buffer

Source code in velora/buffer/base.py
Python
def state_dict(self) -> Dict[BufferKeys, torch.Tensor]:
    """
    Return a dictionary containing the buffer's state.

    Includes:

    - `states` - tensor of states.
    - `actions` - tensor of actions.
    - `rewards` - tensor of rewards.
    - `next_states` - tensor of next states.
    - `dones` - tensor of dones.
    - `hiddens` - tensor of Actor hidden states (prediction network).

    Returns:
        state_dict (Dict[str, torch.Tensor]): the current state of the buffer
    """
    return {
        "states": self.states,
        "actions": self.actions,
        "rewards": self.rewards,
        "next_states": self.next_states,
        "dones": self.dones,
        "hiddens": self.hiddens,
    }

ReplayBuffer

Bases: BufferBase

A Buffer for storing agent experiences. Used for Off-Policy agents.

First introduced to Deep RL in the Deep Q-Network (DQN) paper: Playing Atari with Deep Reinforcement Learning.

Source code in velora/buffer/replay.py
Python
class ReplayBuffer(BufferBase):
    """
    A Buffer for storing agent experiences. Used for Off-Policy agents.

    First introduced in Deep RL in the Deep Q-Network paper:
    [Playing Atari with Deep Reinforcement Learning](https://arxiv.org/abs/1312.5602).
    """

    def __init__(
        self,
        capacity: int,
        state_dim: int,
        action_dim: int,
        hidden_dim: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            capacity (int): the total capacity of the buffer
            state_dim (int): dimension of state observations
            action_dim (int): dimension of actions
            hidden_dim (int): dimension of hidden state
            device (torch.device, optional): the device to perform computations on
        """
        super().__init__(capacity, state_dim, action_dim, hidden_dim, device=device)

    def config(self) -> BufferConfig:
        """
        Creates a buffer config model.

        Returns:
            config (BufferConfig): a config model with buffer details.
        """
        return BufferConfig(
            type="ReplayBuffer",
            capacity=self.capacity,
            state_dim=self.state_dim,
            action_dim=self.action_dim,
            hidden_dim=self.hidden_dim,
        )

    @override
    def sample(self, batch_size: int) -> BatchExperience:
        """
        Samples a random batch of experiences from the buffer.

        Parameters:
            batch_size (int): the number of items to sample

        Returns:
            batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

                All items have the same shape `(batch_size, features)`.
        """
        if len(self) < batch_size:
            raise ValueError(
                f"Buffer does not contain enough experiences. Available: {len(self)}, Requested: {batch_size}"
            )

        indices = torch.randint(0, self.size, (batch_size,), device=self.device)

        return BatchExperience(
            states=self.states[indices],
            actions=self.actions[indices],
            rewards=self.rewards[indices],
            next_states=self.next_states[indices],
            dones=self.dones[indices],
            hiddens=self.hiddens[indices],
        )

    def warm(self, agent: "RLModuleAgent", n_samples: int, num_envs: int = 8) -> None:
        """
        Warms the buffer by generating samples from an agent acting in a
        `vectorized` copy of the environment until the buffer holds at least
        `n_samples` experiences.

        Parameters:
            agent (RLModuleAgent): the agent to generate samples with
            n_samples (int): the target number of samples to generate
            num_envs (int, optional): number of vectorized environments. Cannot
                be smaller than `2`
        """
        if num_envs < 2:
            raise ValueError(f"'{num_envs=}' cannot be smaller than 2.")

        envs = gym.make_vec(
            agent.env.spec.id,
            num_envs=num_envs,
            vectorization_mode="sync",
        )
        envs: gym.vector.SyncVectorEnv = gym.wrappers.vector.NumpyToTorch(
            envs, agent.device
        )

        hidden = None
        states, _ = envs.reset()

        while not len(self) >= n_samples:
            actions, hidden = agent.predict(states, hidden, train_mode=True)
            next_states, rewards, terminated, truncated, _ = envs.step(actions)
            dones = terminated | truncated

            self.add_multi(states, actions, rewards, next_states, dones, hidden)

            states = next_states

        envs.close()

__init__(capacity, state_dim, action_dim, hidden_dim, *, device=None)

Parameters:

Name         Type           Description                              Default
capacity     int            the total capacity of the buffer         required
state_dim    int            dimension of state observations          required
action_dim   int            dimension of actions                     required
hidden_dim   int            dimension of hidden state                required
device       torch.device   the device to perform computations on    None

Source code in velora/buffer/replay.py
Python
def __init__(
    self,
    capacity: int,
    state_dim: int,
    action_dim: int,
    hidden_dim: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        capacity (int): the total capacity of the buffer
        state_dim (int): dimension of state observations
        action_dim (int): dimension of actions
        hidden_dim (int): dimension of hidden state
        device (torch.device, optional): the device to perform computations on
    """
    super().__init__(capacity, state_dim, action_dim, hidden_dim, device=device)

config()

Creates a buffer config model.

Returns:

Name     Type           Description
config   BufferConfig   a config model with buffer details.

Source code in velora/buffer/replay.py
Python
def config(self) -> BufferConfig:
    """
    Creates a buffer config model.

    Returns:
        config (BufferConfig): a config model with buffer details.
    """
    return BufferConfig(
        type="ReplayBuffer",
        capacity=self.capacity,
        state_dim=self.state_dim,
        action_dim=self.action_dim,
        hidden_dim=self.hidden_dim,
    )

sample(batch_size)

Samples a random batch of experiences from the buffer.

Parameters:

Name         Type   Description                      Default
batch_size   int    the number of items to sample    required

Returns:

Name    Type              Description
batch   BatchExperience   an object of samples with the attributes (states, actions, rewards, next_states, dones, hiddens).
                          All items have the same shape (batch_size, features).

Source code in velora/buffer/replay.py
Python
@override
def sample(self, batch_size: int) -> BatchExperience:
    """
    Samples a random batch of experiences from the buffer.

    Parameters:
        batch_size (int): the number of items to sample

    Returns:
        batch (BatchExperience): an object of samples with the attributes (`states`, `actions`, `rewards`, `next_states`, `dones`, `hiddens`).

            All items have the same shape `(batch_size, features)`.
    """
    if len(self) < batch_size:
        raise ValueError(
            f"Buffer does not contain enough experiences. Available: {len(self)}, Requested: {batch_size}"
        )

    indices = torch.randint(0, self.size, (batch_size,), device=self.device)

    return BatchExperience(
        states=self.states[indices],
        actions=self.actions[indices],
        rewards=self.rewards[indices],
        next_states=self.next_states[indices],
        dones=self.dones[indices],
        hiddens=self.hiddens[indices],
    )
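
A short sketch of sampling once the buffer holds enough experiences (the batch size is illustrative):

Python
batch = buffer.sample(batch_size=128)
assert batch.states.shape == (128, buffer.state_dim)

# Requesting more samples than are currently stored raises a ValueError.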

warm(agent, n_samples, num_envs=8)

Warms the buffer by generating samples from an agent acting in a vectorized copy of the environment until the buffer holds at least n_samples experiences.

Parameters:

Name        Type            Description                                                     Default
agent       RLModuleAgent   the agent to generate samples with                              required
n_samples   int             the target number of samples to generate                       required
num_envs    int             number of vectorized environments. Cannot be smaller than 2    8

Source code in velora/buffer/replay.py
Python
def warm(self, agent: "RLModuleAgent", n_samples: int, num_envs: int = 8) -> None:
    """
    Warms the buffer by generating samples from an agent acting in a
    `vectorized` copy of the environment until the buffer holds at least
    `n_samples` experiences.

    Parameters:
        agent (RLModuleAgent): the agent to generate samples with
        n_samples (int): the target number of samples to generate
        num_envs (int, optional): number of vectorized environments. Cannot
            be smaller than `2`
    """
    if num_envs < 2:
        raise ValueError(f"'{num_envs=}' cannot be smaller than 2.")

    envs = gym.make_vec(
        agent.env.spec.id,
        num_envs=num_envs,
        vectorization_mode="sync",
    )
    envs: gym.vector.SyncVectorEnv = gym.wrappers.vector.NumpyToTorch(
        envs, agent.device
    )

    hidden = None
    states, _ = envs.reset()

    while not len(self) >= n_samples:
        actions, hidden = agent.predict(states, hidden, train_mode=True)
        next_states, rewards, terminated, truncated, _ = envs.step(actions)
        dones = terminated | truncated

        self.add_multi(states, actions, rewards, next_states, dones, hidden)

        states = next_states

    envs.close()
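
A hedged usage sketch (assumes agent is an RLModuleAgent with a Gymnasium environment attached, as the method expects):

Python
buffer.warm(agent, n_samples=1_000, num_envs=8)
print(len(buffer))  # at least 1_000 once warming completes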