velora.training

Methods and classes dedicated to handling agent training.

MovingMetric

Tracks a metric with a moving window for statistics.

Attributes:

Name         Type          Description
window       torch.Tensor  a tensor of values used for the statistics
window_size  int           the size of the moving statistics window

Source code in velora/training/metrics.py
Python
class MovingMetric:
    """
    Tracks a metric with a moving window for statistics.

    Attributes:
        window (torch.Tensor): a tensor of values used for the statistics
        window_size (int): the size of the moving statistics window
    """

    def __init__(self, window_size: int, *, device: torch.device | None = None) -> None:
        """
        Parameters:
            window_size (int): the size of the moving window
            device (torch.device, optional): the device to perform computations on
        """
        self.window_size = window_size
        self.device = device

        # Position indicators
        self.position = 0
        self.size = 0

        # Pre-allocated storage
        self.window = torch.zeros((window_size), device=device)

    @property
    def latest(self) -> torch.Tensor:
        """Gets the latest value."""
        latest_pos = (self.position - 1) % self.window_size
        return self.window[latest_pos]

    def add(self, value: torch.Tensor) -> None:
        """
        Adds a value and updates the window.

        Parameters:
            value (torch.Tensor): value to add
        """
        self.window[self.position] = value.to(self.device)

        # Update position - deque style
        self.position = (self.position + 1) % self.window_size
        self.size = min(self.size + 1, self.window_size)

    def mean(self) -> torch.Tensor:
        """
        Calculates the mean of the values in the current window.

        Returns:
            avg (torch.Tensor): the calculated mean.
        """
        # Only the filled portion of the window contributes to the statistic
        if self.size == 0:
            return torch.tensor(0.0, device=self.device)
        return self.window[: self.size].mean()

    def std(self) -> torch.Tensor:
        """
        Calculates the standard deviation of the values in the current window.

        Returns:
            std (torch.Tensor): the calculated standard deviation.
        """
        # A single value has no spread, so fall back to 0
        return (
            self.window[: self.size].std()
            if self.size > 1
            else torch.tensor(0.0, device=self.device)
        )

    def max(self) -> torch.Tensor:
        """
        Calculates the maximum value in the current window.

        Returns:
            max (torch.Tensor): the maximum value.
        """
        if self.size == 0:
            return torch.tensor(0.0, device=self.device)
        return self.window[: self.size].max()

    def __len__(self) -> int:
        """Returns the number of items currently in the window."""
        return self.size
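
A minimal usage sketch (illustrative, not from the library's docs) tracking a reward metric, assuming the windowed statistics shown above:

Python
import torch

from velora.training.metrics import MovingMetric

rewards = MovingMetric(window_size=5)

for r in [1.0, 3.0, 2.0]:
    rewards.add(torch.tensor(r))

print(len(rewards))    # 3 -> window is not yet full
print(rewards.latest)  # tensor(2.)
print(rewards.mean())  # tensor(2.) -> mean over the filled entries only
print(rewards.std())   # tensor(1.) -> sample standard deviation
print(rewards.max())   # tensor(3.)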

latest property

Gets the latest value.

__init__(window_size, *, device=None)

Parameters:

Name         Type          Description                            Default
window_size  int           the size of the moving window          required
device       torch.device  the device to perform computations on  None
Source code in velora/training/metrics.py
Python
def __init__(self, window_size: int, *, device: torch.device | None = None) -> None:
    """
    Parameters:
        window_size (int): the size of the moving window
        device (torch.device, optional): the device to perform computations on
    """
    self.window_size = window_size
    self.device = device

    # Position indicators
    self.position = 0
    self.size = 0

    # Pre-allocated storage
    self.window = torch.zeros((window_size), device=device)

__len__()

Returns the number of items currently in the window.

Source code in velora/training/metrics.py
Python
def __len__(self) -> int:
    """Returns the number of items currently in the window."""
    return self.size

add(value)

Adds a value and updates the window.

Parameters:

Name   Type          Description   Default
value  torch.Tensor  value to add  required
Source code in velora/training/metrics.py
Python
def add(self, value: torch.Tensor) -> None:
    """
    Adds a value and updates the window.

    Parameters:
        value (torch.Tensor): value to add
    """
    self.window[self.position] = value.to(self.device)

    # Update position - deque style
    self.position = (self.position + 1) % self.window_size
    self.size = min(self.size + 1, self.window_size)
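
Once the window fills, add overwrites the oldest entry in place, deque style. A small illustrative sketch of the wrap-around:

Python
import torch

from velora.training.metrics import MovingMetric

m = MovingMetric(window_size=3)

for v in [1.0, 2.0, 3.0, 4.0]:
    m.add(torch.tensor(v))

# 4.0 wrapped around and overwrote 1.0 at position 0
print(m.window)  # tensor([4., 2., 3.])
print(m.latest)  # tensor(4.)
print(len(m))    # 3 -> size is capped at window_size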

max()

Calculates the maximum value in the current window.

Returns:

Name  Type          Description
max   torch.Tensor  the maximum value.

Source code in velora/training/metrics.py
Python
def max(self) -> torch.Tensor:
    """
    Calculates the maximum value in the current window.

    Returns:
        max (torch.Tensor): the maximum value.
    """
    if self.size == 0:
        return torch.tensor(0.0, device=self.device)
    return self.window[: self.size].max()

mean()

Calculates the mean of the values in the current window.

Returns:

Name  Type          Description
avg   torch.Tensor  the calculated mean.

Source code in velora/training/metrics.py
Python
def mean(self) -> torch.Tensor:
    """
    Calculates the mean of the values in the current window.

    Returns:
        avg (torch.Tensor): the calculated mean.
    """
    # Only the filled portion of the window contributes to the statistic
    if self.size == 0:
        return torch.tensor(0.0, device=self.device)
    return self.window[: self.size].mean()

std()

Calculates the standard deviation of the values in the current window.

Returns:

Name  Type          Description
std   torch.Tensor  the calculated standard deviation.

Source code in velora/training/metrics.py
Python
def std(self) -> torch.Tensor:
    """
    Calculates the standard deviation of the values in the current window.

    Returns:
        std (torch.Tensor): the calculated standard deviation.
    """
    # A single value has no spread, so fall back to 0
    return (
        self.window[: self.size].std()
        if self.size > 1
        else torch.tensor(0.0, device=self.device)
    )

StepStorage

A storage container for step metrics.

Useful for calculating the episodic average values to store in MetricStorage.

Attributes:

Name            Type          Description
critic_losses   torch.Tensor  a tensor of agent Critic loss values
actor_losses    torch.Tensor  a tensor of agent Actor loss values
entropy_losses  torch.Tensor  a tensor of agent Entropy loss values

Source code in velora/training/metrics.py
Python
class StepStorage:
    """
    A storage container for step metrics.

    Useful for calculating the episodic average values to store in `MetricStorage`.

    Attributes:
        critic_losses (torch.Tensor): a tensor of agent Critic loss values
        actor_losses (torch.Tensor): a tensor of agent Actor loss values
        entropy_losses (torch.Tensor): a tensor of agent Entropy loss values
    """

    def __init__(self, capacity: int, *, device: torch.device | None = None) -> None:
        """
        Parameters:
            capacity (int): storage capacity for each tensor
            device (torch.device, optional): the device to perform computations on
        """
        self.capacity = capacity
        self.device = device

        # Position indicators
        self.position = 0
        self.size = 0

        self.critic_losses = torch.zeros((capacity), device=device)
        self.actor_losses = torch.zeros((capacity), device=device)
        self.entropy_losses = torch.zeros((capacity), device=device)

    def critic_avg(self, ep_length: int) -> torch.Tensor:
        """
        Computes the critic loss average. Useful for computing episodic averages.

        Parameters:
            ep_length (int): size of the episode

        Returns:
            avg (torch.Tensor): critic loss step average
        """
        return self.critic_losses[:ep_length].mean()

    def actor_avg(self, ep_length: int) -> torch.Tensor:
        """
        Computes the actor loss average. Useful for computing episodic averages.

        Parameters:
            ep_length (int): size of the episode

        Returns:
            avg (torch.Tensor): actor loss step average
        """
        return self.actor_losses[:ep_length].mean()

    def entropy_avg(self, ep_length: int) -> torch.Tensor:
        """
        Computes the entropy loss average. Useful for computing episodic averages.

        Parameters:
            ep_length (int): size of the episode

        Returns:
            avg (torch.Tensor): entropy loss step average
        """
        return self.entropy_losses[:ep_length].mean()

    def add(
        self,
        critic: torch.Tensor,
        actor: torch.Tensor,
        entropy: torch.Tensor,
    ) -> None:
        """
        Adds one of each metric into storage.

        Parameters:
            critic (torch.Tensor): critic loss
            actor (torch.Tensor): actor loss
            entropy (torch.Tensor): entropy loss
        """
        self.critic_losses[self.position] = critic
        self.actor_losses[self.position] = actor
        self.entropy_losses[self.position] = entropy

        # Update position
        self.position = (self.position + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def empty(self) -> None:
        """Empty storage."""
        self.critic_losses.zero_()
        self.actor_losses.zero_()
        self.entropy_losses.zero_()

        self.position = 0
        self.size = 0
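
A short illustrative sketch (the loss values are placeholders): accumulate per-step losses over an episode, average them at episode end, then reset:

Python
import torch

from velora.training.metrics import StepStorage

storage = StepStorage(capacity=1000)

# One episode of 3 steps (placeholder loss values)
for critic, actor, entropy in [(0.9, 0.5, 0.1), (0.7, 0.4, 0.1), (0.5, 0.3, 0.2)]:
    storage.add(torch.tensor(critic), torch.tensor(actor), torch.tensor(entropy))

ep_length = 3
print(storage.critic_avg(ep_length))   # tensor(0.7000)
print(storage.actor_avg(ep_length))    # tensor(0.4000)
print(storage.entropy_avg(ep_length))  # tensor(0.1333)

storage.empty()  # reset before the next episode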

__init__(capacity, *, device=None)

Parameters:

Name      Type          Description                            Default
capacity  int           storage capacity for each tensor       required
device    torch.device  the device to perform computations on  None
Source code in velora/training/metrics.py
Python
def __init__(self, capacity: int, *, device: torch.device | None = None) -> None:
    """
    Parameters:
        capacity (int): storage capacity for each tensor
        device (torch.device, optional): the device to perform computations on
    """
    self.capacity = capacity
    self.device = device

    # Position indicators
    self.position = 0
    self.size = 0

    self.critic_losses = torch.zeros((capacity), device=device)
    self.actor_losses = torch.zeros((capacity), device=device)
    self.entropy_losses = torch.zeros((capacity), device=device)

actor_avg(ep_length)

Computes the actor loss average. Useful for computing episodic averages.

Parameters:

Name       Type  Description          Default
ep_length  int   size of the episode  required

Returns:

Name  Type          Description
avg   torch.Tensor  actor loss step average

Source code in velora/training/metrics.py
Python
def actor_avg(self, ep_length: int) -> torch.Tensor:
    """
    Computes the actor loss average. Useful for computing episodic averages.

    Parameters:
        ep_length (int): size of the episode

    Returns:
        avg (torch.Tensor): actor loss step average
    """
    return self.actor_losses[:ep_length].mean()

add(critic, actor, entropy)

Adds one of each metric into storage.

Parameters:

Name     Type          Description   Default
critic   torch.Tensor  critic loss   required
actor    torch.Tensor  actor loss    required
entropy  torch.Tensor  entropy loss  required
Source code in velora/training/metrics.py
Python
def add(
    self,
    critic: torch.Tensor,
    actor: torch.Tensor,
    entropy: torch.Tensor,
) -> None:
    """
    Adds one of each metric into storage.

    Parameters:
        critic (torch.Tensor): critic loss
        actor (torch.Tensor): actor loss
        entropy (torch.Tensor): entropy loss
    """
    self.critic_losses[self.position] = critic
    self.actor_losses[self.position] = actor
    self.entropy_losses[self.position] = entropy

    # Update position
    self.position = (self.position + 1) % self.capacity
    self.size = min(self.size + 1, self.capacity)

critic_avg(ep_length)

Computes the critic loss average. Useful for computing episodic averages.

Parameters:

Name       Type  Description          Default
ep_length  int   size of the episode  required

Returns:

Name  Type          Description
avg   torch.Tensor  critic loss step average

Source code in velora/training/metrics.py
Python
def critic_avg(self, ep_length: int) -> torch.Tensor:
    """
    Computes the critic loss average. Useful for computing episodic averages.

    Parameters:
        ep_length (int): size of the episode

    Returns:
        avg (torch.Tensor): critic loss step average
    """
    return self.critic_losses[:ep_length].mean()

empty()

Empty storage.

Source code in velora/training/metrics.py
Python
def empty(self) -> None:
    """Empty storage."""
    self.critic_losses.zero_()
    self.actor_losses.zero_()
    self.entropy_losses.zero_()

    self.position = 0
    self.size = 0

entropy_avg(ep_length)

Computes the entropy loss average. Useful for computing episodic averages.

Parameters:

Name       Type  Description          Default
ep_length  int   size of the episode  required

Returns:

Name  Type          Description
avg   torch.Tensor  entropy loss step average

Source code in velora/training/metrics.py
Python
def entropy_avg(self, ep_length: int) -> torch.Tensor:
    """
    Computes the entropy loss average. Useful for computing episodic averages.

    Parameters:
        ep_length (int): size of the episode

    Returns:
        avg (torch.Tensor): entropy loss step average
    """
    return self.entropy_losses[:ep_length].mean()

TrainHandler

Bases: TrainHandlerBase

A context manager for handling an agent's training state. Compatible with single environments.

Source code in velora/training/handler.py
Python
class TrainHandler(TrainHandlerBase):
    """
    A context manager for handling an agent's training state. Compatible with single
    environments.
    """

    def __init__(
        self,
        agent: RLModuleAgent,
        n_episodes: int,
        max_steps: int,
        log_freq: int,
        window_size: int,
        callbacks: List["TrainCallback"] | None,
    ) -> None:
        """
        Parameters:
            agent (RLModuleAgent): the agent being trained
            n_episodes (int): the total number of training episodes
            max_steps (int): maximum number of steps in an episode
            log_freq (int): metric logging frequency (in episodes)
            window_size (int): the moving average window size (in episodes)
            callbacks (List[TrainCallback] | None): a list of training callbacks.
                If `None`, sets to an empty list
        """
        super().__init__(agent, window_size, callbacks)

        self.log_freq = log_freq
        self.n_episodes = n_episodes
        self.max_steps = max_steps

    @property
    def metrics(self) -> TrainMetrics:
        """
        Training metric class instance.

        Returns:
            metrics (TrainMetrics): current training metric state.
        """
        return self._metrics

    def __enter__(self) -> Self:
        """
        Set up the training context, initializing the environment.

        Returns:
            self (Self): the initialized context.
        """
        self.session = Session(self.engine)
        self._metrics = TrainMetrics(
            self.session,
            self.window_size,
            self.n_episodes,
            self.max_steps,
            device=self.device,
        )
        self._metrics.start_experiment(self.agent.config)

        self.state = TrainState(
            agent=self.agent,
            env=self.env,
            session=self.session,
            total_episodes=self.n_episodes,
            experiment_id=self._metrics.experiment_id,
        )

        return super().__enter__()

    def __exit__(
        self,
        exc_type: Type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ):
        super().__exit__(exc_type, exc_val, exc_tb)

        if self.state.saving_enabled:
            self.save_completed()

    def start(self) -> None:
        super().start()

        # Update environment with callback wrappers
        self.env = self.state.env

    def step(self, current_step: int) -> None:
        """
        Performs the `step` callback event.

        Parameters:
            current_step (int): the current training timestep index
        """
        self.state.update(status="step", current_step=current_step)
        self._run_callbacks()

    def log(self, idx: int, log_type: Literal["episode", "step"]) -> None:
        """
        Performs the `logging` callback event.

        Parameters:
            idx (int): the current training step or episode index
            log_type (str): the type of logging method
        """
        if log_type == "episode":
            self.state.update(status="logging", current_ep=idx, logging_type=log_type)
        else:
            self.state.update(status="logging", current_step=idx, logging_type=log_type)

        self._run_callbacks()
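
A schematic sketch of driving the context manager (illustrative assumptions: agent is a pre-built RLModuleAgent, and the loop body and exact call order stand in for a real training loop):

Python
from velora.training.handler import TrainHandler

agent = ...  # assumed: a pre-built RLModuleAgent

handler = TrainHandler(
    agent=agent,
    n_episodes=100,
    max_steps=1000,
    log_freq=10,
    window_size=25,
    callbacks=None,
)

with handler as train:
    train.start()  # applies any callback wrappers to the environment

    for ep in range(1, train.n_episodes + 1):
        for step in range(1, train.max_steps + 1):
            # ... agent-environment interaction goes here ...
            train.step(step)  # fires the `step` callback event

        if ep % train.log_freq == 0:
            train.log(ep, "episode")  # fires the `logging` callback event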

metrics property

Training metric class instance.

Returns:

Name     Type          Description
metrics  TrainMetrics  current training metric state.

__enter__()

Set up the training context, initializing the environment.

Returns:

Name  Type  Description
self  Self  the initialized context.

Source code in velora/training/handler.py
Python
def __enter__(self) -> Self:
    """
    Set up the training context, initializing the environment.

    Returns:
        self (Self): the initialized context.
    """
    self.session = Session(self.engine)
    self._metrics = TrainMetrics(
        self.session,
        self.window_size,
        self.n_episodes,
        self.max_steps,
        device=self.device,
    )
    self._metrics.start_experiment(self.agent.config)

    self.state = TrainState(
        agent=self.agent,
        env=self.env,
        session=self.session,
        total_episodes=self.n_episodes,
        experiment_id=self._metrics.experiment_id,
    )

    return super().__enter__()

__init__(agent, n_episodes, max_steps, log_freq, window_size, callbacks)

Parameters:

Name         Type                        Description                                                    Default
agent        RLModuleAgent               the agent being trained                                        required
n_episodes   int                         the total number of training episodes                          required
max_steps    int                         maximum number of steps in an episode                          required
log_freq     int                         metric logging frequency (in episodes)                         required
window_size  int                         the moving average window size (in episodes)                   required
callbacks    List[TrainCallback] | None  a list of training callbacks. If None, sets to an empty list  required
Source code in velora/training/handler.py
Python
def __init__(
    self,
    agent: RLModuleAgent,
    n_episodes: int,
    max_steps: int,
    log_freq: int,
    window_size: int,
    callbacks: List["TrainCallback"] | None,
) -> None:
    """
    Parameters:
        agent (RLModuleAgent): the agent being trained
        n_episodes (int): the total number of training episodes
        max_steps (int): maximum number of steps in an episode
        log_freq (int): metric logging frequency (in episodes)
        window_size (int): the moving average window size (in episodes)
        callbacks (List[TrainCallback] | None): a list of training callbacks.
            If `None`, sets to an empty list
    """
    super().__init__(agent, window_size, callbacks)

    self.log_freq = log_freq
    self.n_episodes = n_episodes
    self.max_steps = max_steps

log(idx, log_type)

Performs the logging callback event.

Parameters:

Name      Type  Description                                 Default
idx       int   the current training step or episode index  required
log_type  str   the type of logging method                  required
Source code in velora/training/handler.py
Python
def log(self, idx: int, log_type: Literal["episode", "step"]) -> None:
    """
    Performs the `logging` callback event.

    Parameters:
        idx (int): the current training step or episode index
        log_type (str): the type of logging method
    """
    if log_type == "episode":
        self.state.update(status="logging", current_ep=idx, logging_type=log_type)
    else:
        self.state.update(status="logging", current_step=idx, logging_type=log_type)

    self._run_callbacks()

step(current_step)

Performs the step callback event.

Parameters:

Name          Type  Description                          Default
current_step  int   the current training timestep index  required
Source code in velora/training/handler.py
Python
def step(self, current_step: int) -> None:
    """
    Performs the `step` callback event.

    Parameters:
        current_step (int): the current training timestep index
    """
    self.state.update(status="step", current_step=current_step)
    self._run_callbacks()

TrainMetrics

Bases: TrainMetricsBase

A utility class for working with and storing episodic training metrics for monitoring an agent's training performance.

Source code in velora/training/metrics.py
Python
class TrainMetrics(TrainMetricsBase):
    """
    A utility class for working with and storing episodic training metrics for
    monitoring an agent's training performance.
    """

    def __init__(
        self,
        session: Session,
        window_size: int,
        n_episodes: int,
        max_steps: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            session (sqlmodel.Session): current metric database session
            window_size (int): moving average window size
            n_episodes (int): total number of training episodes
            max_steps (int): maximum number of steps per episode
            device (torch.device, optional): the device to perform computations on
        """
        super().__init__(session, window_size, device=device)

        self.n_episodes = n_episodes
        self.max_steps = max_steps

        self._current_losses = StepStorage(max_steps, device=device)

    def add_step(
        self,
        critic: torch.Tensor,
        actor: torch.Tensor,
        entropy: torch.Tensor,
    ) -> None:
        """
        Add timestep metrics to local storage.

        Parameters:
            critic (torch.Tensor): critic step loss
            actor (torch.Tensor): actor step loss
            entropy (torch.Tensor): entropy step loss
        """
        self._exp_created_check()

        self._current_losses.add(critic, actor, entropy)

    def add_episode(
        self,
        ep_idx: int,
        reward: torch.Tensor,
        ep_length: torch.Tensor,
    ) -> None:
        """
        Add episode metrics to the metric database and reset step accumulators.

        Parameters:
            ep_idx (int): the current episode index
            reward (torch.Tensor): episode reward
            ep_length (torch.Tensor): the number of steps taken in the episode
        """
        self._exp_created_check()

        self._ep_rewards.add(reward.to(self.device))
        self._ep_lengths.add(ep_length.to(self.device))

        self._actor_loss = self._current_losses.actor_avg(ep_length.item())
        self._critic_loss = self._current_losses.critic_avg(ep_length.item())
        self._entropy_loss = self._current_losses.entropy_avg(ep_length.item())
        self.step_total += ep_length

        moving_avg = self.reward_moving_avg()
        moving_std = self.reward_moving_std()

        ep = Episode(
            experiment_id=self.experiment_id,
            episode_num=ep_idx,
            reward=reward.item(),
            length=ep_length.item(),
            reward_moving_avg=moving_avg,
            reward_moving_std=moving_std,
            actor_loss=self._actor_loss.item(),
            critic_loss=self._critic_loss.item(),
            entropy_loss=self._entropy_loss.item(),
        )
        self.session.add(ep)
        self.session.commit()

        # Reset step storage
        self._current_losses.empty()

    def info(self, current_ep: int) -> None:
        """
        Outputs basic information to the console.

        Parameters:
            current_ep (int): the current episode index
        """
        ep = number_to_short(current_ep)
        max_eps = number_to_short(self.n_episodes)

        ep_length = number_to_short(int(self._ep_lengths.latest))
        step_total = number_to_short(self.step_total.item())

        max_length = number_to_short(int(self._ep_lengths.max().item()))
        max_steps = number_to_short(self.max_steps)

        print(
            f"Episode: {ep}/{max_eps}, "
            f"Steps: {ep_length}/{step_total}, "
            f"Max Length: {max_length}/{max_steps}, "
            f"Reward Avg: {self.reward_moving_avg():.2f}, "
            f"Reward Max: {self.reward_moving_max():.2f}, "
            f"Actor Loss: {self._actor_loss.item():.2f}, "
            f"Critic Loss: {self._critic_loss.item():.2f}, "
            f"Entropy Loss: {self._entropy_loss.item():.2f}"
        )
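
A schematic usage sketch (illustrative: session, agent_config, and the loss tensors are placeholders that a real training run would provide):

Python
import torch

from velora.training.metrics import TrainMetrics

session = ...       # assumed: a sqlmodel.Session bound to the metric database
agent_config = ...  # assumed: the agent's config object

metrics = TrainMetrics(session, window_size=25, n_episodes=100, max_steps=1000)
metrics.start_experiment(agent_config)  # must run before adding metrics

# During an episode: record per-step losses
for _ in range(200):
    metrics.add_step(torch.rand(1), torch.rand(1), torch.rand(1))

# At episode end: persist episodic metrics and reset the step accumulators
metrics.add_episode(ep_idx=1, reward=torch.tensor(120.5), ep_length=torch.tensor(200))
metrics.info(current_ep=1)  # prints a one-line progress summary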

__init__(session, window_size, n_episodes, max_steps, *, device=None)

Parameters:

Name         Type              Description                            Default
session      sqlmodel.Session  current metric database session        required
window_size  int               moving average window size             required
n_episodes   int               total number of training episodes      required
max_steps    int               maximum number of steps per episode    required
device       torch.device      the device to perform computations on  None
Source code in velora/training/metrics.py
Python
def __init__(
    self,
    session: Session,
    window_size: int,
    n_episodes: int,
    max_steps: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        session (sqlmodel.Session): current metric database session
        window_size (int): moving average window size
        n_episodes (int): total number of training episodes
        max_steps (int): maximum number of steps per episode
        device (torch.device, optional): the device to perform computations on
    """
    super().__init__(session, window_size, device=device)

    self.n_episodes = n_episodes
    self.max_steps = max_steps

    self._current_losses = StepStorage(max_steps, device=device)

add_episode(ep_idx, reward, ep_length)

Add episode metrics to the metric database and reset step accumulators.

Parameters:

Name       Type          Description                               Default
ep_idx     int           the current episode index                 required
reward     torch.Tensor  episode reward                            required
ep_length  torch.Tensor  the number of steps taken in the episode  required
Source code in velora/training/metrics.py
Python
def add_episode(
    self,
    ep_idx: int,
    reward: torch.Tensor,
    ep_length: torch.Tensor,
) -> None:
    """
    Add episode metrics to the metric database and reset step accumulators.

    Parameters:
        ep_idx (int): the current episode index
        reward (torch.Tensor): episode reward
        ep_length (torch.Tensor): the number of steps taken in the episode
    """
    self._exp_created_check()

    self._ep_rewards.add(reward.to(self.device))
    self._ep_lengths.add(ep_length.to(self.device))

    self._actor_loss = self._current_losses.actor_avg(ep_length.item())
    self._critic_loss = self._current_losses.critic_avg(ep_length.item())
    self._entropy_loss = self._current_losses.entropy_avg(ep_length.item())
    self.step_total += ep_length

    moving_avg = self.reward_moving_avg()
    moving_std = self.reward_moving_std()

    ep = Episode(
        experiment_id=self.experiment_id,
        episode_num=ep_idx,
        reward=reward.item(),
        length=ep_length.item(),
        reward_moving_avg=moving_avg,
        reward_moving_std=moving_std,
        actor_loss=self._actor_loss.item(),
        critic_loss=self._critic_loss.item(),
        entropy_loss=self._entropy_loss.item(),
    )
    self.session.add(ep)
    self.session.commit()

    # Reset step storage
    self._current_losses.empty()

add_step(critic, actor, entropy)

Add timestep metrics to local storage.

Parameters:

Name     Type          Description        Default
critic   torch.Tensor  critic step loss   required
actor    torch.Tensor  actor step loss    required
entropy  torch.Tensor  entropy step loss  required
Source code in velora/training/metrics.py
Python
def add_step(
    self,
    critic: torch.Tensor,
    actor: torch.Tensor,
    entropy: torch.Tensor,
) -> None:
    """
    Add timestep metrics to local storage.

    Parameters:
        critic (torch.Tensor): critic step loss
        actor (torch.Tensor): actor step loss
        entropy (torch.Tensor): entropy step loss
    """
    self._exp_created_check()

    self._current_losses.add(critic, actor, entropy)

info(current_ep)

Outputs basic information to the console.

Parameters:

Name        Type  Description                Default
current_ep  int   the current episode index  required
Source code in velora/training/metrics.py
Python
def info(self, current_ep: int) -> None:
    """
    Outputs basic information to the console.

    Parameters:
        current_ep (int): the current episode index
    """
    ep = number_to_short(current_ep)
    max_eps = number_to_short(self.n_episodes)

    ep_length = number_to_short(int(self._ep_lengths.latest))
    step_total = number_to_short(self.step_total.item())

    max_length = number_to_short(int(self._ep_lengths.max().item()))
    max_steps = number_to_short(self.max_steps)

    print(
        f"Episode: {ep}/{max_eps}, "
        f"Steps: {ep_length}/{step_total}, "
        f"Max Length: {max_length}/{max_steps}, "
        f"Reward Avg: {self.reward_moving_avg():.2f}, "
        f"Reward Max: {self.reward_moving_max():.2f}, "
        f"Actor Loss: {self._actor_loss.item():.2f}, "
        f"Critic Loss: {self._critic_loss.item():.2f}, "
        f"Entropy Loss: {self._entropy_loss.item():.2f}"
    )