velora.training

Methods and classes dedicated to handling agent training.

MovingMetric

Tracks a metric with a moving window for statistics.

Attributes:

Name         Type          Description
window       torch.Tensor  a tensor of values used for the statistics
window_size  int           the size of the moving statistics window

Source code in velora/training/metrics.py
Python
class MovingMetric:
    """
    Tracks a metric with a moving window for statistics.

    Attributes:
        window (torch.Tensor): a tensor of values used for the statistics
        window_size (int): the size of the moving statistics window
    """

    def __init__(self, window_size: int, *, device: torch.device | None = None) -> None:
        """
        Parameters:
            window_size (int): the size of the moving window
            device (torch.device, optional): the device to perform computations on
        """
        self.window_size = window_size
        self.device = device

        # Position indicators
        self.position = 0
        self.size = 0

        # Pre-allocated storage
        self.window = torch.zeros((window_size), device=device)

    @property
    def latest(self) -> torch.Tensor:
        """Gets the latest value."""
        latest_pos = (self.position - 1) % self.window_size
        return self.window[latest_pos]

    def add(self, value: torch.Tensor) -> None:
        """
        Adds a value and updates the window.

        Parameters:
            value (torch.Tensor): value to add
        """
        self.window[self.position] = value.to(self.device)

        # Update position - deque style
        self.position = (self.position + 1) % self.window_size
        self.size = min(self.size + 1, self.window_size)

    def mean(self) -> torch.Tensor:
        """
        Calculates the mean of the values in the current window.

        Returns:
            avg (torch.Tensor): the calculated mean.
        """
        # Only the filled portion of the window contributes to the statistic
        if self.size == 0:
            return torch.tensor(0.0, device=self.device)
        return self.window[: self.size].mean()

    def std(self) -> torch.Tensor:
        """
        Calculates the standard deviation of the values in the current window.

        Returns:
            std (torch.Tensor): the calculated standard deviation.
        """
        # A single value has no spread, so fall back to 0
        return (
            self.window[: self.size].std()
            if self.size > 1
            else torch.tensor(0.0, device=self.device)
        )

    def max(self) -> torch.Tensor:
        """
        Calculates the maximum value in the current window.

        Returns:
            max (torch.Tensor): the maximum value.
        """
        if self.size == 0:
            return torch.tensor(0.0, device=self.device)
        return self.window[: self.size].max()

    def __len__(self) -> int:
        """Returns the number of items currently in the window."""
        return self.size
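
A minimal usage sketch (illustrative, not from the library's docs) tracking a reward metric, assuming the windowed statistics shown above:

Python
import torch

from velora.training.metrics import MovingMetric

rewards = MovingMetric(window_size=5)

for r in [1.0, 3.0, 2.0]:
    rewards.add(torch.tensor(r))

print(len(rewards))    # 3 -> window is not yet full
print(rewards.latest)  # tensor(2.)
print(rewards.mean())  # tensor(2.) -> mean over the filled entries only
print(rewards.std())   # tensor(1.) -> sample standard deviation
print(rewards.max())   # tensor(3.)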

latest property

Gets the latest value.

__init__(window_size, *, device=None)

Parameters:

Name         Type          Description                            Default
window_size  int           the size of the moving window          required
device       torch.device  the device to perform computations on  None
Source code in velora/training/metrics.py
Python
def __init__(self, window_size: int, *, device: torch.device | None = None) -> None:
    """
    Parameters:
        window_size (int): the size of the moving window
        device (torch.device, optional): the device to perform computations on
    """
    self.window_size = window_size
    self.device = device

    # Position indicators
    self.position = 0
    self.size = 0

    # Pre-allocated storage
    self.window = torch.zeros((window_size), device=device)

__len__()

Returns the number of items currently in the window.

Source code in velora/training/metrics.py
Python
def __len__(self) -> int:
    """Returns the number of items currently in the window."""
    return self.size

add(value)

Adds a value and updates the window.

Parameters:

Name   Type          Description   Default
value  torch.Tensor  value to add  required
Source code in velora/training/metrics.py
Python
def add(self, value: torch.Tensor) -> None:
    """
    Adds a value and updates the window.

    Parameters:
        value (torch.Tensor): value to add
    """
    self.window[self.position] = value.to(self.device)

    # Update position - deque style
    self.position = (self.position + 1) % self.window_size
    self.size = min(self.size + 1, self.window_size)
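
Once the window fills, add overwrites the oldest entry in place, deque style. A small illustrative sketch of the wrap-around:

Python
import torch

from velora.training.metrics import MovingMetric

m = MovingMetric(window_size=3)

for v in [1.0, 2.0, 3.0, 4.0]:
    m.add(torch.tensor(v))

# 4.0 wrapped around and overwrote 1.0 at position 0
print(m.window)  # tensor([4., 2., 3.])
print(m.latest)  # tensor(4.)
print(len(m))    # 3 -> size is capped at window_size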

max()

Calculates the maximum value in the current window.

Returns:

Name  Type          Description
max   torch.Tensor  the maximum value.

Source code in velora/training/metrics.py
Python
def max(self) -> torch.Tensor:
    """
    Calculates the maximum value in the current window.

    Returns:
        max (torch.Tensor): the maximum value.
    """
    if self.size == 0:
        return torch.tensor(0.0, device=self.device)
    return self.window[: self.size].max()

mean()

Calculates the mean of the values in the current window.

Returns:

Name  Type          Description
avg   torch.Tensor  the calculated mean.

Source code in velora/training/metrics.py
Python
def mean(self) -> torch.Tensor:
    """
    Calculates the mean of the values in the current window.

    Returns:
        avg (torch.Tensor): the calculated mean.
    """
    # Only the filled portion of the window contributes to the statistic
    if self.size == 0:
        return torch.tensor(0.0, device=self.device)
    return self.window[: self.size].mean()

std()

Calculates the standard deviation of the values in the current window.

Returns:

Name  Type          Description
std   torch.Tensor  the calculated standard deviation.

Source code in velora/training/metrics.py
Python
def std(self) -> torch.Tensor:
    """
    Calculates the standard deviation of the values in the current window.

    Returns:
        std (torch.Tensor): the calculated standard deviation.
    """
    # A single value has no spread, so fall back to 0
    return (
        self.window[: self.size].std()
        if self.size > 1
        else torch.tensor(0.0, device=self.device)
    )

StepStorage

A storage container for step metrics.

Useful for calculating the episodic average values to store in MetricStorage.

Attributes:

Name            Type          Description
critic_losses   torch.Tensor  a tensor of agent Critic loss values
actor_losses    torch.Tensor  a tensor of agent Actor loss values
entropy_losses  torch.Tensor  a tensor of agent Entropy loss values

Source code in velora/training/metrics.py
Python
class StepStorage:
    """
    A storage container for step metrics.

    Useful for calculating the episodic average values to store in `MetricStorage`.

    Attributes:
        critic_losses (torch.Tensor): a tensor of agent Critic loss values
        actor_losses (torch.Tensor): a tensor of agent Actor loss values
        entropy_losses (torch.Tensor): a tensor of agent Entropy loss values
    """

    def __init__(self, capacity: int, *, device: torch.device | None = None) -> None:
        """
        Parameters:
            capacity (int): storage capacity for each tensor
            device (torch.device, optional): the device to perform computations on
        """
        self.capacity = capacity
        self.device = device

        # Position indicators
        self.position = 0
        self.size = 0

        self.critic_losses = torch.zeros((capacity), device=device)
        self.actor_losses = torch.zeros((capacity), device=device)
        self.entropy_losses = torch.zeros((capacity), device=device)

    def critic_avg(self, ep_length: int) -> torch.Tensor:
        """
        Computes the critic loss average. Useful for computing episodic averages.

        Parameters:
            ep_length (int): size of the episode

        Returns:
            avg (torch.Tensor): critic loss step average
        """
        return self.critic_losses[:ep_length].mean()

    def actor_avg(self, ep_length: int) -> torch.Tensor:
        """
        Computes the actor loss average. Useful for computing episodic averages.

        Parameters:
            ep_length (int): size of the episode

        Returns:
            avg (torch.Tensor): actor loss step average
        """
        return self.actor_losses[:ep_length].mean()

    def entropy_avg(self, ep_length: int) -> torch.Tensor:
        """
        Computes the entropy loss average. Useful for computing episodic averages.

        Parameters:
            ep_length (int): size of the episode

        Returns:
            avg (torch.Tensor): entropy loss step average
        """
        return self.entropy_losses[:ep_length].mean()

    def add(
        self,
        critic: torch.Tensor,
        actor: torch.Tensor,
        entropy: torch.Tensor,
    ) -> None:
        """
        Adds one of each metric into storage.

        Parameters:
            critic (torch.Tensor): critic loss
            actor (torch.Tensor): actor loss
            entropy (torch.Tensor): entropy loss
        """
        self.critic_losses[self.position] = critic
        self.actor_losses[self.position] = actor
        self.entropy_losses[self.position] = entropy

        # Update position
        self.position = (self.position + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def empty(self) -> None:
        """Empty storage."""
        self.critic_losses.zero_()
        self.actor_losses.zero_()
        self.entropy_losses.zero_()

        self.position = 0
        self.size = 0
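
A short illustrative sketch (the loss values are placeholders): accumulate per-step losses over an episode, average them at episode end, then reset:

Python
import torch

from velora.training.metrics import StepStorage

storage = StepStorage(capacity=1000)

# One episode of 3 steps (placeholder loss values)
for critic, actor, entropy in [(0.9, 0.5, 0.1), (0.7, 0.4, 0.1), (0.5, 0.3, 0.2)]:
    storage.add(torch.tensor(critic), torch.tensor(actor), torch.tensor(entropy))

ep_length = 3
print(storage.critic_avg(ep_length))   # tensor(0.7000)
print(storage.actor_avg(ep_length))    # tensor(0.4000)
print(storage.entropy_avg(ep_length))  # tensor(0.1333)

storage.empty()  # reset before the next episode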

__init__(capacity, *, device=None)

Parameters:

Name      Type          Description                            Default
capacity  int           storage capacity for each tensor       required
device    torch.device  the device to perform computations on  None
Source code in velora/training/metrics.py
Python
def __init__(self, capacity: int, *, device: torch.device | None = None) -> None:
    """
    Parameters:
        capacity (int): storage capacity for each tensor
        device (torch.device, optional): the device to perform computations on
    """
    self.capacity = capacity
    self.device = device

    # Position indicators
    self.position = 0
    self.size = 0

    self.critic_losses = torch.zeros((capacity), device=device)
    self.actor_losses = torch.zeros((capacity), device=device)
    self.entropy_losses = torch.zeros((capacity), device=device)

actor_avg(ep_length)

Computes the actor loss average. Useful for computing episodic averages.

Parameters:

Name       Type  Description          Default
ep_length  int   size of the episode  required

Returns:

Name  Type          Description
avg   torch.Tensor  actor loss step average

Source code in velora/training/metrics.py
Python
def actor_avg(self, ep_length: int) -> torch.Tensor:
    """
    Computes the actor loss average. Useful for computing episodic averages.

    Parameters:
        ep_length (int): size of the episode

    Returns:
        avg (torch.Tensor): actor loss step average
    """
    return self.actor_losses[:ep_length].mean()

add(critic, actor, entropy)

Adds one of each metric into storage.

Parameters:

Name     Type          Description   Default
critic   torch.Tensor  critic loss   required
actor    torch.Tensor  actor loss    required
entropy  torch.Tensor  entropy loss  required
Source code in velora/training/metrics.py
Python
def add(
    self,
    critic: torch.Tensor,
    actor: torch.Tensor,
    entropy: torch.Tensor,
) -> None:
    """
    Adds one of each metric into storage.

    Parameters:
        critic (torch.Tensor): critic loss
        actor (torch.Tensor): actor loss
        entropy (torch.Tensor): entropy loss
    """
    self.critic_losses[self.position] = critic
    self.actor_losses[self.position] = actor
    self.entropy_losses[self.position] = entropy

    # Update position
    self.position = (self.position + 1) % self.capacity
    self.size = min(self.size + 1, self.capacity)

critic_avg(ep_length)

Computes the critic loss average. Useful for computing episodic averages.

Parameters:

Name       Type  Description          Default
ep_length  int   size of the episode  required

Returns:

Name  Type          Description
avg   torch.Tensor  critic loss step average

Source code in velora/training/metrics.py
Python
def critic_avg(self, ep_length: int) -> torch.Tensor:
    """
    Computes the critic loss average. Useful for computing episodic averages.

    Parameters:
        ep_length (int): size of the episode

    Returns:
        avg (torch.Tensor): critic loss step average
    """
    return self.critic_losses[:ep_length].mean()

empty()

Empty storage.

Source code in velora/training/metrics.py
Python
def empty(self) -> None:
    """Empty storage."""
    self.critic_losses.zero_()
    self.actor_losses.zero_()
    self.entropy_losses.zero_()

    self.position = 0
    self.size = 0

entropy_avg(ep_length)

Computes the entropy loss average. Useful for computing episodic averages.

Parameters:

Name       Type  Description          Default
ep_length  int   size of the episode  required

Returns:

Name  Type          Description
avg   torch.Tensor  entropy loss step average

Source code in velora/training/metrics.py
Python
def entropy_avg(self, ep_length: int) -> torch.Tensor:
    """
    Computes the entropy loss average. Useful for computing episodic averages.

    Parameters:
        ep_length (int): size of the episode

    Returns:
        avg (torch.Tensor): entropy loss step average
    """
    return self.entropy_losses[:ep_length].mean()

TrainHandler

Bases: TrainHandlerBase

A context manager for handling an agent's training state. Compatible with single environments.

Source code in velora/training/handler.py
Python
class TrainHandler(TrainHandlerBase):
    """
    A context manager for handling an agent's training state. Compatible with single
    environments.
    """

    def __init__(
        self,
        agent: RLModuleAgent,
        n_episodes: int,
        max_steps: int,
        log_freq: int,
        window_size: int,
        callbacks: List["TrainCallback"] | None,
    ) -> None:
        """
        Parameters:
            agent (RLModuleAgent): the agent being trained
            n_episodes (int): the total number of training episodes
            max_steps (int): maximum number of steps in an episode
            log_freq (int): metric logging frequency (in episodes)
            window_size (int): the moving average window size (in episodes)
            callbacks (List[TrainCallback] | None): a list of training callbacks.
                If `None`, sets to an empty list
        """
        super().__init__(agent, window_size, callbacks)

        self.log_freq = log_freq
        self.n_episodes = n_episodes
        self.max_steps = max_steps

    @property
    def metrics(self) -> TrainMetrics:
        """
        Training metric class instance.

        Returns:
            metrics (TrainMetrics): current training metric state.
        """
        return self._metrics

    def __enter__(self) -> Self:
        """
        Set up the training context, initializing the environment.

        Returns:
            self (Self): the initialized context.
        """
        self.session = Session(self.engine)
        self._metrics = TrainMetrics(
            self.session,
            self.window_size,
            self.n_episodes,
            self.max_steps,
            device=self.device,
        )
        self._metrics.start_experiment(self.agent.config)

        self.state = TrainState(
            agent=self.agent,
            env=self.env,
            session=self.session,
            total_episodes=self.n_episodes,
            experiment_id=self._metrics.experiment_id,
        )

        return super().__enter__()

    def __exit__(
        self,
        exc_type: Type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ):
        super().__exit__(exc_type, exc_val, exc_tb)

        if self.state.saving_enabled:
            self.save_completed()

    def start(self) -> None:
        super().start()

        # Update environment with callback wrappers
        self.env = self.state.env

    def step(self, current_step: int) -> None:
        """
        Performs the `step` callback event.

        Parameters:
            current_step (int): the current training timestep index
        """
        self.state.update(status="step", current_step=current_step)
        self._run_callbacks()

    def log(self, idx: int, log_type: Literal["episode", "step"]) -> None:
        """
        Performs the `logging` callback event.

        Parameters:
            idx (int): the current training step or episode index
            log_type (str): the type of logging method
        """
        if log_type == "episode":
            self.state.update(status="logging", current_ep=idx, logging_type=log_type)
        else:
            self.state.update(status="logging", current_step=idx, logging_type=log_type)

        self._run_callbacks()
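
A schematic sketch of driving the context manager (illustrative assumptions: agent is a pre-built RLModuleAgent, and the loop body and exact call order stand in for a real training loop):

Python
from velora.training.handler import TrainHandler

agent = ...  # assumed: a pre-built RLModuleAgent

handler = TrainHandler(
    agent=agent,
    n_episodes=100,
    max_steps=1000,
    log_freq=10,
    window_size=25,
    callbacks=None,
)

with handler as train:
    train.start()  # applies any callback wrappers to the environment

    for ep in range(1, train.n_episodes + 1):
        for step in range(1, train.max_steps + 1):
            # ... agent-environment interaction goes here ...
            train.step(step)  # fires the `step` callback event

        if ep % train.log_freq == 0:
            train.log(ep, "episode")  # fires the `logging` callback event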

metrics property

Training metric class instance.

Returns:

Name     Type          Description
metrics  TrainMetrics  current training metric state.

__enter__()

Set up the training context, initializing the environment.

Returns:

Name  Type  Description
self  Self  the initialized context.

Source code in velora/training/handler.py
Python
def __enter__(self) -> Self:
    """
    Set up the training context, initializing the environment.

    Returns:
        self (Self): the initialized context.
    """
    self.session = Session(self.engine)
    self._metrics = TrainMetrics(
        self.session,
        self.window_size,
        self.n_episodes,
        self.max_steps,
        device=self.device,
    )
    self._metrics.start_experiment(self.agent.config)

    self.state = TrainState(
        agent=self.agent,
        env=self.env,
        session=self.session,
        total_episodes=self.n_episodes,
        experiment_id=self._metrics.experiment_id,
    )

    return super().__enter__()

__init__(agent, n_episodes, max_steps, log_freq, window_size, callbacks)

Parameters:

Name         Type                        Description                                                    Default
agent        RLModuleAgent               the agent being trained                                        required
n_episodes   int                         the total number of training episodes                          required
max_steps    int                         maximum number of steps in an episode                          required
log_freq     int                         metric logging frequency (in episodes)                         required
window_size  int                         the moving average window size (in episodes)                   required
callbacks    List[TrainCallback] | None  a list of training callbacks. If None, sets to an empty list  required
Source code in velora/training/handler.py
Python
def __init__(
    self,
    agent: RLModuleAgent,
    n_episodes: int,
    max_steps: int,
    log_freq: int,
    window_size: int,
    callbacks: List["TrainCallback"] | None,
) -> None:
    """
    Parameters:
        agent (RLModuleAgent): the agent being trained
        n_episodes (int): the total number of training episodes
        max_steps (int): maximum number of steps in an episode
        log_freq (int): metric logging frequency (in episodes)
        window_size (int): the moving average window size (in episodes)
        callbacks (List[TrainCallback] | None): a list of training callbacks.
            If `None`, sets to an empty list
    """
    super().__init__(agent, window_size, callbacks)

    self.log_freq = log_freq
    self.n_episodes = n_episodes
    self.max_steps = max_steps

log(idx, log_type)

Performs the logging callback event.

Parameters:

Name      Type  Description                                 Default
idx       int   the current training step or episode index  required
log_type  str   the type of logging method                  required
Source code in velora/training/handler.py
Python
def log(self, idx: int, log_type: Literal["episode", "step"]) -> None:
    """
    Performs the `logging` callback event.

    Parameters:
        idx (int): the current training step or episode index
        log_type (str): the type of logging method
    """
    if log_type == "episode":
        self.state.update(status="logging", current_ep=idx, logging_type=log_type)
    else:
        self.state.update(status="logging", current_step=idx, logging_type=log_type)

    self._run_callbacks()

step(current_step)

Performs the step callback event.

Parameters:

Name          Type  Description                          Default
current_step  int   the current training timestep index  required
Source code in velora/training/handler.py
Python
def step(self, current_step: int) -> None:
    """
    Performs the `step` callback event.

    Parameters:
        current_step (int): the current training timestep index
    """
    self.state.update(status="step", current_step=current_step)
    self._run_callbacks()

TrainMetrics

Bases: TrainMetricsBase

A utility class for working with and storing episodic training metrics for monitoring an agent's training performance.

Source code in velora/training/metrics.py
Python
class TrainMetrics(TrainMetricsBase):
    """
    A utility class for working with and storing episodic training metrics for
    monitoring an agent's training performance.
    """

    def __init__(
        self,
        session: Session,
        window_size: int,
        n_episodes: int,
        max_steps: int,
        *,
        device: torch.device | None = None,
    ) -> None:
        """
        Parameters:
            session (sqlmodel.Session): current metric database session
            window_size (int): moving average window size
            n_episodes (int): total number of training episodes
            max_steps (int): maximum number of steps per episode
            device (torch.device, optional): the device to perform computations on
        """
        super().__init__(session, window_size, device=device)

        self.n_episodes = n_episodes
        self.max_steps = max_steps

        self._current_losses = StepStorage(max_steps, device=device)

    def add_step(
        self,
        critic: torch.Tensor,
        actor: torch.Tensor,
        entropy: torch.Tensor,
    ) -> None:
        """
        Add timestep metrics to local storage.

        Parameters:
            critic (torch.Tensor): critic step loss
            actor (torch.Tensor): actor step loss
            entropy (torch.Tensor): entropy step loss
        """
        self._exp_created_check()

        self._current_losses.add(critic, actor, entropy)

    def add_episode(
        self,
        ep_idx: int,
        reward: torch.Tensor,
        ep_length: torch.Tensor,
    ) -> None:
        """
        Add episode metrics to the metric database and reset step accumulators.

        Parameters:
            ep_idx (int): the current episode index
            reward (torch.Tensor): episode reward
            ep_length (torch.Tensor): the number of steps taken in the episode
        """
        self._exp_created_check()

        self._ep_rewards.add(reward.to(self.device))
        self._ep_lengths.add(ep_length.to(self.device))

        self._actor_loss = self._current_losses.actor_avg(ep_length.item())
        self._critic_loss = self._current_losses.critic_avg(ep_length.item())
        self._entropy_loss = self._current_losses.entropy_avg(ep_length.item())
        self.step_total += ep_length

        moving_avg = self.reward_moving_avg()
        moving_std = self.reward_moving_std()

        ep = Episode(
            experiment_id=self.experiment_id,
            episode_num=ep_idx,
            reward=reward.item(),
            length=ep_length.item(),
            reward_moving_avg=moving_avg,
            reward_moving_std=moving_std,
            actor_loss=self._actor_loss.item(),
            critic_loss=self._critic_loss.item(),
            entropy_loss=self._entropy_loss.item(),
        )
        self.session.add(ep)
        self.session.commit()

        # Reset step storage
        self._current_losses.empty()

    def info(self, current_ep: int) -> None:
        """
        Outputs basic information to the console.

        Parameters:
            current_ep (int): the current episode index
        """
        ep = number_to_short(current_ep)
        max_eps = number_to_short(self.n_episodes)

        ep_length = number_to_short(int(self._ep_lengths.latest))
        step_total = number_to_short(self.step_total.item())

        max_length = number_to_short(int(self._ep_lengths.max().item()))
        max_steps = number_to_short(self.max_steps)

        print(
            f"Episode: {ep}/{max_eps}, "
            f"Steps: {ep_length}/{step_total}, "
            f"Max Length: {max_length}/{max_steps}, "
            f"Reward Avg: {self.reward_moving_avg():.2f}, "
            f"Reward Max: {self.reward_moving_max():.2f}, "
            f"Actor Loss: {self._actor_loss.item():.2f}, "
            f"Critic Loss: {self._critic_loss.item():.2f}, "
            f"Entropy Loss: {self._entropy_loss.item():.2f}"
        )
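
A schematic usage sketch (illustrative: session, agent_config, and the loss tensors are placeholders that a real training run would provide):

Python
import torch

from velora.training.metrics import TrainMetrics

session = ...       # assumed: a sqlmodel.Session bound to the metric database
agent_config = ...  # assumed: the agent's config object

metrics = TrainMetrics(session, window_size=25, n_episodes=100, max_steps=1000)
metrics.start_experiment(agent_config)  # must run before adding metrics

# During an episode: record per-step losses
for _ in range(200):
    metrics.add_step(torch.rand(1), torch.rand(1), torch.rand(1))

# At episode end: persist episodic metrics and reset the step accumulators
metrics.add_episode(ep_idx=1, reward=torch.tensor(120.5), ep_length=torch.tensor(200))
metrics.info(current_ep=1)  # prints a one-line progress summary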

__init__(session, window_size, n_episodes, max_steps, *, device=None)

Parameters:

Name         Type              Description                            Default
session      sqlmodel.Session  current metric database session        required
window_size  int               moving average window size             required
n_episodes   int               total number of training episodes      required
max_steps    int               maximum number of steps per episode    required
device       torch.device      the device to perform computations on  None
Source code in velora/training/metrics.py
Python
def __init__(
    self,
    session: Session,
    window_size: int,
    n_episodes: int,
    max_steps: int,
    *,
    device: torch.device | None = None,
) -> None:
    """
    Parameters:
        session (sqlmodel.Session): current metric database session
        window_size (int): moving average window size
        n_episodes (int): total number of training episodes
        max_steps (int): maximum number of steps per episode
        device (torch.device, optional): the device to perform computations on
    """
    super().__init__(session, window_size, device=device)

    self.n_episodes = n_episodes
    self.max_steps = max_steps

    self._current_losses = StepStorage(max_steps, device=device)

add_episode(ep_idx, reward, ep_length)

Add episode metrics to the metric database and reset step accumulators.

Parameters:

Name       Type          Description                               Default
ep_idx     int           the current episode index                 required
reward     torch.Tensor  episode reward                            required
ep_length  torch.Tensor  the number of steps taken in the episode  required
Source code in velora/training/metrics.py
Python
def add_episode(
    self,
    ep_idx: int,
    reward: torch.Tensor,
    ep_length: torch.Tensor,
) -> None:
    """
    Add episode metrics to the metric database and reset step accumulators.

    Parameters:
        ep_idx (int): the current episode index
        reward (torch.Tensor): episode reward
        ep_length (torch.Tensor): the number of steps taken in the episode
    """
    self._exp_created_check()

    self._ep_rewards.add(reward.to(self.device))
    self._ep_lengths.add(ep_length.to(self.device))

    self._actor_loss = self._current_losses.actor_avg(ep_length.item())
    self._critic_loss = self._current_losses.critic_avg(ep_length.item())
    self._entropy_loss = self._current_losses.entropy_avg(ep_length.item())
    self.step_total += ep_length

    moving_avg = self.reward_moving_avg()
    moving_std = self.reward_moving_std()

    ep = Episode(
        experiment_id=self.experiment_id,
        episode_num=ep_idx,
        reward=reward.item(),
        length=ep_length.item(),
        reward_moving_avg=moving_avg,
        reward_moving_std=moving_std,
        actor_loss=self._actor_loss.item(),
        critic_loss=self._critic_loss.item(),
        entropy_loss=self._entropy_loss.item(),
    )
    self.session.add(ep)
    self.session.commit()

    # Reset step storage
    self._current_losses.empty()

add_step(critic, actor, entropy)

Add timestep metrics to local storage.

Parameters:

Name     Type          Description        Default
critic   torch.Tensor  critic step loss   required
actor    torch.Tensor  actor step loss    required
entropy  torch.Tensor  entropy step loss  required
Source code in velora/training/metrics.py
Python
def add_step(
    self,
    critic: torch.Tensor,
    actor: torch.Tensor,
    entropy: torch.Tensor,
) -> None:
    """
    Add timestep metrics to local storage.

    Parameters:
        critic (torch.Tensor): critic step loss
        actor (torch.Tensor): actor step loss
        entropy (torch.Tensor): entropy step loss
    """
    self._exp_created_check()

    self._current_losses.add(critic, actor, entropy)

info(current_ep)

Outputs basic information to the console.

Parameters:

Name        Type  Description                Default
current_ep  int   the current episode index  required
Source code in velora/training/metrics.py
Python
def info(self, current_ep: int) -> None:
    """
    Outputs basic information to the console.

    Parameters:
        current_ep (int): the current episode index
    """
    ep = number_to_short(current_ep)
    max_eps = number_to_short(self.n_episodes)

    ep_length = number_to_short(int(self._ep_lengths.latest))
    step_total = number_to_short(self.step_total.item())

    max_length = number_to_short(int(self._ep_lengths.max().item()))
    max_steps = number_to_short(self.max_steps)

    print(
        f"Episode: {ep}/{max_eps}, "
        f"Steps: {ep_length}/{step_total}, "
        f"Max Length: {max_length}/{max_steps}, "
        f"Reward Avg: {self.reward_moving_avg():.2f}, "
        f"Reward Max: {self.reward_moving_max():.2f}, "
        f"Actor Loss: {self._actor_loss.item():.2f}, "
        f"Critic Loss: {self._critic_loss.item():.2f}, "
        f"Entropy Loss: {self._entropy_loss.item():.2f}"
    )