From 4ab91bcf57f01d37ee29bbd98459a04129df73aa Mon Sep 17 00:00:00 2001
From: Alexandru Gherghescu <gherghescu_alex1@yahoo.ro>
Date: Mon, 22 Jan 2024 21:50:30 +0200
Subject: [PATCH] Ignore last batches when calculating final train loss

Visual change. This only changes what the trainer reports as the final
training loss. Not quite sure if the value before was accurate anyway,
since gradient accumulation would not let the optimizer step on every
batch. For a big enough dataset, this should not have any impact at
all.

The final loss value will be reported based on the last calculation of
the loss, which correctly takes gradient accumulation into account as
well.
---
 optimus/trainer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/optimus/trainer.py b/optimus/trainer.py
index 065d61a..f55f73e 100644
--- a/optimus/trainer.py
+++ b/optimus/trainer.py
@@ -155,10 +155,6 @@ class Trainer():
             f"~{self.ms_per_batch:.2f} ms/batch | " \
             f" lr: {lr:.7f}"
 
-        # account for last batches when computing average train loss
-        self.train_loss = total_loss / (len(self.dl.train) % est_interval - 1)
-        self.train_ppl = math.exp(self.train_loss)
-
         pb.on_iter_end()
 
     def _do_epoch_validate(self):
-- 
GitLab
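
Note for reviewers: a minimal sketch of the reporting behavior this
patch describes, assuming a PyTorch-style training loop. The names
`grad_acc_steps` and `report_interval` are hypothetical illustration
names, not taken from the patched file; only the running `total_loss`
and the perplexity computation mirror the code touched by the diff.

    # Sketch only, not the actual optimus Trainer implementation.
    import math

    def train_epoch(model, optimizer, loss_fn, train_dl,
                    grad_acc_steps=4, report_interval=200):
        total_loss = 0.0
        train_loss = float("nan")  # last reported average loss

        for i, (inputs, targets) in enumerate(train_dl):
            loss = loss_fn(model(inputs), targets)
            # Scale the loss so accumulated gradients average over the
            # accumulation window.
            (loss / grad_acc_steps).backward()
            total_loss += loss.item()

            # The optimizer only steps once per accumulation window,
            # which is why a per-batch "final" loss was dubious.
            if (i + 1) % grad_acc_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Report a running average every `report_interval` batches.
            # After this patch, the last such value is what the trainer
            # reports as the final training loss; the leftover tail of
            # fewer than `report_interval` batches is simply ignored.
            if (i + 1) % report_interval == 0:
                train_loss = total_loss / report_interval
                total_loss = 0.0

        return train_loss, math.exp(train_loss)  # loss and perplexity

As a side observation supporting the doubt expressed in the commit
message: the deleted expression
`total_loss / (len(self.dl.train) % est_interval - 1)` divides by -1
when the dataset length is an exact multiple of `est_interval`, and by
zero when the remainder is 1, so dropping it also removes a potential
sign error and division-by-zero on those edge cases.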