From 8579fc15d3c74d04ba26d6ef2db8f4763a6ec042 Mon Sep 17 00:00:00 2001
From: Alexandru Gherghescu <gherghescu_alex1@yahoo.ro>
Date: Fri, 26 Jan 2024 23:10:45 +0200
Subject: [PATCH] Adjust optimizer epsilon value for AMP

Pick a better default epsilon value.

In mixed precision training this value never touches the fp16 gradients
(the optimizer only ever works on the master fp32 copy of the model), so
strictly speaking it did not need to change. In pure fp16 training,
however, any epsilon value lower than roughly 1e-7 simply underflows to
0, rendering it useless.

Although the framework doesn't directly support the second case above,
an epsilon value of 1e-7 seems like a better default for both AMP and
normal training.
---
 training.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/training.py b/training.py
index 1cd22ae..64cdf8f 100644
--- a/training.py
+++ b/training.py
@@ -89,9 +89,14 @@ def main(batch_size: int = 8,
     _total_params = sum(p.numel() for p in model.parameters())
     print(f"Number of model parameters: {_total_params}")
 
-    # define loss metric and optimizer
+    # define loss metric
     criterion = nn.CrossEntropyLoss()
-    optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), eps=1e-9)
+
+    # define optimizer
+    # see [1] for a discussion on what the epsilon value should be for amp; 1e-7
+    # is a good default for both amp and normal training
+    # [1]: https://github.com/pytorch/pytorch/issues/26218
+    optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), eps=1e-7)
 
     print("Starting training...")
 
--
GitLab
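
A minimal sketch (not part of the patch) of the underflow argument above,
assuming only a stock PyTorch install: float16 cannot represent positive
values much below ~6e-8, so the old epsilon collapses to zero while 1e-7
survives as a subnormal.

    # Illustration of why eps=1e-9 is useless in pure fp16 training.
    import torch

    old_eps = torch.tensor(1e-9, dtype=torch.float16)
    new_eps = torch.tensor(1e-7, dtype=torch.float16)

    print(old_eps.item())  # 0.0        -> Adam's denominator guard vanishes
    print(new_eps.item())  # ~1.19e-07  -> still nonzero after rounding to fp16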