
Draft: Basis for unit testing

Open Vlad-Andrei BĂDOIU (78692) requested to merge vladb/unit_tests into main
Files: 4 changed, +69 −0

import unittest

import torch
from torch import nn

from optimus.datasets import WikiText103Dataset
from optimus.tokenizers import SentencePieceTokenizer
from optimus.dataloader import OptimusDataLoader
from optimus.models import OptimusTransformer
from optimus.trainer import Trainer


class BenchmarkTest(unittest.TestCase):
    def test_ms_per_batch(self):
        # training and model hyperparameters for the benchmark run
        batch_size: int = 8
        grad_acc_steps: int = 1
        seq_len: int = 512
        lr_max: float = 1e-4
        grad_clip_norm: float = 1.0
        epochs: int = 1
        tokenizer_path: str = 'optimus16K-wikitext103.model'
        checkpoints_path: str = 'best_model.pth'
        dim: int = 512
        n_layers: int = 6
        n_heads: int = 8
        dropout: float = 0.0
        device: str = 'cuda'

        tok = SentencePieceTokenizer(model_path=tokenizer_path)

        # load dataset splits (note: the smaller 'test' split is used for
        # training and 'valid' for evaluation, presumably to keep the
        # benchmark run short)
        train_ds = WikiText103Dataset(split='test')
        test_ds = WikiText103Dataset(split='valid')

        # create dataloader object and move to device
        dl = OptimusDataLoader(train_ds, test_ds, tok,
                               bs=batch_size,
                               seq_len=seq_len,
                               device=device)

        # create model and move to device
        model = OptimusTransformer(len(tok),
                                   n_layers=n_layers,
                                   dim=dim,
                                   n_heads=n_heads,
                                   p_drop=dropout,
                                   weight_tying=False)
        model = model.to(device)

        # define loss metric and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(),
                                     betas=(0.9, 0.999), eps=1e-9)
print("Starting training...")
# create trainer and start fitting
trainer = Trainer(dl=dl,
model=model,
criterion=criterion,
optimizer=optimizer,
lr=lr_max,
grad_acc_steps=grad_acc_steps,
grad_clip_norm=grad_clip_norm,
model_save_path=checkpoints_path,
progress_bar=False)
trainer.fit(epochs)
print("--- Testing results for Optimus Transfoer --- ")
print(f"Est. ms/batch : {trainer.ms_per_batch:.2f}\n")
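
A possible follow-up for this MR: instead of only printing the measurement, the test could assert it against a time budget, so that performance regressions fail the suite (in the real test, something like self.assertLess(trainer.ms_per_batch, MS_PER_BATCH_BUDGET)). A minimal self-contained sketch follows; BenchmarkBudgetExample, MS_PER_BATCH_BUDGET, and the measured value are illustrative placeholders, not names or numbers from this MR.

import unittest


class BenchmarkBudgetExample(unittest.TestCase):
    # hypothetical budget in milliseconds; a real value would come from a
    # measured baseline on the target hardware
    MS_PER_BATCH_BUDGET = 500.0

    def test_within_budget(self):
        # stand-in for the trainer.ms_per_batch value measured by the
        # real benchmark above
        measured_ms_per_batch = 480.0
        self.assertLess(measured_ms_per_batch, self.MS_PER_BATCH_BUDGET)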