From ab9e6da54e8e206b0e2139cb82fa8fb3b2a2fb35 Mon Sep 17 00:00:00 2001
From: Vlad-Andrei Badoiu <vlad_andrei.badoiu@upb.ro>
Date: Mon, 29 Jan 2024 16:17:15 +0000
Subject: [PATCH] Basis for unit testing

---
 .gitlab-ci.yml                    |  4 ++
 requirements.txt                  |  4 ++
 tests/__init__.py                 |  0
 tests/benchmark/test_benchmark.py | 69 +++++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+)
 create mode 100644 requirements.txt
 create mode 100644 tests/__init__.py
 create mode 100644 tests/benchmark/test_benchmark.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4ba1fec..a36670b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,4 +6,8 @@ useGPU:
   stage: test_gpu
   script:
     - echo "Check whether we have enabled our GPU or not."
+    - pip install -r requirements.txt
     - nvidia-smi
+    - ls
+    - pwd
+    - export PYTHONPATH=$PYTHONPATH:.; pytest -s
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f047cda
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+sentencepiece
+pytest
+fastai
+fastprogress
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/benchmark/test_benchmark.py b/tests/benchmark/test_benchmark.py
new file mode 100644
index 0000000..2f74122
--- /dev/null
+++ b/tests/benchmark/test_benchmark.py
@@ -0,0 +1,69 @@
+import unittest
+
+import torch
+from torch import nn
+
+from optimus.datasets import WikiText103Dataset
+from optimus.tokenizers import SentencePieceTokenizer
+from optimus.dataloader import OptimusDataLoader
+from optimus.models import OptimusTransformer
+from optimus.trainer import Trainer
+
+class BenchmarkTest(unittest.TestCase):
+
+    def test_ms_per_batch(self):
+        batch_size: int = 8
+        grad_acc_steps: int = 1
+        seq_len: int = 512
+        lr_max: float = 1e-4
+        grad_clip_norm: float = 1.0
+        epochs: int = 1
+        tokenizer_path: str = 'optimus16K-wikitext103.model'
+        checkpoints_path: str = 'best_model.pth'
+        dim: int = 512
+        n_layers: int = 6
+        n_heads: int = 8
+        dropout: float = 0.0
+        device: str = 'cuda'
+
+        tok = SentencePieceTokenizer(model_path=tokenizer_path)
+
+        # load dataset splits
+        train_ds = WikiText103Dataset(split='test')
+        test_ds = WikiText103Dataset(split='valid')
+
+        # create dataloader object and move to device
+        dl = OptimusDataLoader(train_ds, test_ds, tok,
+                               bs=batch_size,
+                               seq_len=seq_len,
+                               device=device)
+
+        # create model and move to device
+        model = OptimusTransformer(len(tok),
+                                   n_layers=n_layers,
+                                   dim=dim,
+                                   n_heads=n_heads,
+                                   p_drop=dropout,
+                                   weight_tying=False)
+        model = model.to(device)
+
+        # define loss metric and optimizer
+        criterion = nn.CrossEntropyLoss()
+        optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), eps=1e-9)
+
+        print("Starting training...")
+
+        # create trainer and start fitting
+        trainer = Trainer(dl=dl,
+                          model=model,
+                          criterion=criterion,
+                          optimizer=optimizer,
+                          lr=lr_max,
+                          grad_acc_steps=grad_acc_steps,
+                          grad_clip_norm=grad_clip_norm,
+                          model_save_path=checkpoints_path,
+                          progress_bar=False)
+        trainer.fit(epochs)
+
+        print("--- Testing results for Optimus Transformer ---")
+        print(f"Est. ms/batch : {trainer.ms_per_batch:.2f}\n")
--
GitLab
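
The new test suite can also be run outside CI, mirroring the script section of the useGPU job above. A minimal sketch, assuming a CUDA-capable machine, the repository root as the working directory, and the pretrained tokenizer model optimus16K-wikitext103.model already present there:

    # install test dependencies, then run pytest with the repo root on PYTHONPATH
    pip install -r requirements.txt
    export PYTHONPATH=$PYTHONPATH:.; pytest -s

Putting the repository root on PYTHONPATH is what lets pytest resolve the optimus.* imports in tests/benchmark/test_benchmark.py without installing the package.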