diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4ba1fec2a6ba912a73595564662b55a7d145b589..a36670b7d0deaf73f4ff4c9c1642c8fe851c1d4b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,4 +6,10 @@ useGPU:
     stage: test_gpu
     script:
         - echo "Check whether we have enabled our GPU or not."
+        - pip install -r requirements.txt
         - nvidia-smi
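+        # debug output: list the checkout contents and print the working directory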
+        - ls
+        - pwd
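+        # put the repo root on PYTHONPATH so the optimus package is importable, then run the tests (-s shows print output)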
+        - export PYTHONPATH=$PYTHONPATH:.; pytest -s
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f047cda0c1edd3c55b3d81382bce8e41f2b449cc
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
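+# note: torch is not listed explicitly; it is assumed to come with the GPU CI image (fastai also pulls it in as a dependency)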
+sentencepiece
+pytest
+fastai
+fastprogress
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/benchmark/test_benchmark.py b/tests/benchmark/test_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f74122a6644e5d865b7ce949931b293e9526f20
--- /dev/null
+++ b/tests/benchmark/test_benchmark.py
@@ -0,0 +1,77 @@
+import unittest
+
+import torch
+from torch import nn
+
+from optimus.datasets import WikiText103Dataset
+from optimus.tokenizers import SentencePieceTokenizer
+from optimus.dataloader import OptimusDataLoader
+from optimus.models import OptimusTransformer
+from optimus.trainer import Trainer
+
+class BenchmarkTest(unittest.TestCase):
+
+    def test_ms_per_batch(self):
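+        """Train OptimusTransformer briefly and report the measured ms/batch."""
+        # hyperparameters for the benchmark run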
+        batch_size: int = 8
+        grad_acc_steps: int = 1
+        seq_len: int = 512
+        lr_max: float = 1e-4
+        grad_clip_norm: float = 1.0
+        epochs: int = 1
+        tokenizer_path: str = 'optimus16K-wikitext103.model'
+        checkpoints_path: str = 'best_model.pth'
+        dim: int = 512
+        n_layers: int = 6
+        n_heads: int = 8
+        dropout: float = 0.0
+        device: str = 'cuda'
+
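+        # load the pretrained SentencePiece tokenizer; the .model file is expected in the working directory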
+        tok = SentencePieceTokenizer(model_path=tokenizer_path)
+
+        # load dataset splits; training uses the smaller 'test' split,
+        # presumably to keep the benchmark run short
+        train_ds = WikiText103Dataset(split='test')
+        test_ds = WikiText103Dataset(split='valid')
+
+        # create dataloader object and move to device
+        dl = OptimusDataLoader(train_ds, test_ds, tok,
+                               bs=batch_size,
+                               seq_len=seq_len,
+                               device=device)
+
+        # create model and move to device
+        model = OptimusTransformer(len(tok),
+                                   n_layers=n_layers,
+                                   dim=dim,
+                                   n_heads=n_heads,
+                                   p_drop=dropout,
+                                   weight_tying=False)
+        model = model.to(device)
+
+        # define loss metric and optimizer
+        criterion = nn.CrossEntropyLoss()
+        optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), eps=1e-9)
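+        # note: the learning rate itself is configured on the Trainer below (lr=lr_max), not on the optimizer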
+
+        print("Starting training...")
+
+        # create trainer and start fitting
+        trainer = Trainer(dl=dl,
+                          model=model,
+                          criterion=criterion,
+                          optimizer=optimizer,
+                          lr=lr_max,
+                          grad_acc_steps=grad_acc_steps,
+                          grad_clip_norm=grad_clip_norm,
+                          model_save_path=checkpoints_path,
+                          progress_bar=False)
+        trainer.fit(epochs)
+
+        print("--- Testing results for Optimus Transfoer --- ")
+        print(f"Est. ms/batch : {trainer.ms_per_batch:.2f}\n")
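+
+        # sanity check: a completed benchmark run should report a positive timing
+        self.assertGreater(trainer.ms_per_batch, 0.0)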