diff --git a/Dockerfile-13b b/Dockerfile-13b
deleted file mode 100644
index 5267946337faaaace4f77de65d39e148109705d5..0000000000000000000000000000000000000000
--- a/Dockerfile-13b
+++ /dev/null
@@ -1,50 +0,0 @@
-# there's an extra step needed before this works: installing the NVIDIA
-# Container Toolkit, which lets Docker containers access the host's GPUs;
-# there's a guide for Ubuntu here:
-# https://saturncloud.io/blog/how-to-install-pytorch-on-the-gpu-with-docker/
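-#
-# a minimal sketch of that install on Ubuntu (assumes NVIDIA's apt repository
-# is already configured; check the current NVIDIA docs before running):
-#   sudo apt-get install -y nvidia-container-toolkit
-#   sudo nvidia-ctk runtime configure --runtime=docker
-#   sudo systemctl restart docker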
-
-# before building, make sure the weights, the tokenizer, and the dialog
-# script are all in the current folder (the build context)
-
-# build image with: `docker build -t gitlab.cs.pub.ro/netsys/llama-images:llama-13b -f Dockerfile-13b .`
-# run image with: `docker run -it --gpus all gitlab.cs.pub.ro/netsys/llama-images:llama-13b`
-
-FROM condaforge/mambaforge
-
-# install pytorch (CUDA 11.8) and fastai via mamba
-RUN mamba install -c pytorch -c nvidia pytorch torchvision torchaudio pytorch-cuda=11.8 -y && \
-    mamba install -c fastai fastai -y && \
-    mamba clean -afy
-
-# llama dependencies
-RUN pip install fairscale sentencepiece fire && \
-    pip cache purge
-
-# clone the llama repo
-RUN git clone https://github.com/facebookresearch/llama /llama
-
-# add the tokenizer
-COPY tokenizer.model /llama/tokenizer.model
-
-# add the weights
-COPY llama-2-13b/ /llama/llama-2-13b/
-
-# add the dialog script
-COPY dialog.py /llama/dialog.py
-
-# run llama example program
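-# `--nproc_per_node` must match the model-parallel size of the checkpoint,
-# i.e. the number of consolidated.*.pth shards (2 for llama-2-13b)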
-CMD ["torchrun", \
-     "--nproc_per_node", "2", \
-     "/llama/dialog.py", \
-     "--ckpt_dir", "/llama/llama-2-13b/", \
-     "--tokenizer_path", "/llama/tokenizer.model", \
-     "--max_seq_len", "4096", \
-     "--max_batch_size", "6"]
diff --git a/Dockerfile-70b b/Dockerfile-70b
deleted file mode 100644
index f68e775fadd9e90ca0d39d3404bf83fd01917246..0000000000000000000000000000000000000000
--- a/Dockerfile-70b
+++ /dev/null
@@ -1,50 +0,0 @@
-# there's an extra step needed before this works: installing the NVIDIA
-# Container Toolkit, which lets Docker containers access the host's GPUs;
-# there's a guide for Ubuntu here:
-# https://saturncloud.io/blog/how-to-install-pytorch-on-the-gpu-with-docker/
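-#
-# a minimal sketch of that install on Ubuntu (assumes NVIDIA's apt repository
-# is already configured; check the current NVIDIA docs before running):
-#   sudo apt-get install -y nvidia-container-toolkit
-#   sudo nvidia-ctk runtime configure --runtime=docker
-#   sudo systemctl restart docker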
-
-# before building, make sure the weights, the tokenizer, and the dialog
-# script are all in the current folder (the build context)
-
-# build image with: `docker build -t gitlab.cs.pub.ro/netsys/llama-images:llama-70b -f Dockerfile-70b .`
-# run image with: `docker run -it --gpus all gitlab.cs.pub.ro/netsys/llama-images:llama-70b`
-
-FROM condaforge/mambaforge
-
-# install pytorch (CUDA 11.8) and fastai via mamba
-RUN mamba install -c pytorch -c nvidia pytorch torchvision torchaudio pytorch-cuda=11.8 -y && \
-    mamba install -c fastai fastai -y && \
-    mamba clean -afy
-
-# llama dependencies
-RUN pip install fairscale sentencepiece fire && \
-    pip cache purge
-
-# clone the llama repo
-RUN git clone https://github.com/facebookresearch/llama /llama
-
-# add the tokenizer
-COPY tokenizer.model /llama/tokenizer.model
-
-# add the weights
-COPY llama-2-70b/ /llama/llama-2-70b/
-
-# add the dialog script
-COPY dialog.py /llama/dialog.py
-
-# run llama example program
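-# `--nproc_per_node` must match the model-parallel size of the checkpoint,
-# i.e. the number of consolidated.*.pth shards (8 for llama-2-70b)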
-CMD ["torchrun", \
-     "--nproc_per_node", "8", \
-     "/llama/dialog.py", \
-     "--ckpt_dir", "/llama/llama-2-70b/", \
-     "--tokenizer_path", "/llama/tokenizer.model", \
-     "--max_seq_len", "4096", \
-     "--max_batch_size", "6"]
diff --git a/Dockerfile-7b b/Dockerfile-7b
deleted file mode 100644
index 2ab8ebc4b2ff70ab5b2869a6629291151cae9354..0000000000000000000000000000000000000000
--- a/Dockerfile-7b
+++ /dev/null
@@ -1,50 +0,0 @@
-# there's an extra step needed before this works: installing the NVIDIA
-# Container Toolkit, which lets Docker containers access the host's GPUs;
-# there's a guide for Ubuntu here:
-# https://saturncloud.io/blog/how-to-install-pytorch-on-the-gpu-with-docker/
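-#
-# a minimal sketch of that install on Ubuntu (assumes NVIDIA's apt repository
-# is already configured; check the current NVIDIA docs before running):
-#   sudo apt-get install -y nvidia-container-toolkit
-#   sudo nvidia-ctk runtime configure --runtime=docker
-#   sudo systemctl restart docker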
-
-# before building, make sure the weights, the tokenizer, and the dialog
-# script are all in the current folder (the build context)
-
-# build image with: `docker build -t gitlab.cs.pub.ro/netsys/llama-images:llama-7b -f Dockerfile-7b .`
-# run image with: `docker run -it --gpus all gitlab.cs.pub.ro/netsys/llama-images:llama-7b`
-
-FROM condaforge/mambaforge
-
-# install pytorch (CUDA 11.8) and fastai via mamba
-RUN mamba install -c pytorch -c nvidia pytorch torchvision torchaudio pytorch-cuda=11.8 -y && \
-    mamba install -c fastai fastai -y && \
-    mamba clean -afy
-
-# llama dependencies
-RUN pip install fairscale sentencepiece fire && \
-    pip cache purge
-
-# clone the llama repo
-RUN git clone https://github.com/facebookresearch/llama /llama
-
-# add the tokenizer
-COPY tokenizer.model /llama/tokenizer.model
-
-# add the weights
-COPY llama-2-7b/ /llama/llama-2-7b/
-
-# add the dialog script
-COPY dialog.py /llama/dialog.py
-
-# run llama example program
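-# `--nproc_per_node` must match the model-parallel size of the checkpoint,
-# i.e. the number of consolidated.*.pth shards (1 for llama-2-7b)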
-CMD ["torchrun", \
-     "--nproc_per_node", "1", \
-     "/llama/dialog.py", \
-     "--ckpt_dir", "/llama/llama-2-7b/", \
-     "--tokenizer_path", "/llama/tokenizer.model", \
-     "--max_seq_len", "4096", \
-     "--max_batch_size", "6"]