Commit bb3c6528 authored by Alexandru-Mihai GHERGHESCU

Update old information, increase context length to 4096

Upload Dockerfiles for the normal and chat Llama 2 variants.
parent 3eaa8cb4

Dockerfile-13b

# there's an extra step needed on the host: installing the NVIDIA Container
# Toolkit, which lets Docker containers access the host's GPUs; there's a
# guide for Ubuntu covering that here:
# https://saturncloud.io/blog/how-to-install-pytorch-on-the-gpu-with-docker/
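# a minimal sketch of the host-side setup on Ubuntu (assumes NVIDIA's apt
# repository has already been added and a working GPU driver is installed;
# see the guide above for the full steps):
# `sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit`
# `sudo nvidia-ctk runtime configure --runtime=docker`
# `sudo systemctl restart docker`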
# before building, note that the weights, the tokenizer and the dialog script
# need to be in the build context (the current folder)
# build image with: `docker build -t gitlab.cs.pub.ro/netsys/llama-images:llama-13b -f Dockerfile-13b .`
# run image with: `docker run -it --gpus all gitlab.cs.pub.ro/netsys/llama-images:llama-13b`
FROM condaforge/mambaforge
# install PyTorch (built against CUDA 11.8) and fastai inside the conda environment
RUN mamba install -c pytorch -c nvidia pytorch torchvision torchaudio pytorch-cuda=11.8 -y && \
    mamba install -c fastai fastai -y && \
    mamba clean -afy
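# note: the pytorch-cuda=11.8 pin assumes the host driver supports CUDA 11.8;
# a quick way to check the driver's maximum supported CUDA version on the host
# is `nvidia-smi` (see the CUDA Version field in its header)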
# llama dependencies
RUN pip install fairscale sentencepiece fire && \
    pip cache purge
# add the llama repo
RUN git clone https://github.com/facebookresearch/llama /llama
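# note: the clone above tracks the repo's default branch, so builds aren't
# reproducible; one option is to pin a known-good revision (the hash below is
# a placeholder, not a real commit):
# `git clone https://github.com/facebookresearch/llama /llama && git -C /llama checkout <commit>`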
# add the tokenizer
COPY tokenizer.model /llama/tokenizer.model
# add the weights
COPY llama-2-13b/ /llama/llama-2-13b/
# add the dialog script
COPY dialog.py /llama/dialog.py
# run the llama dialog example on startup; --nproc_per_node must match the
# model-parallel size of the checkpoint (the 13B weights ship as 2 shards)
CMD ["torchrun", \
     "--nproc_per_node", "2", \
     "/llama/dialog.py", \
     "--ckpt_dir", "/llama/llama-2-13b/", \
     "--tokenizer_path", "/llama/tokenizer.model", \
     "--max_seq_len", "4096", \
     "--max_batch_size", "6"]

Dockerfile-70b

# as above, the NVIDIA Container Toolkit must be installed on the host so
# that Docker containers can access the host's GPUs; there's a guide for
# Ubuntu covering that here:
# https://saturncloud.io/blog/how-to-install-pytorch-on-the-gpu-with-docker/
# before building, note that the weights, the tokenizer and the dialog script
# need to be in the build context (the current folder)
# build image with: `docker build -t gitlab.cs.pub.ro/netsys/llama-images:llama-70b -f Dockerfile-70b .`
# run image with: `docker run -it --gpus all gitlab.cs.pub.ro/netsys/llama-images:llama-70b`
FROM condaforge/mambaforge
# install PyTorch (built against CUDA 11.8) and fastai inside the conda environment
RUN mamba install -c pytorch -c nvidia pytorch torchvision torchaudio pytorch-cuda=11.8 -y && \
    mamba install -c fastai fastai -y && \
    mamba clean -afy
# llama dependencies
RUN pip install fairscale sentencepiece fire && \
    pip cache purge
# add the llama repo
RUN git clone https://github.com/facebookresearch/llama /llama
# add the tokenizer
COPY tokenizer.model /llama/tokenizer.model
# add the weights
COPY llama-2-70b/ /llama/llama-2-70b/
# add the dialog script
COPY dialog.py /llama/dialog.py
# run the llama dialog example on startup; --nproc_per_node must match the
# model-parallel size of the checkpoint (the 70B weights ship as 8 shards)
CMD ["torchrun", \
     "--nproc_per_node", "8", \
     "/llama/dialog.py", \
     "--ckpt_dir", "/llama/llama-2-70b/", \
     "--tokenizer_path", "/llama/tokenizer.model", \
     "--max_seq_len", "4096", \
     "--max_batch_size", "6"]

Dockerfile-7b

# as above, the NVIDIA Container Toolkit must be installed on the host so
# that Docker containers can access the host's GPUs; there's a guide for
# Ubuntu covering that here:
# https://saturncloud.io/blog/how-to-install-pytorch-on-the-gpu-with-docker/
# before building, note that the weights, the tokenizer and the dialog script
# need to be in the build context (the current folder)
# build image with: `docker build -t gitlab.cs.pub.ro/netsys/llama-images:llama-7b -f Dockerfile-7b .`
# run image with: `docker run -it --gpus all gitlab.cs.pub.ro/netsys/llama-images:llama-7b`
FROM condaforge/mambaforge
# install PyTorch (built against CUDA 11.8) and fastai inside the conda environment
RUN mamba install -c pytorch -c nvidia pytorch torchvision torchaudio pytorch-cuda=11.8 -y && \
    mamba install -c fastai fastai -y && \
    mamba clean -afy
# llama dependencies
RUN pip install fairscale sentencepiece fire && \
    pip cache purge
# add the llama repo
RUN git clone https://github.com/facebookresearch/llama /llama
# add the tokenizer
COPY tokenizer.model /llama/tokenizer.model
# add the weights
COPY llama-2-7b/ /llama/llama-2-7b/
# add the dialog script
COPY dialog.py /llama/dialog.py
# run the llama dialog example on startup; --nproc_per_node must match the
# model-parallel size of the checkpoint (the 7B weights ship as a single shard)
CMD ["torchrun", \
     "--nproc_per_node", "1", \
     "/llama/dialog.py", \
     "--ckpt_dir", "/llama/llama-2-7b/", \
     "--tokenizer_path", "/llama/tokenizer.model", \
     "--max_seq_len", "4096", \
     "--max_batch_size", "6"]