From f8eee05d4382362732a08294cf0d5d35edec8de8 Mon Sep 17 00:00:00 2001
From: Alexandru Gherghescu <gherghescu_alex1@yahoo.ro>
Date: Fri, 24 Nov 2023 20:36:36 +0200
Subject: [PATCH] Decrease context size to 2048, as otherwise the GPU goes OOM

---
 Dockerfile-13b-chat | 2 +-
 Dockerfile-70b-chat | 2 +-
 Dockerfile-7b-chat  | 2 +-
 README.md           | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Dockerfile-13b-chat b/Dockerfile-13b-chat
index f2d53ee..aad444c 100644
--- a/Dockerfile-13b-chat
+++ b/Dockerfile-13b-chat
@@ -38,5 +38,5 @@ CMD ["torchrun", \
      "/llama/dialog.py", \
      "--ckpt_dir", "/llama/llama-2-13b-chat/", \
      "--tokenizer_path", "/llama/tokenizer.model", \
-     "--max_seq_len", "4096", \
+     "--max_seq_len", "2048", \
      "--max_batch_size", "6"]
diff --git a/Dockerfile-70b-chat b/Dockerfile-70b-chat
index ab0c221..225b2af 100644
--- a/Dockerfile-70b-chat
+++ b/Dockerfile-70b-chat
@@ -38,5 +38,5 @@ CMD ["torchrun", \
      "/llama/dialog.py", \
      "--ckpt_dir", "/llama/llama-2-70b-chat/", \
      "--tokenizer_path", "/llama/tokenizer.model", \
-     "--max_seq_len", "4096", \
+     "--max_seq_len", "2048", \
      "--max_batch_size", "6"]
diff --git a/Dockerfile-7b-chat b/Dockerfile-7b-chat
index 4793e9a..0546745 100644
--- a/Dockerfile-7b-chat
+++ b/Dockerfile-7b-chat
@@ -38,5 +38,5 @@ CMD ["torchrun", \
      "/llama/dialog.py", \
      "--ckpt_dir", "/llama/llama-2-7b-chat/", \
      "--tokenizer_path", "/llama/tokenizer.model", \
-     "--max_seq_len", "4096", \
+     "--max_seq_len", "2048", \
      "--max_batch_size", "6"]
diff --git a/README.md b/README.md
index 50400c3..720dcb7 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ command to start a new dialog.
 
 </strike>
 
-Increased the context length of all images to 4096 tokens, therefore the cluster
+The context length of all images is now 2048 tokens, so the cluster
 GPUs won't be able to run the images anymore. To make it work again on the fep
 cluster, you have to manually modify the context length inside a Dockerfile for
 one of the LLama 7B images (to something like 512), and build the image yourself.
-- 
GitLab