diff --git a/Dockerfile-13b-chat b/Dockerfile-13b-chat
index f2d53ee1ade3e289f83d83bcffca3be8cccd8882..aad444c889fadc12d4546166e1772c648e15d2a7 100644
--- a/Dockerfile-13b-chat
+++ b/Dockerfile-13b-chat
@@ -38,5 +38,5 @@ CMD ["torchrun", \
     "/llama/dialog.py", \
     "--ckpt_dir", "/llama/llama-2-13b-chat/", \
     "--tokenizer_path", "/llama/tokenizer.model", \
-    "--max_seq_len", "4096", \
+    "--max_seq_len", "2048", \
     "--max_batch_size", "6"]
diff --git a/Dockerfile-70b-chat b/Dockerfile-70b-chat
index ab0c221e3051f7f1f9a595c11a7348847cad1794..225b2af113f21a8955bc4866c15e6af835d05bee 100644
--- a/Dockerfile-70b-chat
+++ b/Dockerfile-70b-chat
@@ -38,5 +38,5 @@ CMD ["torchrun", \
     "/llama/dialog.py", \
     "--ckpt_dir", "/llama/llama-2-70b-chat/", \
     "--tokenizer_path", "/llama/tokenizer.model", \
-    "--max_seq_len", "4096", \
+    "--max_seq_len", "2048", \
     "--max_batch_size", "6"]
diff --git a/Dockerfile-7b-chat b/Dockerfile-7b-chat
index 4793e9a3a4d2c028d65d1b9878e1dd8b47602abd..05467451c79ec7ae3d784c676e5185d045c8e4e0 100644
--- a/Dockerfile-7b-chat
+++ b/Dockerfile-7b-chat
@@ -38,5 +38,5 @@ CMD ["torchrun", \
     "/llama/dialog.py", \
     "--ckpt_dir", "/llama/llama-2-7b-chat/", \
     "--tokenizer_path", "/llama/tokenizer.model", \
-    "--max_seq_len", "4096", \
+    "--max_seq_len", "2048", \
     "--max_batch_size", "6"]
diff --git a/README.md b/README.md
index 50400c3adda16990da0ffd05f3dc8bd33210603b..720dcb7b1ca62c963c306b3f7bab5870a7f46ae6 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@
 command to start a new dialog.
 </strike>
 
-Increased the context length of all images to 4096 tokens, therefore the cluster
+Increased the context length of all images to 2048 tokens, therefore the cluster
 GPUs won't be able to run the images anymore. To make it work again on the fep
 cluster, you have to manually modify the context length inside a Dockerfile for
 one of the LLama 7B images (to something like 512), and build the image yourself.
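
Halving --max_seq_len helps because the reference Llama inference code preallocates its key/value cache in proportion to max_seq_len and max_batch_size, so a smaller context length directly reduces per-GPU memory. A minimal sketch of the README's suggested workaround for the fep cluster; the image tag and the docker run flags are placeholders not specified in the patch:

    # In Dockerfile-7b-chat, lower the context length further, e.g.:
    #   "--max_seq_len", "512", \
    # then rebuild and run (the tag "llama-2-7b-chat" is a placeholder):
    docker build -f Dockerfile-7b-chat -t llama-2-7b-chat .
    docker run --gpus all -it llama-2-7b-chat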