Skip to content
Snippets Groups Projects

Compute/memory requirements scripts

Open Alexandru-Mihai GHERGHESCU requested to merge feature/scripts into main
@@ -31,17 +31,17 @@ GPU_PEAK_TFLOPS = 312
EXPECTED_GPU_THROUGHPUT = 0.4
# dataset size (in tokens)
DATASET_SIZE = 10_000_000_000
DATASET_SIZE = 2_000_000_000_000
# expected available GPUs (to correctly assess, you probably want to increase
# this in multiples of the number of GPUs needed for tensor and pipeline
# parallelism; e.g. training a 70B requires at least 8x DGX clusters, each with
# 8 GPUs; therefore, the base number of required GPUs to hold the model is 32;
# data parallel adds, for each data parallel unit, another 32 GPUs, therefore
# the number of available GPUs should be 32, 64, 96 etc. to get an accurate
# parallelism; e.g. training a 70B requires at least 2x DGX clusters, each with
# 8 GPUs; therefore, the base number of required GPUs to hold the model is 16;
# data parallel adds, for each data parallel unit, another 16 GPUs, therefore
# the number of available GPUs should be 16, 32, 48, 64 etc. to get an accurate
# count; the base number of required GPUs is the output of the `memory_req.py`
# script)
EXPECTED_AVAILABLE_GPUS = 32
EXPECTED_AVAILABLE_GPUS = 2048
# -- END OF GLOBALS --
Loading