Adds `rotary_base: 500000.0` next to the existing rope position-embedding
settings in the `model:` section of five Llama 3 / 3.1 training configs.

NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. Line boundaries follow the hunk
headers (6 context lines + 1 added line each); the 2-space indentation of
the keys under `model:` is assumed -- confirm against the target files
before applying.
NOTE(review): the new key lands before `rotary_percentage` in
llama3_1_405b, llama3_70b and llama3_8b, but after it in llama3_1_70b and
llama3_1_8b. Mapping order is not significant in YAML, so this is cosmetic.

diff --git a/launcher_scripts/conf/training/llama/llama3_1_405b.yaml b/launcher_scripts/conf/training/llama/llama3_1_405b.yaml
index 587819c62..567348be0 100644
--- a/launcher_scripts/conf/training/llama/llama3_1_405b.yaml
+++ b/launcher_scripts/conf/training/llama/llama3_1_405b.yaml
@@ -89,6 +89,7 @@ model:
   openai_gelu: false
   normalize_attention_scores: true
   position_embedding_type: rope
+  rotary_base: 500000.0
   rotary_percentage: 1.0
   apply_rope_fusion: true
   attention_type: multihead
diff --git a/launcher_scripts/conf/training/llama/llama3_1_70b.yaml b/launcher_scripts/conf/training/llama/llama3_1_70b.yaml
index 1433d084d..4aa9d5005 100644
--- a/launcher_scripts/conf/training/llama/llama3_1_70b.yaml
+++ b/launcher_scripts/conf/training/llama/llama3_1_70b.yaml
@@ -90,6 +90,7 @@ model:
   normalize_attention_scores: true
   position_embedding_type: rope
   rotary_percentage: 1.0
+  rotary_base: 500000.0
   apply_rope_fusion: true
   attention_type: multihead
   share_embeddings_and_output_weights: false
diff --git a/launcher_scripts/conf/training/llama/llama3_1_8b.yaml b/launcher_scripts/conf/training/llama/llama3_1_8b.yaml
index 7edfadf9a..7c7f91308 100644
--- a/launcher_scripts/conf/training/llama/llama3_1_8b.yaml
+++ b/launcher_scripts/conf/training/llama/llama3_1_8b.yaml
@@ -90,6 +90,7 @@ model:
   normalize_attention_scores: true
   position_embedding_type: rope
   rotary_percentage: 1.0
+  rotary_base: 500000.0
   apply_rope_fusion: true
   cross_entropy_loss_fusion: true
   attention_type: multihead
diff --git a/launcher_scripts/conf/training/llama/llama3_70b.yaml b/launcher_scripts/conf/training/llama/llama3_70b.yaml
index c0169ef97..69988f0fc 100644
--- a/launcher_scripts/conf/training/llama/llama3_70b.yaml
+++ b/launcher_scripts/conf/training/llama/llama3_70b.yaml
@@ -89,6 +89,7 @@ model:
   openai_gelu: false
   normalize_attention_scores: true
   position_embedding_type: rope
+  rotary_base: 500000.0
   rotary_percentage: 1.0
   apply_rope_fusion: true
   attention_type: multihead
diff --git a/launcher_scripts/conf/training/llama/llama3_8b.yaml b/launcher_scripts/conf/training/llama/llama3_8b.yaml
index db01e0a09..584bd82db 100644
--- a/launcher_scripts/conf/training/llama/llama3_8b.yaml
+++ b/launcher_scripts/conf/training/llama/llama3_8b.yaml
@@ -89,6 +89,7 @@ model:
   openai_gelu: false
   normalize_attention_scores: true
   position_embedding_type: rope
+  rotary_base: 500000.0
   rotary_percentage: 1.0
   apply_rope_fusion: true
   cross_entropy_loss_fusion: true