From 3bf17842a431da77a704fa84d25c33a062520520 Mon Sep 17 00:00:00 2001 From: cherry Date: Fri, 6 Feb 2026 17:41:30 +0800 Subject: [PATCH 1/2] support deepseek v3_2 megatron --- requirements/framework.txt | 2 +- swift/callbacks/perf_log.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements/framework.txt b/requirements/framework.txt index 874ae2bb46..5fe0c918c5 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -34,7 +34,7 @@ sortedcontainers>=1.5.9 tensorboard tiktoken tqdm -transformers>=4.33,<4.58 +transformers>=4.33 transformers_stream_generator trl>=0.15,<0.25 uvicorn diff --git a/swift/callbacks/perf_log.py b/swift/callbacks/perf_log.py index 6e0ea64c24..9c71f3967b 100644 --- a/swift/callbacks/perf_log.py +++ b/swift/callbacks/perf_log.py @@ -5,7 +5,7 @@ import torch from transformers import TrainerControl, TrainerState -from swift.utils import empty_cache, get_logger +from swift.utils import empty_cache, get_current_device, get_device_count, get_env_args, get_logger from .base import TrainerCallback if TYPE_CHECKING: @@ -43,7 +43,6 @@ def __init__(self, args: 'TrainingArguments', trainer: 'Trainer'): self.step_start_time = None def on_init_end(self, args: 'TrainingArguments', state: TrainerState, control: TrainerControl, **kwargs): - from swift.utils import get_current_device, get_device_count, get_env_args # Top priority. Specify by ENV tflops = get_env_args('DEVICE_TFLOPS', int, None) From c2ed79c8c5ea0ec444140232c86c4e92df36aaed Mon Sep 17 00:00:00 2001 From: cherry Date: Sat, 7 Feb 2026 12:14:12 +0800 Subject: [PATCH 2/2] fix template_type --- swift/template/register.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/template/register.py b/swift/template/register.py index 27e4fce332..9e58983f06 100644 --- a/swift/template/register.py +++ b/swift/template/register.py @@ -25,7 +25,7 @@ def _read_args_json_template_type(model_dir): return from swift.arguments import BaseArguments args = BaseArguments.from_pretrained(model_dir) - return args.template_type + return args.template def get_template_meta(model_info: 'ModelInfo',