diff --git a/.gitignore b/.gitignore
index 77b781e7..279c281f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -321,4 +321,6 @@ cython_debug/
 /backend/.idea/*
 /backend/config/*
 /BiliNote_frontend/.idea/*
-/BiliNote_frontend/src-tauri/bin/
\ No newline at end of file
+/BiliNote_frontend/src-tauri/bin/
+
+.vscode
\ No newline at end of file
diff --git a/BillNote_frontend/package.json b/BillNote_frontend/package.json
index fb8bb74a..74653377 100644
--- a/BillNote_frontend/package.json
+++ b/BillNote_frontend/package.json
@@ -32,6 +32,7 @@
     "clsx": "^2.1.1",
     "fuse.js": "^7.1.0",
     "github-markdown-css": "^5.8.1",
+    "jszip": "^3.10.1",
     "katex": "^0.16.22",
     "lottie-react": "^2.4.1",
     "lucide-react": "^0.487.0",
diff --git a/BillNote_frontend/src/pages/HomePage/components/MarkmapComponent.tsx b/BillNote_frontend/src/pages/HomePage/components/MarkmapComponent.tsx
index 0db81365..3e77addc 100644
--- a/BillNote_frontend/src/pages/HomePage/components/MarkmapComponent.tsx
+++ b/BillNote_frontend/src/pages/HomePage/components/MarkmapComponent.tsx
@@ -3,6 +3,7 @@ import { Markmap } from 'markmap-view'
 import { transformer } from '@/lib/markmap.ts'
 import { Toolbar } from 'markmap-toolbar'
 import 'markmap-toolbar/dist/style.css'
+import JSZip from 'jszip'

 export interface MarkmapEditorProps {
   /** 要渲染的 Markdown 文本 */
@@ -116,12 +117,207 @@ export default function MarkmapEditor({
     }
   };

+  // Export the mind map as an SVG (vector) file
+  const exportSvg = async () => {
+    try {
+      if (!svgRef.current || !mmRef.current) return;
+
+      const svgEl = svgRef.current;
+      const mm = mmRef.current;
+
+      // Call fit() first so the whole mind map is laid out in view
+      await mm.fit();
+      // Give the renderer time to finish
+      await new Promise(resolve => setTimeout(resolve, 100));
+
+      // Work on a clone so the on-screen SVG is left untouched
+      const clonedSvg = svgEl.cloneNode(true) as SVGSVGElement;
+
+      // Measure the real bounding box of the content
+      const gElement = svgEl.querySelector('g');
+      if (gElement) {
+        const bbox = gElement.getBBox();
+        // Leave some margin around the content
+        const padding = 50;
+        const viewBoxX = bbox.x - padding;
+        const viewBoxY = bbox.y - padding;
+        const viewBoxWidth = bbox.width + padding * 2;
+        const viewBoxHeight = bbox.height + padding * 2;
+
+        // A viewBox lets the exported SVG scale to any size
+        clonedSvg.setAttribute('viewBox', `${viewBoxX} ${viewBoxY} ${viewBoxWidth} ${viewBoxHeight}`);
+        // getBBox() is measured in the group's own coordinates, so drop any pan/zoom transform
+        // on the cloned group; otherwise the drawing is offset/scaled relative to this viewBox
+        clonedSvg.querySelector('g')?.removeAttribute('transform');
+        // Size 100% (overrides any fixed size) so the SVG adapts to its container
+        clonedSvg.setAttribute('width', '100%');
+        clonedSvg.setAttribute('height', '100%');
+        // Keep the aspect ratio
+        clonedSvg.setAttribute('preserveAspectRatio', 'xMidYMid meet');
+      }
+
+      // Give the SVG a white background via CSS
+      const style = document.createElementNS('http://www.w3.org/2000/svg', 'style');
+      style.textContent = 'svg { background-color: white; }';
+      clonedSvg.insertBefore(style, clonedSvg.firstChild);
+
+      // Also add a white rectangle so the background is white in every viewer
+      const bgRect = document.createElementNS('http://www.w3.org/2000/svg', 'rect');
+      const viewBox = clonedSvg.getAttribute('viewBox')?.split(' ').map(Number) || [0, 0, 800, 600];
+      bgRect.setAttribute('x', viewBox[0].toString());
+      bgRect.setAttribute('y', viewBox[1].toString());
+      bgRect.setAttribute('width', viewBox[2].toString());
+      bgRect.setAttribute('height', viewBox[3].toString());
+      bgRect.setAttribute('fill', 'white');
+      // Insert it ahead of the content so it is painted first
+      const firstG = clonedSvg.querySelector('g');
+      if (firstG) {
+        clonedSvg.insertBefore(bgRect, firstG);
+      } else {
+        clonedSvg.insertBefore(bgRect, clonedSvg.firstChild);
+      }
+
+      // Make sure the standalone file declares the SVG namespaces
+      clonedSvg.setAttribute('xmlns', 'http://www.w3.org/2000/svg');
+      clonedSvg.setAttribute('xmlns:xlink', 'http://www.w3.org/1999/xlink');
+
+      // Serialize the SVG
+      const svgData = new XMLSerializer().serializeToString(clonedSvg);
+
+      // Trigger the download
+      const blob = new Blob([svgData], { type: 'image/svg+xml;charset=utf-8' });
+      const url = URL.createObjectURL(blob);
+      const a = document.createElement('a');
+      a.href = url;
+      a.download = `${title || 'mindmap'}.svg`;
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+      URL.revokeObjectURL(url);
+    } catch (error) {
+      console.error('导出SVG失败:', error);
+    }
+  };
+
+  // Export the mind map in XMind format
+  const exportXMind = async () => {
+    try {
+      const { root } = transformer.transform(value);
+
+      // Generate a random id
+      const generateId = () => Math.random().toString(36).substring(2, 15);
+
+      // Decode HTML entities (e.g. "&#x5B9E;" -> "实", "&#12345;" -> the matching character)
+      const decodeHtmlEntities = (text: string): string => {
+        if (!text) return text;
+
+        // Hexadecimal numeric entities: &#xHHHH;
+        let decoded = text.replace(/&#x([0-9a-fA-F]+);/g, (_, hex) => {
+          return String.fromCodePoint(parseInt(hex, 16));
+        });
+
+        // Decimal numeric entities: &#DDDD;
+        decoded = decoded.replace(/&#(\d+);/g, (_, dec) => {
+          return String.fromCodePoint(parseInt(dec, 10));
+        });
+
+        // Let a textarea resolve named entities (&amp; &lt; &gt; ...)
+        const textarea = document.createElement('textarea');
+        textarea.innerHTML = decoded;
+        return textarea.value;
+      };
+
+      // Strip HTML tags and keep only the plain text
+      const stripHtml = (html: string): string => {
+        if (!html) return html;
+        // Decode HTML entities first
+        let text = decodeHtmlEntities(html);
+        // Then drop the tags
+        const div = document.createElement('div');
+        div.innerHTML = text;
+        return div.textContent || div.innerText || text;
+      };
+
+      // Convert a markmap node into the XMind topic structure
+      const convertToXMindNode = (node: any, isRoot = false): any => {
+        const rawTitle = node.content || node.payload?.content || '未命名';
+        const xmindNode: any = {
+          id: generateId(),
+          class: 'topic', // same class for root and child topics; isRoot is kept for callers
+          title: stripHtml(rawTitle),
+        };
+
+        if (node.children && node.children.length > 0) {
+          xmindNode.children = {
+            attached: node.children.map((child: any) => convertToXMindNode(child, false))
+          };
+        }
+
+        return xmindNode;
+      };
+
+      const rootTopic = convertToXMindNode(root, true);
+      const sheetId = generateId();
+
+      // Structure of XMind's content.json
+      const content = [{
+        id: sheetId,
+        class: 'sheet',
+        title: stripHtml(title) || '思维导图',
+        rootTopic: rootTopic,
+        topicPositioning: 'fixed'
+      }];
+
+      // XMind metadata.json
+      const metadata = {
+        creator: {
+          name: 'BiliNote',
+          version: '1.0.0'
+        }
+      };
+
+      // XMind manifest.json
+      const manifest = {
+        'file-entries': {
+          'content.json': {},
+          'metadata.json': {}
+        }
+      };
+
+      // Build the .xmind archive with JSZip
+      // Strings are passed directly; JSZip handles the UTF-8 encoding
+      const zip = new JSZip();
+      zip.file('content.json', JSON.stringify(content, null, 2));
+      zip.file('metadata.json', JSON.stringify(metadata, null, 2));
+      zip.file('manifest.json', JSON.stringify(manifest, null, 2));
+
+      // Generate the ZIP and trigger the download
+      const blob = await zip.generateAsync({ type: 'blob' });
+      const url = URL.createObjectURL(blob);
+      const a = document.createElement('a');
+      a.href = url;
+      a.download = `${title || 'mindmap'}.xmind`;
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+      URL.revokeObjectURL(url);
+    } catch (error) {
+      console.error('导出XMind失败:', error);
+    }
+  };
+
   // 导出PNG思维导图
-  const exportPng = () => {
+  const exportPng = async () => {
     try {
-      if (!svgRef.current) return;
-
+      if (!svgRef.current || !mmRef.current) return;
+
       const svgEl = svgRef.current;
+      const mm = mmRef.current;
+
+      // Call fit() first so the whole mind map is laid out in view
+      await mm.fit();
+      // Give the renderer time to finish
+      await new Promise(resolve => setTimeout(resolve, 100));

       // 获取SVG实际尺寸
       const svgWidth = svgEl.width.baseVal.value || svgEl.clientWidth || 800;
@@ -245,17 +441,31 @@ export default function MarkmapEditor({
+年会恢复更新以后放出最新社区地址
diff --git a/backend/app/downloaders/base.py b/backend/app/downloaders/base.py
index ca766f9c..a4dfb075 100644
--- a/backend/app/downloaders/base.py
+++ b/backend/app/downloaders/base.py
@@ -5,6 +5,7 @@
from app.enmus.note_enums import DownloadQuality
from app.models.notes_model import AudioDownloadResult
+from app.models.transcriber_model import TranscriptResult
from os import getenv
QUALITY_MAP = {
"fast": "32",
@@ -36,3 +37,15 @@ def download(self, video_url: str, output_dir: str = None,
def download_video(self, video_url: str,
output_dir: Union[str, None] = None) -> str:
pass
+
+    def download_subtitles(self, video_url: str, output_dir: str = None,
+                           langs: list = None) -> Optional[TranscriptResult]:
+        '''
+        Try to fetch subtitles published by the platform (manual or auto-generated).
+
+        :param video_url: URL of the video
+        :param output_dir: output directory
+        :param langs: preferred languages in priority order, e.g. ['zh-Hans', 'zh', 'en']
+        :return: TranscriptResult, or None when there are no subtitles (this base version always returns None)
+        '''
+        return None
diff --git a/backend/app/downloaders/bilibili_downloader.py b/backend/app/downloaders/bilibili_downloader.py
index 1dcf91e3..2c23dc50 100644
--- a/backend/app/downloaders/bilibili_downloader.py
+++ b/backend/app/downloaders/bilibili_downloader.py
@@ -1,14 +1,23 @@
import os
+import json
+import logging
from abc import ABC
-from typing import Union, Optional
+from typing import Union, Optional, List
+from pathlib import Path
import yt_dlp
from app.downloaders.base import Downloader, DownloadQuality, QUALITY_MAP
from app.models.notes_model import AudioDownloadResult
+from app.models.transcriber_model import TranscriptResult, TranscriptSegment
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
+logger = logging.getLogger(__name__)
+
+# B站 cookies 文件路径
+BILIBILI_COOKIES_FILE = os.getenv("BILIBILI_COOKIES_FILE", "cookies.txt")
+
class BilibiliDownloader(Downloader, ABC):
def __init__(self):
@@ -111,4 +120,198 @@ def delete_video(self, video_path: str) -> str:
os.remove(video_path)
return f"视频文件已删除: {video_path}"
else:
- return f"视频文件未找到: {video_path}"
\ No newline at end of file
+ return f"视频文件未找到: {video_path}"
+
+    def download_subtitles(self, video_url: str, output_dir: str = None,
+                           langs: List[str] = None) -> Optional[TranscriptResult]:
+        """
+        Try to fetch a Bilibili video's subtitles through yt-dlp.
+
+        Tracks are matched against ``langs`` in order; if none match, the first
+        track that is not the ``danmaku`` stream is used.
+        A cookies file (``BILIBILI_COOKIES_FILE``, resolved against the backend
+        directory when relative) is passed to yt-dlp if it exists.
+
+        :param video_url: URL of the video.
+        :param output_dir: directory for subtitle files; defaults to ``get_data_dir()``,
+            then ``self.cache_data``.
+        :param langs: preferred language codes in priority order.
+        :return: parsed TranscriptResult, or None when no usable subtitle was obtained
+            (any exception is logged and also yields None).
+        """
+        if output_dir is None:
+            output_dir = get_data_dir()
+        if not output_dir:
+            output_dir = self.cache_data
+        os.makedirs(output_dir, exist_ok=True)
+
+        if langs is None:
+            langs = ['zh-Hans', 'zh', 'zh-CN', 'ai-zh', 'en', 'en-US']
+
+        video_id = extract_video_id(video_url, "bilibili")
+
+        ydl_opts = {
+            'writesubtitles': True,
+            'writeautomaticsub': True,
+            'subtitleslangs': langs,
+            'subtitlesformat': 'srt/json3/best',  # accept several formats
+            'skip_download': True,
+            'outtmpl': os.path.join(output_dir, f'{video_id}.%(ext)s'),
+            'quiet': True,
+        }
+
+        # Optional cookies support
+        cookies_path = Path(BILIBILI_COOKIES_FILE)
+        if not cookies_path.is_absolute():
+            # relative paths are resolved against the backend directory
+            cookies_path = Path(__file__).parent.parent.parent / BILIBILI_COOKIES_FILE
+
+        if cookies_path.exists():
+            ydl_opts['cookiefile'] = str(cookies_path)
+            logger.info(f"使用 cookies 文件: {cookies_path}")
+        else:
+            logger.warning(f"B站 cookies 文件不存在: {cookies_path},字幕获取可能失败")
+
+        try:
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(video_url, download=True)
+
+            # extract_info may return None; treat that as "no subtitles"
+            subtitles = (info or {}).get('requested_subtitles') or {}
+            if not subtitles:
+                logger.info(f"B站视频 {video_id} 没有可用字幕")
+                return None
+
+            # Preferred languages first, then any remaining non-danmaku track
+            candidates = [lang for lang in langs if lang in subtitles]
+            candidates += [lang for lang in subtitles if lang != 'danmaku']
+            detected_lang = candidates[0] if candidates else None
+            sub_info = subtitles.get(detected_lang)
+
+            if not sub_info:
+                logger.info(f"B站视频 {video_id} 没有可用字幕(排除弹幕)")
+                return None
+
+            # Format of the selected track as reported by yt-dlp
+            ext = sub_info.get('ext', 'srt')
+
+            # yt-dlp sometimes returns the subtitle text inline. Only SRT text may go
+            # to the SRT parser; other formats are read from the written file below.
+            if sub_info.get('data') and ext == 'srt':
+                logger.info(f"直接从返回数据解析字幕: {detected_lang}")
+                return self._parse_srt_content(sub_info['data'], detected_lang)
+
+            # Prefer the path yt-dlp recorded; fall back to the outtmpl-derived name
+            subtitle_file = sub_info.get('filepath') or os.path.join(
+                output_dir, f"{video_id}.{detected_lang}.{ext}")
+
+            if not os.path.exists(subtitle_file):
+                logger.info(f"字幕文件不存在: {subtitle_file}")
+                return None
+
+            # Parse according to the subtitle format
+            if ext == 'json3':
+                return self._parse_json3_subtitle(subtitle_file, detected_lang)
+            else:
+                with open(subtitle_file, 'r', encoding='utf-8') as f:
+                    return self._parse_srt_content(f.read(), detected_lang)
+
+        except Exception as e:
+            logger.warning(f"获取B站字幕失败: {e}")
+            return None
+
+    def _parse_srt_content(self, srt_content: str, language: str) -> Optional[TranscriptResult]:
+        """
+        Parse SRT subtitle text into a TranscriptResult.
+
+        :param srt_content: raw SRT text (LF or CRLF line endings).
+        :param language: language code stored on the result.
+        :return: TranscriptResult, or None if no cue could be parsed or parsing failed.
+        """
+        import re
+
+        def time_to_seconds(t):
+            # "HH:MM:SS,mmm" -> seconds as float
+            parts = t.replace(',', '.').split(':')
+            return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
+
+        try:
+            # Normalise CRLF/CR so the newline-based pattern below also matches
+            # Windows-style files.
+            srt_content = srt_content.replace('\r\n', '\n').replace('\r', '\n')
+            # A cue ends at a blank line, at end of input, or right before the next
+            # "index + timestamp" header. Requiring the timestamp keeps a text line
+            # made only of digits (e.g. "2024") from cutting the cue short.
+            stamp = r'\d{2}:\d{2}:\d{2}[,.]\d{3}'
+            pattern = (rf'(\d+)\n({stamp})\s*-->\s*({stamp})\n'
+                       rf'(.*?)(?=\n\n|\n\d+\n{stamp}\s*-->|$)')
+            segments = []
+            for _idx, start_time, end_time, text in re.findall(pattern, srt_content, re.DOTALL):
+                text = text.strip()
+                if text:
+                    segments.append(TranscriptSegment(
+                        start=time_to_seconds(start_time),
+                        end=time_to_seconds(end_time),
+                        text=text
+                    ))
+
+            if not segments:
+                return None
+
+            full_text = ' '.join(seg.text for seg in segments)
+            logger.info(f"成功解析B站SRT字幕,共 {len(segments)} 段")
+            return TranscriptResult(
+                language=language, full_text=full_text, segments=segments,
+                raw={'source': 'bilibili_subtitle', 'format': 'srt'}
+            )
+        except Exception as e:
+            logger.warning(f"解析SRT字幕失败: {e}")
+            return None
+
+    def _parse_json3_subtitle(self, subtitle_file: str, language: str) -> Optional[TranscriptResult]:
+        """
+        Read a json3 subtitle file and convert it to a TranscriptResult.
+
+        Events whose joined text is empty after stripping are skipped.
+
+        :param subtitle_file: path of the json3 file.
+        :param language: language code stored on the result.
+        :return: TranscriptResult, or None when the file yields no text or cannot be read/parsed.
+        """
+        try:
+            with open(subtitle_file, 'r', encoding='utf-8') as fp:
+                payload = json.load(fp)
+
+            cues = []
+            for entry in payload.get('events', []):
+                # Join the text fragments of this event; skip whitespace-only events
+                pieces = (part.get('utf8', '') for part in entry.get('segs', []))
+                caption = ''.join(pieces).strip()
+                if not caption:
+                    continue
+
+                # json3 timestamps are in milliseconds; segments store seconds
+                begin_ms = entry.get('tStartMs', 0)
+                length_ms = entry.get('dDurationMs', 0)
+                cues.append(TranscriptSegment(
+                    start=begin_ms / 1000.0,
+                    end=(begin_ms + length_ms) / 1000.0,
+                    text=caption
+                ))
+
+            if not cues:
+                return None
+
+            joined = ' '.join(cue.text for cue in cues)
+
+            logger.info(f"成功解析B站字幕,共 {len(cues)} 段")
+            return TranscriptResult(
+                language=language,
+                full_text=joined,
+                segments=cues,
+                raw={'source': 'bilibili_subtitle', 'file': subtitle_file}
+            )
+
+        except Exception as exc:
+            logger.warning(f"解析字幕文件失败: {exc}")
+            return None
\ No newline at end of file
diff --git a/backend/app/downloaders/youtube_downloader.py b/backend/app/downloaders/youtube_downloader.py
index 34c6b043..2a081bb9 100644
--- a/backend/app/downloaders/youtube_downloader.py
+++ b/backend/app/downloaders/youtube_downloader.py
@@ -1,14 +1,19 @@
import os
+import json
+import logging
from abc import ABC
-from typing import Union, Optional
+from typing import Union, Optional, List
import yt_dlp
from app.downloaders.base import Downloader, DownloadQuality
from app.models.notes_model import AudioDownloadResult
+from app.models.transcriber_model import TranscriptResult, TranscriptSegment
from app.utils.path_helper import get_data_dir
from app.utils.url_parser import extract_video_id
+logger = logging.getLogger(__name__)
+
class YoutubeDownloader(Downloader, ABC):
def __init__(self):
@@ -92,3 +97,119 @@ def download_video(
raise FileNotFoundError(f"视频文件未找到: {video_path}")
return video_path
+
+    def download_subtitles(self, video_url: str, output_dir: str = None,
+                           langs: List[str] = None) -> Optional[TranscriptResult]:
+        """
+        Try to fetch a YouTube video's subtitles (manual or auto-generated) via yt-dlp.
+
+        :param video_url: URL of the video.
+        :param output_dir: directory for subtitle files; defaults to ``get_data_dir()``,
+            then ``self.cache_data``.
+        :param langs: preferred language codes in priority order.
+        :return: parsed TranscriptResult, or None when no usable subtitle was obtained
+            (any exception is logged and also yields None).
+        """
+        if output_dir is None:
+            output_dir = get_data_dir()
+        if not output_dir:
+            output_dir = self.cache_data
+        os.makedirs(output_dir, exist_ok=True)
+
+        if langs is None:
+            langs = ['zh-Hans', 'zh', 'zh-CN', 'zh-TW', 'en', 'en-US']
+
+        video_id = extract_video_id(video_url, "youtube")
+
+        ydl_opts = {
+            'writesubtitles': True,
+            'writeautomaticsub': True,
+            'subtitleslangs': langs,
+            'subtitlesformat': 'json3',
+            'skip_download': True,
+            'outtmpl': os.path.join(output_dir, f'{video_id}.%(ext)s'),
+            'quiet': True,
+        }
+
+        try:
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(video_url, download=True)
+
+            # extract_info may return None; treat that as "no subtitles"
+            subtitles = (info or {}).get('requested_subtitles') or {}
+            if not subtitles:
+                logger.info(f"YouTube视频 {video_id} 没有可用字幕")
+                return None
+
+            # Preferred languages first, otherwise the first track yt-dlp returned
+            detected_lang = next(
+                (lang for lang in langs if lang in subtitles), next(iter(subtitles)))
+            sub_info = subtitles[detected_lang] or {}
+
+            # Only json3 can be parsed here; yt-dlp may have fallen back to another format
+            if sub_info.get('ext', 'json3') != 'json3':
+                logger.info(f"字幕格式不是 json3: {sub_info.get('ext')}")
+                return None
+
+            # Prefer the path yt-dlp recorded; fall back to the outtmpl-derived name
+            subtitle_file = sub_info.get('filepath') or os.path.join(
+                output_dir, f"{video_id}.{detected_lang}.json3")
+
+            if not os.path.exists(subtitle_file):
+                logger.info(f"字幕文件不存在: {subtitle_file}")
+                return None
+
+            # Parse the subtitle file
+            return self._parse_json3_subtitle(subtitle_file, detected_lang)
+
+        except Exception as e:
+            logger.warning(f"获取YouTube字幕失败: {e}")
+            return None
+
+    def _parse_json3_subtitle(self, subtitle_file: str, language: str) -> Optional[TranscriptResult]:
+        """
+        Load a json3 subtitle file and build a TranscriptResult from its events.
+
+        Events with no text left after stripping whitespace are ignored.
+
+        :param subtitle_file: path of the json3 file.
+        :param language: language code stored on the result.
+        :return: TranscriptResult, or None when nothing could be parsed or reading failed.
+        """
+        try:
+            with open(subtitle_file, 'r', encoding='utf-8') as handle:
+                document = json.load(handle)
+
+            collected = []
+            for item in document.get('events', []):
+                # Timing first (milliseconds in json3), exactly as stored on the event
+                offset_ms = item.get('tStartMs', 0)
+                span_ms = item.get('dDurationMs', 0)
+
+                # Concatenate the event's text fragments
+                fragments = [chunk.get('utf8', '') for chunk in item.get('segs', [])]
+                line = ''.join(fragments).strip()
+                if not line:
+                    continue
+
+                # Segments carry seconds, so convert from milliseconds here
+                collected.append(TranscriptSegment(
+                    start=offset_ms / 1000.0,
+                    end=(offset_ms + span_ms) / 1000.0,
+                    text=line
+                ))
+
+            if not collected:
+                return None
+
+            logger.info(f"成功解析YouTube字幕,共 {len(collected)} 段")
+            return TranscriptResult(
+                language=language,
+                full_text=' '.join(piece.text for piece in collected),
+                segments=collected,
+                raw={'source': 'youtube_subtitle', 'file': subtitle_file}
+            )
+
+        except Exception as err:
+            logger.warning(f"解析字幕文件失败: {err}")
+            return None
diff --git a/backend/app/services/note.py b/backend/app/services/note.py
index 1e71fce3..a5cc47d4 100644
--- a/backend/app/services/note.py
+++ b/backend/app/services/note.py
@@ -147,11 +147,15 @@ def generate(
grid_size=grid_size,
)
- # 2. 转写文字
- transcript = self._transcribe_audio(
+ # 2. 获取字幕/转写文字
+ # 优先尝试获取平台字幕,没有再 fallback 到音频转写
+ transcript = self._get_transcript(
+ downloader=downloader,
+ video_url=video_url,
audio_file=audio_meta.file_path,
transcript_cache_file=transcript_cache_file,
status_phase=TaskStatus.TRANSCRIBING,
+ task_id=task_id,
)
# 3. GPT 总结
@@ -400,6 +404,62 @@ def _download_media(
raise
+    def _get_transcript(
+        self,
+        downloader: Downloader,
+        video_url: str,
+        audio_file: str,
+        transcript_cache_file: Path,
+        status_phase: TaskStatus,
+        task_id: Optional[str] = None,
+    ) -> TranscriptResult | None:
+        """
+        Return a transcript: cache first, then platform subtitles, then audio transcription.
+
+        :param downloader: downloader asked for platform subtitles.
+        :param video_url: URL of the video.
+        :param audio_file: audio path handed to the transcription fallback.
+        :param transcript_cache_file: JSON cache file to read from / write to.
+        :param status_phase: status reported for the task while this step runs.
+        :param task_id: task identifier used for the status update.
+        :return: the transcript, or whatever the audio transcription fallback returns.
+        """
+        self._update_status(task_id, status_phase)
+
+        # Reuse a cached transcript when one exists and can be loaded
+        if transcript_cache_file.exists():
+            logger.info(f"检测到转写缓存 ({transcript_cache_file}),尝试读取")
+            try:
+                data = json.loads(transcript_cache_file.read_text(encoding="utf-8"))
+                segments = [TranscriptSegment(**seg) for seg in data.get("segments", [])]
+                return TranscriptResult(language=data.get("language"), full_text=data["full_text"], segments=segments)
+            except Exception as e:
+                logger.warning(f"加载转写缓存失败,将重新获取:{e}")
+
+        # 1. Try platform subtitles first
+        logger.info("尝试获取平台字幕...")
+        try:
+            transcript = downloader.download_subtitles(video_url)
+            if transcript and transcript.segments:
+                logger.info(f"成功获取平台字幕,共 {len(transcript.segments)} 段")
+                # Caching is best-effort: a failed write must not discard the subtitles
+                try:
+                    transcript_cache_file.write_text(
+                        json.dumps(asdict(transcript), ensure_ascii=False, indent=2), encoding="utf-8")
+                except Exception as cache_err:
+                    logger.warning(f"写入转写缓存失败: {cache_err}")
+                return transcript
+            logger.info("平台无可用字幕,将使用音频转写")
+        except Exception as e:
+            logger.warning(f"获取平台字幕失败: {e},将使用音频转写")
+
+        # 2. Fall back to audio transcription
+        return self._transcribe_audio(
+            audio_file=audio_file,
+            transcript_cache_file=transcript_cache_file,
+            status_phase=status_phase,
+        )
+
def _transcribe_audio(
self,
audio_file: str,
diff --git a/backend/app/utils/note_helper.py b/backend/app/utils/note_helper.py
index 430c3a06..3621b9d4 100644
--- a/backend/app/utils/note_helper.py
+++ b/backend/app/utils/note_helper.py
@@ -18,7 +18,8 @@ def replacer(match):
total_seconds = int(mm) * 60 + int(ss)
if platform == 'bilibili':
- url = f"https://www.bilibili.com/video/{video_id}?t={total_seconds}"
+            bili_path = video_id.replace("_p", "?p=")  # new local name: rebinding video_id here would make it local to replacer
+            url = f"https://www.bilibili.com/video/{bili_path}{'&' if '?' in bili_path else '?'}t={total_seconds}"
elif platform == 'youtube':
url = f"https://www.youtube.com/watch?v={video_id}&t={total_seconds}s"
elif platform == 'douyin':
diff --git a/backend/ffmpeg_helper.py b/backend/ffmpeg_helper.py
index b116aeeb..46a6af3c 100644
--- a/backend/ffmpeg_helper.py
+++ b/backend/ffmpeg_helper.py
@@ -11,16 +11,27 @@ def check_ffmpeg_exists() -> bool:
检查 ffmpeg 是否可用。优先使用 FFMPEG_BIN_PATH 环境变量指定的路径。
"""
ffmpeg_bin_path = os.getenv("FFMPEG_BIN_PATH")
- logger.info(f"FFMPEG_BIN_PATH: {ffmpeg_bin_path}")
- if ffmpeg_bin_path and os.path.isdir(ffmpeg_bin_path):
- os.environ["PATH"] = ffmpeg_bin_path + os.pathsep + os.environ.get("PATH", "")
- logger.info(f"ffmpeg 未配置路径,尝试使用系统路径PATH: {os.environ.get('PATH')}")
+
+ # 1. 如果配置了自定义路径,将其注入当前进程的 PATH 前缀
+ if ffmpeg_bin_path:
+ if os.path.isdir(ffmpeg_bin_path):
+ os.environ["PATH"] = ffmpeg_bin_path + os.pathsep + os.environ.get("PATH", "")
+ logger.info(f"已将 FFMPEG_BIN_PATH 加入搜索路径: {ffmpeg_bin_path}")
+ else:
+ logger.warning(f"配置的 FFMPEG_BIN_PATH 不是有效目录: {ffmpeg_bin_path}")
+
+ # 2. 直接尝试运行 ffmpeg
try:
- subprocess.run(["ffmpeg", "-version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
+ subprocess.run(
+ ["ffmpeg", "-version"],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL,
+ check=True
+ )
logger.info("ffmpeg 已安装")
return True
- except (FileNotFoundError, OSError, subprocess.CalledProcessError):
- logger.info("ffmpeg 未安装")
+    except (OSError, subprocess.CalledProcessError):  # OSError covers FileNotFoundError and e.g. PermissionError
+        logger.error("ffmpeg 未安装")
return False
@@ -36,4 +47,4 @@ def ensure_ffmpeg_or_raise():
"🪟 Windows 推荐:https://www.gyan.dev/ffmpeg/builds/\n"
"💡 如果你已安装,请将其路径写入 `.env` 文件,例如:\n"
"FFMPEG_BIN_PATH=/your/custom/ffmpeg/bin"
- )
\ No newline at end of file
+ )