def format_timestamp(seconds_str: str) -> str:
    """
    Format a time offset given in seconds as an M:SS timestamp.

    Fractional seconds are truncated (not rounded), negative values are
    clamped to zero, and minutes are neither zero-padded nor rolled over
    into hours (e.g. 3725 seconds becomes "62:05").

    Args:
        seconds_str: Time in seconds as a string (e.g., "65.50"). Any value
            accepted by ``float()`` also works.

    Returns:
        Formatted timestamp in M:SS format (e.g., "1:05"). Falls back to
        "0:00" when the value cannot be interpreted as a finite number.

    Example:
        >>> format_timestamp("65.50")
        '1:05'
        >>> format_timestamp("5.25")
        '0:05'
        >>> format_timestamp("inf")
        '0:00'
    """
    try:
        total_seconds = max(0, int(float(seconds_str)))
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN.
        # TypeError: None or another type float() cannot handle.
        # OverflowError: +/- infinity (including overflowing literals such
        # as "1e400"), which int() cannot represent.
        return "0:00"

    minutes, seconds = divmod(total_seconds, 60)
    return f"{minutes}:{seconds:02d}"
generate_transcript_markdown( for entry in transcript_data: speaker = format_speaker_name(entry.get('speaker', 'Unknown Speaker')) text = entry.get('text', '') - start_time = entry.get('start', '0.00') - end_time = entry.get('end', '0.00') - markdown_content += f"**{speaker}** *({start_time}s - {end_time}s)*: {text}\n\n" + start_time = format_timestamp(entry.get('start', '0.00')) + end_time = format_timestamp(entry.get('end', '0.00')) + markdown_content += f"**{speaker}** *({start_time} - {end_time})*: {text}\n\n" # Return as BytesIO buffer return BytesIO(markdown_content.encode('utf-8')) diff --git a/backend/utils/pdf_generator.py b/backend/utils/pdf_generator.py index 6bdddf0..efebf38 100644 --- a/backend/utils/pdf_generator.py +++ b/backend/utils/pdf_generator.py @@ -20,7 +20,7 @@ from svglib.svglib import svg2rlg from config import Settings, get_settings -from utils.formatters import format_speaker_name +from utils.formatters import format_speaker_name, format_timestamp def _process_markdown_text(text: str) -> str: @@ -308,10 +308,10 @@ def _add_transcript_section( for entry in transcript_data: speaker = format_speaker_name(entry.get('speaker', 'Unknown Speaker')) text = entry.get('text', '') - start_time = entry.get('start', '0.00') - end_time = entry.get('end', '0.00') + start_time = format_timestamp(entry.get('start', '0.00')) + end_time = format_timestamp(entry.get('end', '0.00')) - timestamp_text = f"[{start_time}s - {end_time}s]" + timestamp_text = f"[{start_time} - {end_time}]" speaker_line = f"{speaker} {timestamp_text}" story.append(Paragraph(speaker_line, styles['speaker'])) story.append(Paragraph(text, styles['transcript']))