Tomotz · Tomotz · Feb 16, 2026 · Feb 16, 2026 · Feb 16, 2026
diff --git a/main.py b/main.py
@@ -462,14 +462,6 @@ def flush_batch():
     re.DOTALL | re.IGNORECASE
 )
 
-def _get_skip_ranges(content: str) -> List[Tuple[int, int]]:
-    return [(m.start(), m.end()) for m in SKIP_TAG_PATTERN.finditer(content)]
-
-def _in_skip_range(pos: int, skip_ranges: List[Tuple[int, int]]) -> bool:
-    for start, end in skip_ranges:
-        if start <= pos < end:
-            return True
-    return False
 
 def _decode_html_text(text: str) -> str:
     decoded = html_module.unescape(text)
@@ -539,8 +531,8 @@ def process_html_file(input_path: str, output_path: Optional[str], resume: bool
     with open(input_path, 'r', encoding='utf-8') as f:
         content = f.read()
 
-    skip_ranges = _get_skip_ranges(content)
-    matches = [m for m in PARAGRAPH_PATTERN.finditer(content) if not _in_skip_range(m.start(), skip_ranges)]
+    content = SKIP_TAG_PATTERN.sub('', content)
+    matches = list(PARAGRAPH_PATTERN.finditer(content))
     paragraph_count = len(matches)
 
     checkpoint_path = get_checkpoint_path(output_path) if output_path else None

diff --git a/test_main.py b/test_main.py
@@ -421,7 +421,8 @@ def test_non_paragraph_content_preserved(self, tmp_path):
         process_html_file(str(input_file), str(output_file))
         result = output_file.read_text(encoding="utf-8")
         assert "<h1>Title</h1>" in result
-        assert "<title>Test</title>" in result
+        assert "<head>" not in result
+        assert "<title>" not in result
 
     def test_empty_paragraph_no_crash(self, tmp_path):
         html = "<html><body><p></p><p>Real content here.</p></body></html>"