51 changes: 50 additions & 1 deletion scripts/cli.py
@@ -170,6 +170,34 @@ def format_text_output(analysis, verbose=False):

    results_table.add_row("Verdict", verdict)

    # Add supplementary AI indicators (new toolkit heuristics)
    results_table.add_row("", "")  # Separator
    results_table.add_row("[dim]Supplementary Indicators:[/dim]", "")

    # Transition smoothness
    trans_color = "green" if analysis.transition_score < 0.3 else "yellow" if analysis.transition_score < 0.6 else "red"
    results_table.add_row(
        "Transition Smoothness",
        f"[{trans_color}]{analysis.transition_rate:.2f}/sent (score: {analysis.transition_score:.2f})[/{trans_color}]"
    )

    # Average comparative clustering
    if analysis.echo_scores:
        avg_comparative = sum(s.comparative_cluster_score for s in analysis.echo_scores) / len(analysis.echo_scores)
        comp_color = "green" if avg_comparative < 0.3 else "yellow" if avg_comparative < 0.6 else "red"
        results_table.add_row(
            "Comparative Clustering",
            f"[{comp_color}]{avg_comparative:.2f} avg[/{comp_color}]"
        )

        # Average em-dash frequency
        avg_em_dash = sum(s.em_dash_score for s in analysis.echo_scores) / len(analysis.echo_scores)
        em_color = "green" if avg_em_dash < 0.3 else "yellow" if avg_em_dash < 0.6 else "red"
        results_table.add_row(
            "Em-dash Frequency",
            f"[{em_color}]{avg_em_dash:.2f} avg[/{em_color}]"
        )
Comment on lines +183 to +199
Copilot AI Nov 22, 2025

[nitpick] The transition smoothness indicator is displayed unconditionally (lines 177-182), but the comparative clustering and em-dash indicators are only displayed when analysis.echo_scores is non-empty (line 185). This creates inconsistent display behavior: if there are no echo_scores, the user will see "Supplementary Indicators:" followed by only the transition smoothness metric, which could be confusing. Consider either:

  1. Moving transition smoothness inside the if analysis.echo_scores: block for consistency (sketched just below), or
  2. Adding separate handling to show all three metrics with appropriate default/N/A values when echo_scores is empty.
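
For comparison, option 1 would amount to wrapping the existing rows in the same guard; a rough sketch, not part of the committed diff:

    # Option 1 (sketch): emit the whole supplementary block only when echo scores exist
    if analysis.echo_scores:
        results_table.add_row("", "")  # Separator
        results_table.add_row("[dim]Supplementary Indicators:[/dim]", "")

        trans_color = "green" if analysis.transition_score < 0.3 else "yellow" if analysis.transition_score < 0.6 else "red"
        results_table.add_row(
            "Transition Smoothness",
            f"[{trans_color}]{analysis.transition_rate:.2f}/sent (score: {analysis.transition_score:.2f})[/{trans_color}]"
        )
        # ...comparative clustering and em-dash rows follow here, unchanged...

The suggested change below takes the second approach instead.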
Suggested change
    # Average comparative clustering
    if analysis.echo_scores:
        avg_comparative = sum(s.comparative_cluster_score for s in analysis.echo_scores) / len(analysis.echo_scores)
        comp_color = "green" if avg_comparative < 0.3 else "yellow" if avg_comparative < 0.6 else "red"
        comp_value = f"[{comp_color}]{avg_comparative:.2f} avg[/{comp_color}]"
    else:
        comp_value = "[grey50]N/A[/grey50]"
    results_table.add_row(
        "Comparative Clustering",
        comp_value
    )

    # Average em-dash frequency
    if analysis.echo_scores:
        avg_em_dash = sum(s.em_dash_score for s in analysis.echo_scores) / len(analysis.echo_scores)
        em_color = "green" if avg_em_dash < 0.3 else "yellow" if avg_em_dash < 0.6 else "red"
        em_value = f"[{em_color}]{avg_em_dash:.2f} avg[/{em_color}]"
    else:
        em_value = "[grey50]N/A[/grey50]"
    results_table.add_row(
        "Em-dash Frequency",
        em_value
    )


    console.print(results_table)
    console.print()

@@ -190,6 +218,8 @@ def format_text_output(analysis, verbose=False):
        echo_table.add_column("Phonetic", justify="right")
        echo_table.add_column("Structural", justify="right")
        echo_table.add_column("Semantic", justify="right")
        echo_table.add_column("Comparative", justify="right", style="cyan")
        echo_table.add_column("Em-dash", justify="right", style="cyan")
        echo_table.add_column("Combined", justify="right", style="bold")

        for i, score in enumerate(analysis.echo_scores[:10], 1):
@@ -198,6 +228,8 @@ def format_text_output(analysis, verbose=False):
                f"{score.phonetic_score:.3f}",
                f"{score.structural_score:.3f}",
                f"{score.semantic_score:.3f}",
                f"{score.comparative_cluster_score:.3f}",
                f"{score.em_dash_score:.3f}",
                f"{score.combined_score:.3f}"
            )

@@ -245,6 +277,10 @@ def format_json_output(analysis):
            else "possibly_watermarked" if analysis.final_score > 0.5
            else "likely_human"
        ),
        "supplementary_indicators": {
            "transition_rate": float(analysis.transition_rate),
            "transition_score": float(analysis.transition_score),
        },
        "metadata": {
            "clause_pairs": len(analysis.clause_pairs),
            "echo_scores_count": len(analysis.echo_scores),
@@ -254,6 +290,9 @@
    # Add echo scores if available
    if analysis.echo_scores:
        combined_scores = [float(s.combined_score) for s in analysis.echo_scores]
        comparative_scores = [float(s.comparative_cluster_score) for s in analysis.echo_scores]
        em_dash_scores = [float(s.em_dash_score) for s in analysis.echo_scores]

        result["echo_scores"] = {
            "mean": sum(combined_scores) / len(combined_scores),
            "max": max(combined_scores),
@@ -263,11 +302,21 @@
                    "phonetic": float(s.phonetic_score),
                    "structural": float(s.structural_score),
                    "semantic": float(s.semantic_score),
                    "combined": float(s.combined_score),
                    "comparative_cluster": float(s.comparative_cluster_score),
                    "em_dash": float(s.em_dash_score)
                }
                for s in analysis.echo_scores[:10]  # First 10 samples
            ]
        }

        # Add supplementary indicator averages
        result["supplementary_indicators"]["avg_comparative_cluster"] = (
            sum(comparative_scores) / len(comparative_scores)
        )
        result["supplementary_indicators"]["avg_em_dash"] = (
            sum(em_dash_scores) / len(em_dash_scores)
        )

    return json.dumps(result, indent=2)
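
With these additions, the supplementary_indicators object in the JSON output looks roughly like this (illustrative values; other top-level keys omitted):

    "supplementary_indicators": {
        "transition_rate": 0.42,
        "transition_score": 0.31,
        "avg_comparative_cluster": 0.38,
        "avg_em_dash": 0.12
    }

The two avg_* keys are only added when echo_scores is non-empty, mirroring the guard above.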

19 changes: 16 additions & 3 deletions specHO/detector.py
@@ -25,6 +25,7 @@
from .clause_identifier.pipeline import ClauseIdentifier
from .echo_engine.pipeline import EchoAnalysisEngine
from .scoring.pipeline import ScoringModule
from .scoring.transition_analyzer import TransitionSmoothnessAnalyzer
from .validator.pipeline import StatisticalValidator


@@ -110,6 +111,9 @@ def __init__(self, baseline_path: str = "data/baseline/baseline_stats.pkl"):
        self.echo_engine = EchoAnalysisEngine()
        self.scoring_module = ScoringModule()
        self.validator = StatisticalValidator(baseline_path)

        # Initialize supplementary analyzer for transition smoothness
        self.transition_analyzer = TransitionSmoothnessAnalyzer()

        self.baseline_path = baseline_path

@@ -222,19 +226,26 @@ def analyze(self, text: str) -> DocumentAnalysis:
        z_score, confidence = self.validator.validate(final_score)
        logging.debug(f" → Z-score: {z_score:.4f}, Confidence: {confidence:.4f}")

        # Stage 6: Supplementary Analysis - Transition Smoothness
        logging.debug("Stage 6: Analyzing transition smoothness...")
        _, _, transition_rate, transition_score = self.transition_analyzer.analyze_text(text)
        logging.debug(f" → Transition rate: {transition_rate:.4f}, Score: {transition_score:.4f}")

        # Package complete analysis
        analysis = DocumentAnalysis(
            text=text,
            clause_pairs=clause_pairs,
            echo_scores=echo_scores,
            final_score=final_score,
            z_score=z_score,
            confidence=confidence,
            transition_rate=transition_rate,
            transition_score=transition_score
        )

        logging.info(
            f"Analysis complete: score={final_score:.3f}, z={z_score:.2f}, "
            f"conf={confidence:.1%}, trans_rate={transition_rate:.2f}"
        )

        return analysis
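
TransitionSmoothnessAnalyzer is defined in specHO/scoring/transition_analyzer.py and is not part of this diff. From its use above, analyze_text is assumed to return a 4-tuple whose last two elements are the per-sentence transition rate and a normalized score; a minimal sketch of that assumed interface (illustrative only, not the real implementation):

import re
from typing import List, Tuple

class TransitionSmoothnessSketch:
    # Hypothetical word list; the real analyzer's inventory may differ.
    TRANSITION_WORDS = {
        "however", "moreover", "furthermore", "additionally",
        "consequently", "therefore", "nevertheless", "meanwhile",
    }

    def analyze_text(self, text: str) -> Tuple[List[str], List[str], float, float]:
        """Return (sentences, transition words found, transitions per sentence, score in [0, 1])."""
        sentences = [s for s in re.split(r"[.!?]+\s*", text) if s.strip()]
        found = [
            w for s in sentences for w in re.findall(r"[A-Za-z']+", s.lower())
            if w in self.TRANSITION_WORDS
        ]
        rate = len(found) / len(sentences) if sentences else 0.0
        score = min(1.0, rate)  # roughly one transition word per sentence saturates the score
        return sentences, found, rate, score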
@@ -260,7 +271,9 @@ def _create_empty_analysis(self, text: str) -> DocumentAnalysis:
            echo_scores=[],
            final_score=0.0,
            z_score=0.0,
            confidence=0.5,  # Neutral confidence for empty input
            transition_rate=0.0,
            transition_score=0.0
        )

    def get_pipeline_info(self) -> dict:
3 changes: 3 additions & 0 deletions specHO/echo_engine/__init__.py
@@ -2,5 +2,8 @@
Echo Engine Component

Analyzes phonetic, structural, and semantic echoes between clause pairs.
Also includes supplementary AI watermark indicators (comparative clustering,
em-dash frequency).

Part of the SpecHO watermark detection system.
"""
171 changes: 171 additions & 0 deletions specHO/echo_engine/comparative_analyzer.py
@@ -0,0 +1,171 @@
"""
Comparative Clustering Analyzer for SpecHO Watermark Detector.

Analyzes clustering of comparative terms within clause zones as an AI watermark
indicator. Based on toolkit analysis showing that AI (especially GPT-4) tends to
cluster multiple comparative terms in single sentences, creating a "harmonic
oscillation" pattern.

Part of Component 3: Echo Engine (Supplementary analyzer).
"""

from typing import List, Set
from specHO.models import Token


class ComparativeClusterAnalyzer:
    """
    Analyzes comparative term clustering between clause zones.

    AI-generated text often contains clusters of comparative and superlative
    terms (less/more/shorter/longer/better/worse) that create a rhythmic pattern.
    This analyzer detects such clustering as a watermark indicator.

    Key AI Tell:
    - 5+ comparatives in a sentence pair = EXTREME suspicion
    - 3-4 comparatives = HIGH suspicion
    - 2 comparatives = MODERATE suspicion
    - 0-1 comparatives = LOW suspicion

    Examples from toolkit analysis:
        "learned less, invested less effort, wrote advice that was shorter,
        less factual and more generic" → 5 comparatives (strong AI tell)
    """

    # Comprehensive list of comparative terms typical in AI writing
    COMPARATIVE_TERMS: Set[str] = {
        # Basic comparatives
        'less', 'more', 'fewer', 'greater',
        'smaller', 'larger', 'shorter', 'longer',
        'better', 'worse', 'deeper', 'shallower',
        'higher', 'lower', 'stronger', 'weaker',
        'faster', 'slower', 'easier', 'harder',

        # Superlatives
        'least', 'most', 'fewest', 'greatest',
        'smallest', 'largest', 'shortest', 'longest',
        'best', 'worst', 'deepest', 'shallowest',
        'highest', 'lowest', 'strongest', 'weakest',
        'fastest', 'slowest', 'easiest', 'hardest',

        # Adjective comparatives (common in AI)
        'simpler', 'clearer', 'broader', 'narrower',
        'richer', 'poorer', 'newer', 'older',
        'younger', 'earlier', 'later', 'nearer',
        'farther', 'further', 'closer', 'tighter',
        'looser', 'wider', 'thinner', 'thicker'
    }

    def analyze(self, zone_a: List[Token], zone_b: List[Token]) -> float:
        """
        Calculate comparative clustering score for a clause pair.

        Args:
            zone_a: First zone (terminal content words from clause A)
            zone_b: Second zone (initial content words from clause B)

        Returns:
            Float in [0, 1] representing comparative clustering intensity.
            - 0.0-0.2: No/minimal clustering (0-1 comparatives)
            - 0.2-0.4: Mild clustering (2 comparatives)
            - 0.4-0.7: Moderate clustering (3 comparatives)
            - 0.7-0.9: High clustering (4 comparatives)
            - 0.9-1.0: Extreme clustering (5+ comparatives)

        Algorithm:
            1. Extract all tokens from both zones
            2. Count comparative terms (case-insensitive)
            3. Map count to [0,1] score using threshold function
            4. Return clustering score
        """
        if not zone_a and not zone_b:
            return 0.0

        # Extract all text tokens from both zones
        all_tokens = zone_a + zone_b
        token_texts = [token.text.lower() for token in all_tokens if token.text]

        # Count comparative terms
        comparative_count = sum(
            1 for text in token_texts if text in self.COMPARATIVE_TERMS
        )

        # Map count to [0,1] score using threshold function
        # Based on toolkit analysis thresholds
        score = self._count_to_score(comparative_count)

        return score

    def _count_to_score(self, count: int) -> float:
        """
        Convert comparative count to normalized score.

        Scoring function based on toolkit analysis:
        - 0-1 comparatives: 0.0-0.2 (minimal)
        - 2 comparatives: 0.3 (mild)
        - 3 comparatives: 0.5 (moderate)
        - 4 comparatives: 0.8 (high)
        - 5+ comparatives: 0.95-1.0 (extreme)

        Args:
            count: Number of comparative terms found

        Returns:
            Float in [0, 1]
        """
        if count == 0:
            return 0.0
        elif count == 1:
            return 0.15
        elif count == 2:
            return 0.3
        elif count == 3:
            return 0.5
        elif count == 4:
            return 0.8
        elif count >= 5:
            # Scale beyond 5: 0.95 for 5, approach 1.0 asymptotically
            return min(1.0, 0.9 + (count - 5) * 0.02)

        return 0.0

Copilot AI Nov 22, 2025

This return 0.0 statement is unreachable code because all possible integer values of count are handled by the if-elif chain above (count == 0, == 1, == 2, == 3, == 4, >= 5 covers all non-negative integers). This should be removed or replaced with an assertion/exception if the intent is to handle unexpected cases.

Suggested change
        raise ValueError(f"Unexpected count value in _count_to_score: {count}")


    def get_comparatives_in_zones(self, zone_a: List[Token], zone_b: List[Token]) -> List[str]:
        """
        Get list of comparative terms found in the zones (for debugging/display).

        Args:
            zone_a: First zone
            zone_b: Second zone

        Returns:
            List of comparative terms found (lowercase)

        Example:
            >>> analyzer = ComparativeClusterAnalyzer()
            >>> comparatives = analyzer.get_comparatives_in_zones(zone_a, zone_b)
            >>> print(comparatives)
            ['less', 'more', 'shorter', 'better']
        """
        all_tokens = zone_a + zone_b
        token_texts = [token.text.lower() for token in all_tokens if token.text]

        found_comparatives = [
            text for text in token_texts if text in self.COMPARATIVE_TERMS
        ]

        return found_comparatives


def quick_comparative_analysis(zone_a: List[Token], zone_b: List[Token]) -> float:
    """
    Convenience function for quick comparative clustering analysis.

    Args:
        zone_a: First zone
        zone_b: Second zone

    Returns:
        Comparative clustering score in [0, 1]
    """
    analyzer = ComparativeClusterAnalyzer()
    return analyzer.analyze(zone_a, zone_b)
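
A quick usage sketch for the new analyzer (illustrative only; it assumes Token can be constructed from just a text field, which may not match the real specHO.models.Token, although the analyzer only ever reads token.text):

from specHO.echo_engine.comparative_analyzer import quick_comparative_analysis
from specHO.models import Token

# Hypothetical zones; only the .text attribute is consulted by the analyzer.
zone_a = [Token(text=w) for w in ["wrote", "advice", "shorter"]]
zone_b = [Token(text=w) for w in ["less", "factual", "more", "generic"]]

score = quick_comparative_analysis(zone_a, zone_b)
print(f"comparative clustering score: {score:.2f}")  # 3 comparatives ('shorter', 'less', 'more') -> 0.5

The em-dash frequency analyzer that produces em_dash_score is likewise not included in this diff; a minimal sketch of that heuristic, purely illustrative rather than the actual specHO implementation:

def em_dash_frequency_score(text: str) -> float:
    """Hypothetical sketch: return a [0, 1] score that grows with em-dash density per sentence."""
    if not text.strip():
        return 0.0
    sentences = max(1, text.count('.') + text.count('!') + text.count('?'))
    em_dashes = text.count('\u2014')  # U+2014 EM DASH
    return min(1.0, em_dashes / sentences)  # about one em-dash per sentence saturates the score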