Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/App.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { fetchClimateAnomalies } from '@/services/climate';
import { enrichEventsWithExposure } from '@/services/population-exposure';
import { buildMapUrl, debounce, loadFromStorage, parseMapUrlState, saveToStorage, ExportPanel, getCircuitBreakerCooldownInfo, isMobileDevice, setTheme, getCurrentTheme } from '@/utils';
import { reverseGeocode } from '@/utils/reverse-geocode';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
import { CountryBriefPage } from '@/components/CountryBriefPage';
import { maybeShowDownloadBanner } from '@/components/DownloadBanner';
import { mountCommunityWidget } from '@/components/CommunityWidget';
Expand Down Expand Up @@ -3166,15 +3167,13 @@ export class App {
}

private findFlashLocation(title: string): { lat: number; lon: number } | null {
const titleLower = title.toLowerCase();
let bestMatch: { lat: number; lon: number; matches: number } | null = null;

const tokens = tokenizeForMatch(title);
const countKeywordMatches = (keywords: string[] | undefined): number => {
if (!keywords) return 0;
let matches = 0;
for (const keyword of keywords) {
const cleaned = keyword.trim().toLowerCase();
if (cleaned.length >= 3 && titleLower.includes(cleaned)) {
if (matchKeyword(tokens, keyword)) {
matches++;
}
}
Expand Down
14 changes: 7 additions & 7 deletions src/components/DeckGLMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Uses deck.gl for high-performance rendering of large datasets
* Mobile devices gracefully degrade to the D3/SVG-based Map component
*/
import { tokenizeForMatch, matchKeyword, matchesAnyKeyword, findMatchingKeywords } from '@/utils/keyword-match';
import { MapboxOverlay } from '@deck.gl/mapbox';
import type { Layer, LayersList, PickingInfo } from '@deck.gl/core';
import { GeoJsonLayer, ScatterplotLayer, PathLayer, IconLayer, TextLayer } from '@deck.gl/layers';
Expand Down Expand Up @@ -3382,10 +3383,9 @@ export class DeckGLMap {
const matchCounts = new Map<string, number>();

recentNews.forEach(item => {
const tokens = tokenizeForMatch(item.title);
this.hotspots.forEach(hotspot => {
if (hotspot.keywords.some(kw =>
item.title.toLowerCase().includes(kw.toLowerCase())
)) {
if (matchesAnyKeyword(tokens, hotspot.keywords)) {
breakingKeywords.add(hotspot.id);
matchCounts.set(hotspot.id, (matchCounts.get(hotspot.id) || 0) + 1);
}
Expand All @@ -3411,22 +3411,22 @@ export class DeckGLMap {

return this.news
.map((item) => {
const titleLower = item.title.toLowerCase();
const matchedKeywords = hotspot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
const tokens = tokenizeForMatch(item.title);
const matchedKeywords = findMatchingKeywords(tokens, hotspot.keywords);

if (matchedKeywords.length === 0) return null;

// Check if this news mentions other hotspot conflict topics
const conflictMatches = conflictTopics.filter(t =>
titleLower.includes(t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
matchKeyword(tokens, t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
);

// If article mentions a major conflict topic that isn't this hotspot, deprioritize heavily
if (conflictMatches.length > 0) {
// Only include if it ALSO has a strong local keyword (city name, agency)
const strongLocalMatch = matchedKeywords.some(kw =>
kw.toLowerCase() === hotspot.name.toLowerCase() ||
hotspot.agencies?.some(a => titleLower.includes(a.toLowerCase()))
hotspot.agencies?.some(a => matchKeyword(tokens, a))
);
if (!strongLocalMatch) return null;
}
Expand Down
13 changes: 7 additions & 6 deletions src/components/Map.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as d3 from 'd3';
import * as topojson from 'topojson-client';
import { escapeHtml } from '@/utils/sanitize';
import { tokenizeForMatch, matchKeyword, findMatchingKeywords } from '@/utils/keyword-match';
import { getCSSColor } from '@/utils';
import type { Topology, GeometryCollection } from 'topojson-specification';
import type { Feature, Geometry } from 'geojson';
Expand Down Expand Up @@ -2737,22 +2738,22 @@ export class MapComponent {

return this.news
.map((item) => {
const titleLower = item.title.toLowerCase();
const matchedKeywords = hotspot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
const tokens = tokenizeForMatch(item.title);
const matchedKeywords = findMatchingKeywords(tokens, hotspot.keywords);

if (matchedKeywords.length === 0) return null;

// Check if this news mentions other hotspot conflict topics
const conflictMatches = conflictTopics.filter(t =>
titleLower.includes(t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
matchKeyword(tokens, t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
);

// If article mentions a major conflict topic that isn't this hotspot, deprioritize heavily
if (conflictMatches.length > 0) {
// Only include if it ALSO has a strong local keyword (city name, agency)
const strongLocalMatch = matchedKeywords.some(kw =>
kw.toLowerCase() === hotspot.name.toLowerCase() ||
hotspot.agencies?.some(a => titleLower.includes(a.toLowerCase()))
hotspot.agencies?.some(a => matchKeyword(tokens, a))
);
if (!strongLocalMatch) return null;
}
Expand All @@ -2776,8 +2777,8 @@ export class MapComponent {
let matchedCount = 0;

news.forEach((item) => {
const titleLower = item.title.toLowerCase();
const matches = spot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
const tokens = tokenizeForMatch(item.title);
const matches = findMatchingKeywords(tokens, spot.keywords);

if (matches.length > 0) {
matchedCount++;
Expand Down
4 changes: 2 additions & 2 deletions src/config/geo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ export const INTEL_HOTSPOTS: Hotspot[] = [
lat: 38.9,
lon: -77.0,
location: 'Washington D.C., USA',
keywords: ['pentagon', 'white house', 'congress', 'cia', 'nsa', 'washington', 'biden', 'trump', 'house', 'senate', 'supreme court', 'vance', 'elon', 'us '],
keywords: ['pentagon', 'white house', 'congress', 'cia', 'nsa', 'washington', 'biden', 'trump', 'senate', 'supreme court', 'vance', 'elon'],
agencies: ['Pentagon', 'CIA', 'NSA', 'State Dept'],
description: 'US government and military headquarters. Intelligence community center.',
status: 'Monitoring',
Expand Down Expand Up @@ -375,7 +375,7 @@ export const INTEL_HOTSPOTS: Hotspot[] = [
lat: 33.5,
lon: 36.3,
location: 'Syria',
keywords: ['syria', 'damascus', 'assad', 'syrian', 'hts'],
keywords: ['syria', 'damascus', 'assad', 'syrian', 'hts', 'tahrir al-sham', 'hayat tahrir'],
agencies: ['Syrian Govt', 'HTS', 'Russian Forces', 'Turkish Forces'],
description: 'Syrian civil war aftermath. Multiple foreign interventions.',
status: 'Monitoring',
Expand Down
11 changes: 6 additions & 5 deletions src/services/country-instability.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { SocialUnrestEvent, MilitaryFlight, MilitaryVessel, ClusteredEvent, InternetOutage } from '@/types';
import { INTEL_HOTSPOTS, CONFLICT_ZONES, STRATEGIC_WATERWAYS } from '@/config/geo';
import { TIER1_COUNTRIES } from '@/config/countries';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
import { focalPointDetector } from './focal-point-detector';
import type { ConflictEvent, UcdpConflictStatus, HapiConflictSummary } from './conflict';
import type { CountryDisplacement } from '@/services/displacement';
Expand Down Expand Up @@ -196,12 +197,12 @@ export { COUNTRY_BOUNDS };
export type { CountryData };

function normalizeCountryName(name: string): string | null {
const lower = name.toLowerCase();
const tokens = tokenizeForMatch(name);
for (const [code, keywords] of Object.entries(COUNTRY_KEYWORDS)) {
if (keywords.some(kw => lower.includes(kw))) return code;
if (keywords.some(kw => matchKeyword(tokens, kw))) return code;
}
for (const [code, countryName] of Object.entries(TIER1_COUNTRIES)) {
if (lower.includes(countryName.toLowerCase())) return code;
if (matchKeyword(tokens, countryName)) return code;
}
return null;
}
Expand Down Expand Up @@ -455,10 +456,10 @@ export function ingestMilitaryForCII(flights: MilitaryFlight[], vessels: Militar

export function ingestNewsForCII(events: ClusteredEvent[]): void {
for (const e of events) {
const title = e.primaryTitle.toLowerCase();
const tokens = tokenizeForMatch(e.primaryTitle);
for (const [code] of Object.entries(TIER1_COUNTRIES)) {
const keywords = COUNTRY_KEYWORDS[code] || [];
if (keywords.some(kw => title.includes(kw))) {
if (keywords.some(kw => matchKeyword(tokens, kw))) {
if (!countryDataMap.has(code)) countryDataMap.set(code, initCountryData());
countryDataMap.get(code)!.newsEvents.push(e);
}
Expand Down
8 changes: 4 additions & 4 deletions src/services/entity-index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ export function findEntitiesInText(text: string): EntityMatch[] {
const index = getEntityIndex();
const matches: EntityMatch[] = [];
const seen = new Set<string>();
const textLower = text.toLowerCase();

for (const [alias, entityId] of index.byAlias) {
if (alias.length < 3) continue;
Expand All @@ -123,18 +122,19 @@ export function findEntitiesInText(text: string): EntityMatch[] {

for (const [keyword, entityIds] of index.byKeyword) {
if (keyword.length < 3) continue;
if (!textLower.includes(keyword)) continue;
const kwRegex = new RegExp(`\\b${escapeRegex(keyword)}\\b`, 'gi');
const kwMatch = kwRegex.exec(text);
if (!kwMatch) continue;

for (const entityId of entityIds) {
if (seen.has(entityId)) continue;

const pos = textLower.indexOf(keyword);
matches.push({
entityId,
matchedText: keyword,
matchType: 'keyword',
confidence: 0.7,
position: pos,
position: kwMatch.index,
});
seen.add(entityId);
}
Expand Down
14 changes: 3 additions & 11 deletions src/services/geo-hub-index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Geopolitical Hub Index - aggregates news by strategic locations
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';

export interface GeoHubLocation {
id: string;
Expand Down Expand Up @@ -109,20 +110,11 @@ export interface GeoHubMatch {
export function inferGeoHubsFromTitle(title: string): GeoHubMatch[] {
const index = buildGeoHubIndex();
const matches: GeoHubMatch[] = [];
const titleLower = title.toLowerCase();
const tokens = tokenizeForMatch(title);
const seenHubs = new Set<string>();

for (const [keyword, hubIds] of index.byKeyword) {
if (keyword.length < 2) continue;

// Word boundary check for short keywords to avoid false positives
const regex = keyword.length < 5
? new RegExp(`\\b${keyword}\\b`, 'i')
: null;

const found = regex
? regex.test(titleLower)
: titleLower.includes(keyword);
const found = matchKeyword(tokens, keyword);

if (found) {
for (const hubId of hubIds) {
Expand Down
13 changes: 5 additions & 8 deletions src/services/related-assets.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { ClusteredEvent, RelatedAsset, AssetType, RelatedAssetContext } from '@/types';
import { t } from '@/services/i18n';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
import {
INTEL_HOTSPOTS,
CONFLICT_ZONES,
Expand Down Expand Up @@ -27,24 +28,20 @@ interface AssetOrigin {
label: string;
}

function toTitleLower(titles: string[]): string[] {
return titles.map(title => title.toLowerCase());
}

function detectAssetTypes(titles: string[]): AssetType[] {
const normalized = toTitleLower(titles);
const tokenized = titles.map(t => tokenizeForMatch(t));
const types = Object.entries(ASSET_KEYWORDS)
.filter(([, keywords]) =>
normalized.some(title => keywords.some(keyword => title.includes(keyword)))
tokenized.some(tokens => keywords.some(keyword => matchKeyword(tokens, keyword)))
)
.map(([type]) => type as AssetType);
return types;
}

function countKeywordMatches(titles: string[], keywords: string[]): number {
const normalized = toTitleLower(titles);
const tokenized = titles.map(t => tokenizeForMatch(t));
return keywords.reduce((count, keyword) => {
return count + normalized.filter(title => title.includes(keyword)).length;
return count + tokenized.filter(tokens => matchKeyword(tokens, keyword)).length;
}, 0);
}

Expand Down
9 changes: 5 additions & 4 deletions src/services/story-data.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { calculateCII, type CountryScore } from './country-instability';
import type { ClusteredEvent } from '@/types';
import type { ThreatLevel } from './threat-classifier';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';

const COUNTRY_KEYWORDS: Record<string, string[]> = {
US: ['united states', 'usa', 'america', 'washington', 'biden', 'trump', 'pentagon'],
Expand Down Expand Up @@ -87,8 +88,8 @@ export function collectStoryData(

const keywords = COUNTRY_KEYWORDS[countryCode] || [countryName.toLowerCase()];
const countryNews = allNews.filter(e => {
const lower = e.primaryTitle.toLowerCase();
return keywords.some(kw => lower.includes(kw));
const tokens = tokenizeForMatch(e.primaryTitle);
return keywords.some(kw => matchKeyword(tokens, kw));
});

const sortedNews = [...countryNews].sort((a, b) => {
Expand All @@ -104,8 +105,8 @@ export function collectStoryData(
) || null;

const countryMarkets = predictionMarkets.filter(m => {
const lower = m.title.toLowerCase();
return keywords.some(kw => lower.includes(kw));
const tokens = tokenizeForMatch(m.title);
return keywords.some(kw => matchKeyword(tokens, kw));
});

const threatCounts = { critical: 0, high: 0, medium: 0, categories: new Set<string>() };
Expand Down
5 changes: 3 additions & 2 deletions src/services/tech-hub-index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { STARTUP_ECOSYSTEMS } from '@/config/startup-ecosystems';
import { TECH_COMPANIES } from '@/config/tech-companies';
import { STARTUP_HUBS } from '@/config/tech-geo';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';

export interface TechHubLocation {
id: string;
Expand Down Expand Up @@ -211,14 +212,14 @@ export interface HubMatch {
export function inferHubsFromTitle(title: string): HubMatch[] {
const index = buildTechHubIndex();
const matches: HubMatch[] = [];
const titleLower = title.toLowerCase();
const titleTokens = tokenizeForMatch(title);
const seenHubs = new Set<string>();

// Check each keyword
for (const [keyword, hubIds] of index.byKeyword) {
if (keyword.length < 3) continue; // Skip very short keywords

if (titleLower.includes(keyword)) {
if (matchKeyword(titleTokens, keyword)) {
for (const hubId of hubIds) {
if (seenHubs.has(hubId)) continue;
seenHubs.add(hubId);
Expand Down
64 changes: 64 additions & 0 deletions src/utils/keyword-match.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/**
* Tokenization-based keyword matching for geo-tagging.
* Single source of truth — all geo/hotspot keyword matching imports from here.
*
* Uses Set-based lookups (O(1)) instead of regex to eliminate:
* - Substring false positives ("assad" inside "ambassador")
* - Per-keyword RegExp allocations in hot loops
*
* @see https://github.com/koala73/worldmonitor/issues/324
*/

export interface TokenizedTitle {
  /** Unique lowercase words for O(1) single-word lookups */
  words: Set<string>;
  /** Ordered lowercase words for contiguous phrase matching */
  ordered: string[];
}

/**
 * Tokenize a title into lowercase words.
 * Call once per title, reuse across all keyword checks.
 *
 * Tokens are split on any run of characters outside [a-z0-9'-], then
 * trimmed of wrapping quotes/dashes so headline styling like
 * "'historic' deal" still yields the bare word "historic". Internal
 * apostrophes and hyphens ("don't", "al-sham") are preserved.
 * Possessive stems are additionally indexed in `words` (not `ordered`)
 * so "Biden's visit" matches the single-word keyword "biden".
 */
export function tokenizeForMatch(title: string): TokenizedTitle {
  const ordered = title
    .toLowerCase()
    .split(/[^a-z0-9'-]+/)
    // Trim wrapping quotes/dashes only; keep internal ones intact.
    .map(w => w.replace(/^['-]+|['-]+$/g, ''))
    .filter(w => w.length > 0);

  const words = new Set(ordered);
  // Index possessive stems too: "biden's" -> also add "biden".
  for (const w of ordered) {
    if (w.endsWith("'s") && w.length > 2) {
      words.add(w.slice(0, -2));
    }
  }
  return { words, ordered };
}

/**
 * Check if a single keyword matches within a tokenized title.
 * - Single-word keywords: O(1) Set lookup
 * - Multi-word keywords (e.g. "white house"): contiguous phrase search
 *
 * The keyword is tokenized with the SAME character class used for titles
 * (not just whitespace), so punctuated keywords such as "u.s." or
 * "washington, d.c." line up with the title's tokens instead of silently
 * never matching.
 */
export function matchKeyword(tokens: TokenizedTitle, keyword: string): boolean {
  const parts = keyword
    .toLowerCase()
    .split(/[^a-z0-9'-]+/)
    .filter(w => w.length > 0);
  if (parts.length === 0) return false;

  if (parts.length === 1) {
    return tokens.words.has(parts[0]!);
  }

  // Multi-word: find contiguous phrase in ordered tokens
  const { ordered } = tokens;
  for (let i = 0; i <= ordered.length - parts.length; i++) {
    let match = true;
    for (let j = 0; j < parts.length; j++) {
      if (ordered[i + j] !== parts[j]) {
        match = false;
        break;
      }
    }
    if (match) return true;
  }
  return false;
}

/** True when at least one keyword in the list occurs in the tokenized title. */
export function matchesAnyKeyword(tokens: TokenizedTitle, keywords: string[]): boolean {
  for (const candidate of keywords) {
    if (matchKeyword(tokens, candidate)) return true;
  }
  return false;
}

/** Collect every keyword from the list that occurs in the tokenized title. */
export function findMatchingKeywords(tokens: TokenizedTitle, keywords: string[]): string[] {
  const hits: string[] = [];
  for (const candidate of keywords) {
    if (matchKeyword(tokens, candidate)) hits.push(candidate);
  }
  return hits;
}
Loading