diff --git a/CHANGELOG.md b/CHANGELOG.md index 71308a2..018baff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,24 @@ Please, document here only changes visible to the client app. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.8.0] - 2026-02-11 + +### [23 Added Strava Activity Signals Extraction Package](https://github.com/mrbalov/pace/issues/23) + +### Added +- New `@pace/strava-activity-signals` package for extracting semantic signals from Strava activity data +- Activity validation module to ensure data integrity before processing +- Intensity classification based on activity pace and power (low, medium, high) +- Elevation classification based on total elevation gain (flat, rolling, mountainous) +- Time of day signal extraction from activity timestamps (morning, day, evening, night) +- Tag extraction and normalization from activity metadata +- Semantic context extraction from activity name and description using NER techniques +- Forbidden content checking to filter persons, political symbols, violence, sexual content, and typography instructions +- Pace calculation utility converting moving time and distance to seconds per kilometer +- Text sanitization utility for cleaning and normalizing user input +- Comprehensive signal validation with sanitization fallbacks +- Full test coverage for all signal extraction modules (3689 lines of tests and implementation) + ## [1.7.0] - 2026-02-10 ### [28 Introduced Test-Driven Development (TDD) Enforcement and Enhanced Development Workflow](https://github.com/mrbalov/pace/issues/28) diff --git a/package.json b/package.json index 7941cc8..2b6a670 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pace", - "version": "1.7.0", + "version": "1.8.0", "description": "Generates AI images based on Strava activity data.", "type": "module", "private": true, diff --git 
a/packages/strava-activity-signals/check-forbidden-content/check-forbidden-content.test.ts b/packages/strava-activity-signals/check-forbidden-content/check-forbidden-content.test.ts new file mode 100644 index 0000000..36e9093 --- /dev/null +++ b/packages/strava-activity-signals/check-forbidden-content/check-forbidden-content.test.ts @@ -0,0 +1,161 @@ +import { describe, test, expect } from 'bun:test'; + +import checkForbiddenContent from './check-forbidden-content'; + +type Case = [string, string, boolean]; + +describe('check-forbidden-content', () => { + describe('detects person-related forbidden content', () => { + test.each([ + ['detects person keyword', 'A person running', true], + ['detects people keyword', 'Many people at the park', true], + ['detects individual keyword', 'An individual athlete', true], + ['detects human keyword', 'Human performance', true], + ['detects man keyword', 'A man running', true], + ['detects woman keyword', 'Woman jogging', true], + ['detects child keyword', 'Child playing', true], + ['detects kid keyword', 'Kid running around', true], + ['detects baby keyword', 'Baby in stroller', true], + ['detects face keyword', 'Face in the photo', true], + ['detects portrait keyword', 'Portrait photography', true], + ['detects photo keyword', 'Photo of the run', true], + ['detects picture keyword', 'Picture perfect day', true], + ['detects image keyword', 'Image of runner', true], + ])('%#. 
%s', (_name, text, expected) => { + const result = checkForbiddenContent(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('detects political forbidden content', () => { + test.each([ + ['detects political keyword', 'Political rally', true], + ['detects politics keyword', 'Politics discussion', true], + ['detects government keyword', 'Government building', true], + ['detects president keyword', 'President election', true], + ['detects election keyword', 'Election day run', true], + ['detects vote keyword', 'Vote for change', true], + ['detects democracy keyword', 'Democracy march', true], + ['detects republican keyword', 'Republican event', true], + ['detects democrat keyword', 'Democrat gathering', true], + ['detects flag keyword', 'Flag ceremony', true], + ['detects banner keyword', 'Banner display', true], + ['detects symbol keyword', 'Symbol of freedom', true], + ['detects emblem keyword', 'Emblem on shirt', true], + ['detects crest keyword', 'Family crest', true], + ])('%#. 
%s', (_name, text, expected) => { + const result = checkForbiddenContent(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('detects violence forbidden content', () => { + test.each([ + ['detects violence keyword', 'Violence in the streets', true], + ['detects violent keyword', 'Violent storm', true], + ['detects fight keyword', 'Fight for victory', true], + ['detects war keyword', 'War memorial', true], + ['detects battle keyword', 'Battle training', true], + ['detects weapon keyword', 'Weapon training', true], + ['detects gun keyword', 'Starting gun', true], + ['detects knife keyword', 'Knife edge ridge', true], + ['detects sword keyword', 'Sword monument', true], + ['detects attack keyword', 'Attack the hill', true], + ['detects kill keyword', 'Kill the workout', true], + ['detects death keyword', 'Death valley run', true], + ['detects blood keyword', 'Blood donation', true], + ['detects combat keyword', 'Combat training', true], + ['detects military keyword', 'Military base', true], + ['detects soldier keyword', 'Soldier field', true], + ['detects army keyword', 'Army run', true], + ['detects navy keyword', 'Navy pier', true], + ])('%#. %s', (_name, text, expected) => { + const result = checkForbiddenContent(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('detects sexual content forbidden content', () => { + test.each([ + ['detects sexual keyword', 'Sexual content warning', true], + ['detects sex keyword', 'Sex education', true], + ['detects nude keyword', 'Nude beach', true], + ['detects naked keyword', 'Naked truth', true], + ['detects explicit keyword', 'Explicit content', true], + ['detects adult keyword', 'Adult supervision', true], + ['detects porn keyword', 'Porn website', true], + ])('%#. 
%s', (_name, text, expected) => { + const result = checkForbiddenContent(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('detects typography forbidden content', () => { + test.each([ + ['detects text keyword', 'Text message', true], + ['detects word keyword', 'Word of the day', true], + ['detects letter keyword', 'Letter of recommendation', true], + ['detects alphabet keyword', 'Alphabet song', true], + ['detects typography keyword', 'Typography design', true], + ['detects caption keyword', 'Caption this photo', true], + ['detects label keyword', 'Label the items', true], + ['detects title keyword', 'Title of the run', true], + ['detects heading keyword', 'Heading north', true], + ['detects font keyword', 'Font selection', true], + ['detects type keyword', 'Type of workout', true], + ['detects write keyword', 'Write a review', true], + ['detects print keyword', 'Print the results', true], + ['detects display keyword', 'Display on screen', true], + ['detects show keyword', 'Show the data', true], + ['detects say keyword', 'Say hello', true], + ['detects tell keyword', 'Tell a story', true], + ['detects read keyword', 'Read the instructions', true], + ])('%#. 
%s', (_name, text, expected) => { + const result = checkForbiddenContent(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles safe content correctly', () => { + test.each([ + ['allows safe running text', 'Morning trail run', false], + ['allows safe location text', 'Running through the park', false], + ['allows safe activity text', 'Easy recovery jog', false], + ['allows safe weather text', 'Sunny morning', false], + ['allows safe terrain text', 'Mountain trail', false], + ['allows safe distance text', '10k run', false], + ['allows safe time text', 'Early morning workout', false], + ['allows safe pace text', 'Quick tempo run', false], + ['allows safe gear text', 'New running shoes', false], + ['allows safe feeling text', 'Feeling strong', false], + ])('%#. %s', (_name, text, expected) => { + const result = checkForbiddenContent(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles edge cases correctly', () => { + test.each([ + ['handles empty string', '', false], + ['handles whitespace only', ' ', false], + ['handles uppercase forbidden keyword', 'PEOPLE running', true], + ['handles mixed case forbidden keyword', 'PeOpLe running', true], + ['handles forbidden keyword at start', 'Government building run', true], + ['handles forbidden keyword at end', 'Running with people', true], + ['handles forbidden keyword in middle', 'Great people filled event', true], + ['handles multiple forbidden keywords', 'Government people with weapons', true], + ['handles partial word match that should not trigger', 'Manhattan beach run', false], + ['handles special characters', '!@#$%^&*()', false], + ])('%#. 
%s', (_name, text, expected) => { + const result = checkForbiddenContent(text); + + expect(result).toStrictEqual(expected); + }); + }); +}); diff --git a/packages/strava-activity-signals/check-forbidden-content/check-forbidden-content.ts b/packages/strava-activity-signals/check-forbidden-content/check-forbidden-content.ts new file mode 100644 index 0000000..292ead9 --- /dev/null +++ b/packages/strava-activity-signals/check-forbidden-content/check-forbidden-content.ts @@ -0,0 +1,22 @@ +import { PATTERNS } from './constants'; + +/** + * Checks if text contains forbidden content patterns (case-insensitive, whole-word matching). + * + * Forbidden content includes: + * - Real persons or identifiable individuals + * - Political or ideological symbols + * - Explicit violence or sexual content + * - Military or combat scenes + * - Text/captions/typography instructions + * + * @param {string} text - Text to check for forbidden content. + * @returns {boolean} True if any forbidden pattern matches the text, false otherwise. + */ +const checkForbiddenContent = (text: string): boolean => { + const lowerText = text.toLowerCase(); + + return PATTERNS.some((pattern) => pattern.test(lowerText)); +}; + +export default checkForbiddenContent; diff --git a/packages/strava-activity-signals/check-forbidden-content/constants.ts b/packages/strava-activity-signals/check-forbidden-content/constants.ts new file mode 100644 index 0000000..f113d4d --- /dev/null +++ b/packages/strava-activity-signals/check-forbidden-content/constants.ts @@ -0,0 +1,44 @@ +/** + * Patterns for real persons/identifiable individuals. + */ +export const PERSON_PATTERNS = [ + /\b(person|people|individual|human|man|woman|child|kid|baby)\b/, + /\b(face|portrait|photo|picture|image|photo)\b/, +]; + +/** + * Patterns for political/ideological symbols. + */ +export const POLITICAL_PATTERNS = [ + /\b(political|politics|government|president|election|vote|democracy|republican|democrat)\b/, + /\b(flag|banner|symbol|emblem|crest)\b/, +]; + +/** + * Patterns for violence. 
+ */ +export const VIOLENCE_PATTERNS = [ + /\b(violence|violent|fight|war|battle|weapon|gun|knife|sword|attack|kill|death|blood)\b/, + /\b(combat|military|soldier|army|navy|air force)\b/, +]; + +/** + * Patterns for sexual content. + */ +export const SEXUAL_PATTERNS = [/\b(sexual|sex|nude|naked|explicit|adult|porn)\b/]; + +/** + * Patterns for text/typography instructions. + */ +export const TEXT_PATTERNS = [ + /\b(text|word|letter|alphabet|typography|caption|label|title|heading|font|type)\b/, + /\b(write|print|display|show|say|tell|read)\b/, +]; + +export const PATTERNS = [ + ...PERSON_PATTERNS, + ...POLITICAL_PATTERNS, + ...VIOLENCE_PATTERNS, + ...SEXUAL_PATTERNS, + ...TEXT_PATTERNS, +]; diff --git a/packages/strava-activity-signals/check-forbidden-content/index.ts b/packages/strava-activity-signals/check-forbidden-content/index.ts new file mode 100644 index 0000000..85ce96a --- /dev/null +++ b/packages/strava-activity-signals/check-forbidden-content/index.ts @@ -0,0 +1 @@ +export { default } from './check-forbidden-content'; diff --git a/packages/strava-activity-signals/constants.ts b/packages/strava-activity-signals/constants.ts new file mode 100644 index 0000000..161f7df --- /dev/null +++ b/packages/strava-activity-signals/constants.ts @@ -0,0 +1,53 @@ +export const INTENSITIES = ['low', 'medium', 'high'] as const; + +export const ELEVATIONS = ['flat', 'rolling', 'mountainous'] as const; + +export const TIMES_OF_DAY = ['morning', 'day', 'evening', 'night'] as const; + +/** + * World record pace is around 2:30 min/km, + * so anything faster than 2:00 min/km is suspicious. + */ +export const MAX_PACE = 120 as const; + +/** + * Classification thresholds and constants for Strava activity signals. + * Defines thresholds for classifying activity intensity, elevation, and time of day. + * Used across classification and validation logic to ensure consistency. + */ +export const CLASSIFICATIONS = { + /** Intensity classification thresholds. 
*/ + INTENSITY: { + /** + * Low intensity threshold for pace (seconds per km). + * 6:00 min/km. + */ + LOW_PACE_THRESHOLD: 360, + + /** + * High intensity threshold for pace (seconds per km). + * 4:00 min/km. + */ + HIGH_PACE_THRESHOLD: 240, + }, + + /** Elevation classification thresholds (meters). */ + ELEVATION: { + /** Flat terrain threshold. */ + FLAT_THRESHOLD: 50, + /** Rolling terrain threshold. */ + ROLLING_THRESHOLD: 500, + }, + + /** Time of day classification. */ + TIME_OF_DAY: { + /** Morning start hour (0-23). */ + MORNING_START: 5, + /** Morning end hour (0-23). */ + MORNING_END: 10, + /** Evening start hour (0-23). */ + EVENING_START: 17, + /** Night start hour (0-23). */ + NIGHT_START: 20, + }, +}; diff --git a/packages/strava-activity-signals/get-pace-seconds-per-km/get-pace-seconds-per-km.test.ts b/packages/strava-activity-signals/get-pace-seconds-per-km/get-pace-seconds-per-km.test.ts new file mode 100644 index 0000000..ed9249f --- /dev/null +++ b/packages/strava-activity-signals/get-pace-seconds-per-km/get-pace-seconds-per-km.test.ts @@ -0,0 +1,182 @@ +import { describe, test, expect } from 'bun:test'; + +import getPaceSecondsPerKm from './get-pace-seconds-per-km'; + +type Case = [ + string, + { + movingTime: number; + distance: number; + expectedPace: number; + }, +]; + +describe('get-pace-seconds-per-km', () => { + test.each([ + [ + 'standard 5k run at 5 min/km pace', + { + movingTime: 1500, + distance: 5000, + expectedPace: 300, + }, + ], + [ + 'standard 10k run at 6 min/km pace', + { + movingTime: 3600, + distance: 10000, + expectedPace: 360, + }, + ], + [ + 'marathon at 4:30 min/km pace', + { + movingTime: 11385, + distance: 42195, + expectedPace: 269.8186988979737, + }, + ], + [ + 'short 1k sprint at 3 min/km pace', + { + movingTime: 180, + distance: 1000, + expectedPace: 180, + }, + ], + [ + 'ultra distance 50k run at 7 min/km pace', + { + movingTime: 21000, + distance: 50000, + expectedPace: 420, + }, + ], + [ + 'very slow pace at 10 
min/km', + { + movingTime: 3000, + distance: 5000, + expectedPace: 600, + }, + ], + [ + 'fast pace at 3:30 min/km', + { + movingTime: 1050, + distance: 5000, + expectedPace: 210, + }, + ], + [ + 'short distance with fractional kilometers', + { + movingTime: 150, + distance: 500, + expectedPace: 300, + }, + ], + [ + 'very short distance 100m', + { + movingTime: 20, + distance: 100, + expectedPace: 200, + }, + ], + [ + 'long distance bike ride', + { + movingTime: 7200, + distance: 100000, + expectedPace: 72, + }, + ], + [ + 'zero moving time returns zero pace', + { + movingTime: 0, + distance: 5000, + expectedPace: 0, + }, + ], + [ + 'negative moving time returns zero pace', + { + movingTime: -100, + distance: 5000, + expectedPace: 0, + }, + ], + [ + 'zero distance returns zero pace', + { + movingTime: 1500, + distance: 0, + expectedPace: 0, + }, + ], + [ + 'negative distance returns zero pace', + { + movingTime: 1500, + distance: -5000, + expectedPace: 0, + }, + ], + [ + 'both zero values return zero pace', + { + movingTime: 0, + distance: 0, + expectedPace: 0, + }, + ], + [ + 'both negative values return zero pace', + { + movingTime: -100, + distance: -5000, + expectedPace: 0, + }, + ], + [ + 'very small distance calculates correctly', + { + movingTime: 10, + distance: 50, + expectedPace: 200, + }, + ], + [ + 'very large distance calculates correctly', + { + movingTime: 36000, + distance: 200000, + expectedPace: 180, + }, + ], + [ + 'decimal result is preserved', + { + movingTime: 1000, + distance: 3333, + // eslint-disable-next-line no-loss-of-precision + expectedPace: 300.03000300030005, + }, + ], + [ + 'pace calculation with typical real-world values', + { + movingTime: 2456, + distance: 8234, + expectedPace: 298.27544328394464, + }, + ], + ])('%#. 
%s', (_name, { movingTime, distance, expectedPace }) => { + const result = getPaceSecondsPerKm(movingTime, distance); + + expect(result).toBe(expectedPace); + }); +}); diff --git a/packages/strava-activity-signals/get-pace-seconds-per-km/get-pace-seconds-per-km.ts b/packages/strava-activity-signals/get-pace-seconds-per-km/get-pace-seconds-per-km.ts new file mode 100644 index 0000000..d6599f0 --- /dev/null +++ b/packages/strava-activity-signals/get-pace-seconds-per-km/get-pace-seconds-per-km.ts @@ -0,0 +1,15 @@ +/** + * Calculates pace in seconds per kilometer from moving time and distance. + * @param {number} movingTime - Moving time in seconds. + * @param {number} distance - Distance in meters. + * @returns {number} Pace in seconds per kilometer, or 0 if moving time or distance is not positive. + */ +const getPaceSecondsPerKm = (movingTime: number, distance: number): number => { + if (movingTime <= 0 || distance <= 0) { + return 0; + } else { + return movingTime / (distance / 1000); + } +}; + +export default getPaceSecondsPerKm; diff --git a/packages/strava-activity-signals/get-pace-seconds-per-km/index.ts b/packages/strava-activity-signals/get-pace-seconds-per-km/index.ts new file mode 100644 index 0000000..92e23a1 --- /dev/null +++ b/packages/strava-activity-signals/get-pace-seconds-per-km/index.ts @@ -0,0 +1 @@ +export { default } from './get-pace-seconds-per-km'; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/classify-elevation.test.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/classify-elevation.test.ts new file mode 100644 index 0000000..c41b204 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/classify-elevation.test.ts @@ -0,0 +1,30 @@ +import { describe, test, expect } from 'bun:test'; + +import { Input } from './types'; +import classifyElevation from './classify-elevation'; +import { StravaActivitySignalsElevation } from '../../types'; + +type Case = [string, Input, 
StravaActivitySignalsElevation]; + +describe('classify-elevation', () => { + test.each([ + ['flat terrain with low elevation gain', { total_elevation_gain: 30 }, 'flat'], + ['rolling terrain with moderate elevation gain', { total_elevation_gain: 300 }, 'rolling'], + [ + 'mountainous terrain with moderate elevation gain', + { total_elevation_gain: 600 }, + 'mountainous', + ], + ['mountainous terrain with high elevation gain', { total_elevation_gain: 800 }, 'mountainous'], + [ + 'default to flat when elevation gain is undefined', + { total_elevation_gain: undefined }, + 'flat', + ], + ['default to flat when elevation gain is not defined', {}, 'flat'], + ])('%#. %s', (_name, elevationGain, expected) => { + const result = classifyElevation(elevationGain); + + expect(result).toBe(expected); + }); +}); diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/classify-elevation.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/classify-elevation.ts new file mode 100644 index 0000000..f8c83f1 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/classify-elevation.ts @@ -0,0 +1,31 @@ +import { CLASSIFICATIONS } from '../../constants'; +import { StravaActivitySignalsElevation } from '../../types'; +import { Input } from './types'; + +/** + * Classifies elevation terrain based on elevation gain. + * + * Categorizes terrain as flat, rolling, or mountainous based on + * total elevation gain from the activity. + * + * Classification thresholds: + * - Flat: < 50m elevation gain, or elevation gain is undefined + * - Rolling: >= 50m and < 500m elevation gain + * - Mountainous: >= 500m elevation gain + * + * @param {Input} input - Strava activity data containing elevation gain. + * @returns {StravaActivitySignalsElevation} Elevation classification. 
+ */ +const classifyElevation = ({ total_elevation_gain }: Input): StravaActivitySignalsElevation => { + if (total_elevation_gain === undefined) { + return 'flat'; + } else if (total_elevation_gain < CLASSIFICATIONS.ELEVATION.FLAT_THRESHOLD) { + return 'flat'; + } else if (total_elevation_gain >= CLASSIFICATIONS.ELEVATION.ROLLING_THRESHOLD) { + return 'mountainous'; + } else { + return 'rolling'; + } +}; + +export default classifyElevation; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/index.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/index.ts new file mode 100644 index 0000000..04ffc87 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/index.ts @@ -0,0 +1 @@ +export { default } from './classify-elevation'; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/types.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/types.ts new file mode 100644 index 0000000..51762a3 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-elevation/types.ts @@ -0,0 +1,3 @@ +export interface Input { + total_elevation_gain?: number; +} diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/classify-intensity.test.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/classify-intensity.test.ts new file mode 100644 index 0000000..a938870 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/classify-intensity.test.ts @@ -0,0 +1,55 @@ +import { describe, test, expect } from 'bun:test'; + +import { Input } from './types'; +import classifyIntensity from './classify-intensity'; +import { StravaActivitySignalsIntensity } from '../../types'; + +type Case = [string, Input, StravaActivitySignalsIntensity]; + +describe('classify-intensity', 
() => { + test.each([ + [ + 'low intensity based on slow pace', + { + distance: 5000, // 5km + moving_time: 1800, // 30 minutes = 6:00 min/km + }, + 'low', + ], + [ + 'high intensity based on fast pace', + { + distance: 5000, // 5km + moving_time: 1200, // 20 minutes = 4:00 min/km + }, + 'high', + ], + [ + 'medium intensity based on moderate pace', + { + distance: 5000, // 5km + moving_time: 1500, // 25 minutes = 5:00 min/km + }, + 'medium', + ], + [ + 'high intensity based on high power', + { + average_watts: 300, + }, + 'high', + ], + [ + 'low intensity based on low power', + { + average_watts: 100, + }, + 'low', + ], + ['default to medium when no clear indicators', {}, 'medium'], + ])('%#. %s', (_name, activity, expected) => { + const result = classifyIntensity(activity); + + expect(result).toBe(expected); + }); +}); diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/classify-intensity.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/classify-intensity.ts new file mode 100644 index 0000000..e333e08 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/classify-intensity.ts @@ -0,0 +1,49 @@ +import { StravaActivitySignalsIntensity } from '../../types'; +import { CLASSIFICATIONS } from '../../constants'; +import { Input } from './types'; +import getPaceSecondsPerKm from '../../get-pace-seconds-per-km'; + +/** + * Classifies activity intensity based on pace and power metrics. + * + * Pace is derived from distance and moving_time and compared against the + * CLASSIFICATIONS.INTENSITY thresholds; power is compared against fixed + * 150 W / 250 W limits. Heart rate is not used. + * + * Classification logic (pace first, then average power, then weighted average power): + * - Low: Slow pace (>= 6:00 min/km) OR low power (< 150 W) + * - High: Fast pace (<= 4:00 min/km) OR high power (> 250 W) + * - Medium: Everything else, including activities with no pace or power data + * NOTE(review): moving_time of 0 with distance > 0 yields pace 0 and is classified as high - confirm intended. + * @param {Input} input - Strava activity data to classify. 
+ * @returns {StravaActivitySignalsIntensity} Intensity classification. + */ +const classifyIntensity = ({ + average_watts, + weighted_average_watts, + distance, + moving_time, +}: Input): StravaActivitySignalsIntensity => { + const hasPower = average_watts !== undefined; + const hasWeightedPower = weighted_average_watts !== undefined; + const hasPaceData = distance !== undefined && moving_time !== undefined && distance > 0; + const paceSecondsPerKm = hasPaceData ? getPaceSecondsPerKm(moving_time, distance) : 0; + + if (hasPaceData && paceSecondsPerKm >= CLASSIFICATIONS.INTENSITY.LOW_PACE_THRESHOLD) { + return 'low'; + } else if (hasPaceData && paceSecondsPerKm <= CLASSIFICATIONS.INTENSITY.HIGH_PACE_THRESHOLD) { + return 'high'; + } else if (hasPower && average_watts! > 250) { + return 'high'; + } else if (hasPower && average_watts! < 150) { + return 'low'; + } else if (hasWeightedPower && weighted_average_watts! > 250) { + return 'high'; + } else if (hasWeightedPower && weighted_average_watts! 
< 150) { + return 'low'; + } else { + return 'medium'; + } +}; + +export default classifyIntensity; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/index.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/index.ts new file mode 100644 index 0000000..850e185 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/index.ts @@ -0,0 +1 @@ +export { default } from './classify-intensity'; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/types.ts b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/types.ts new file mode 100644 index 0000000..b52c323 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/classify-intensity/types.ts @@ -0,0 +1,6 @@ +export interface Input { + average_watts?: number; + weighted_average_watts?: number; + distance?: number; + moving_time?: number; +} diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-semantic-context/extract-semantic-context.test.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-semantic-context/extract-semantic-context.test.ts new file mode 100644 index 0000000..460d23e --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-semantic-context/extract-semantic-context.test.ts @@ -0,0 +1,563 @@ +import { describe, test, expect } from 'bun:test'; + +import extractSemanticContext from './extract-semantic-context'; +import { Input } from './types'; + +type Case = [string, Input, string[] | undefined]; + +describe('extract-semantic-context', () => { + describe('extracts context from name field', () => { + test.each([ + [ + 'extracts trail keyword from name', + { + name: 'Morning trail run', + }, + ['trail'], + ], + [ + 'extracts road keyword from name', + { + name: 'Easy road run', + }, + ['road'], + ], + [ + 'extracts track 
keyword from name', + { + name: 'Track workout', + }, + ['track'], + ], + [ + 'extracts indoor keyword from name', + { + name: 'Indoor cycling', + }, + ['indoor'], + ], + [ + 'extracts outdoor keyword from name', + { + name: 'Outdoor adventure', + }, + ['outdoor'], + ], + [ + 'extracts park keyword from name', + { + name: 'Running in the park', + }, + ['park'], + ], + [ + 'extracts beach keyword from name', + { + name: 'Beach run', + }, + ['beach'], + ], + [ + 'extracts mountain keyword from name', + { + name: 'Mountain bike ride', + }, + ['mountain'], + ], + [ + 'extracts hill keyword from name', + { + name: 'Hill repeats', + }, + ['hill'], + ], + [ + 'extracts multiple keywords from name', + { + name: 'Trail run through the park', + }, + ['trail', 'park'], + ], + ])('%#. %s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('extracts context from description field', () => { + test.each([ + [ + 'extracts trail keyword from description', + { + description: 'Great trail run today', + }, + ['trail'], + ], + [ + 'extracts road keyword from description', + { + description: 'Long road cycling session', + }, + ['road'], + ], + [ + 'extracts track keyword from description', + { + description: 'Speed work on the track', + }, + ['track'], + ], + [ + 'extracts indoor keyword from description', + { + description: 'Indoor training session', + }, + ['indoor'], + ], + [ + 'extracts outdoor keyword from description', + { + description: 'Beautiful outdoor weather', + }, + ['outdoor'], + ], + [ + 'extracts park keyword from description', + { + description: 'Running through Central Park', + }, + ['park'], + ], + [ + 'extracts beach keyword from description', + { + description: 'Morning beach jog', + }, + ['beach'], + ], + [ + 'extracts mountain keyword from description', + { + description: 'Climbing mountain trails', + }, + ['trail', 'mountain'], + ], + [ + 'extracts hill keyword from 
description', + { + description: 'Tough hill workout', + }, + ['hill'], + ], + [ + 'extracts multiple keywords from description', + { + description: 'Outdoor run through mountain trails', + }, + ['trail', 'outdoor', 'mountain'], + ], + ])('%#. %s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('extracts context from both name and description', () => { + test.each([ + [ + 'extracts keywords from both fields', + { + name: 'Trail run', + description: 'Beautiful park scenery', + }, + ['trail', 'park'], + ], + [ + 'combines keywords from both fields', + { + name: 'Morning road run', + description: 'Running through the park', + }, + ['road', 'park'], + ], + [ + 'handles duplicate keywords from both fields', + { + name: 'Trail run', + description: 'Amazing trail conditions', + }, + ['trail', 'trail'], + ], + [ + 'extracts all keywords from both fields', + { + name: 'Outdoor mountain trail', + description: 'Running through park and beach areas', + }, + ['trail', 'outdoor', 'mountain', 'park', 'beach'], + ], + [ + 'extracts keywords when only name has keywords', + { + name: 'Trail run', + description: 'Great workout', + }, + ['trail'], + ], + [ + 'extracts keywords when only description has keywords', + { + name: 'Morning run', + description: 'Through the park', + }, + ['park'], + ], + ])('%#. 
%s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('returns undefined when no keywords found', () => { + test.each([ + [ + 'returns undefined for empty name', + { + name: '', + }, + undefined, + ], + [ + 'returns undefined for empty description', + { + description: '', + }, + undefined, + ], + [ + 'returns undefined for both empty', + { + name: '', + description: '', + }, + undefined, + ], + [ + 'returns undefined when name has no keywords', + { + name: 'Morning run', + }, + undefined, + ], + [ + 'returns undefined when description has no keywords', + { + description: 'Great workout today', + }, + undefined, + ], + [ + 'returns undefined when both have no keywords', + { + name: 'Morning run', + description: 'Great workout', + }, + undefined, + ], + [ + 'returns undefined for whitespace only name', + { + name: ' ', + }, + undefined, + ], + [ + 'returns undefined for whitespace only description', + { + description: ' ', + }, + undefined, + ], + [ + 'returns undefined when both are whitespace only', + { + name: ' ', + description: ' ', + }, + undefined, + ], + ])('%#. %s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles missing fields correctly', () => { + test.each([ + ['returns undefined when both fields missing', {}, undefined], + [ + 'handles missing name field', + { + description: 'Trail run today', + }, + ['trail'], + ], + [ + 'handles missing description field', + { + name: 'Trail run', + }, + ['trail'], + ], + [ + 'returns undefined when missing name and description has no keywords', + { + description: 'Morning workout', + }, + undefined, + ], + [ + 'returns undefined when missing description and name has no keywords', + { + name: 'Morning run', + }, + undefined, + ], + ])('%#. 
%s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles case insensitivity', () => { + test.each([ + [ + 'extracts lowercase keywords', + { + name: 'trail run', + }, + ['trail'], + ], + [ + 'extracts uppercase keywords', + { + name: 'TRAIL RUN', + }, + ['trail'], + ], + [ + 'extracts mixed case keywords', + { + name: 'TrAiL RuN', + }, + ['trail'], + ], + [ + 'extracts keywords from mixed case description', + { + description: 'BeAuTiFuL PaRk ScEnErY', + }, + ['park'], + ], + [ + 'extracts keywords from both fields with mixed case', + { + name: 'TRAIL run', + description: 'beautiful PARK scenery', + }, + ['trail', 'park'], + ], + ])('%#. %s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles forbidden content correctly', () => { + test.each([ + [ + 'returns undefined when name contains forbidden content', + { + name: 'Run with people', + }, + undefined, + ], + [ + 'returns undefined when description contains forbidden content', + { + description: 'Government building trail', + }, + undefined, + ], + [ + 'returns undefined when both contain forbidden content', + { + name: 'Running with people', + description: 'Near government building', + }, + undefined, + ], + [ + 'returns keywords from name when description has forbidden content', + { + name: 'Trail run', + description: 'With people today', + }, + ['trail'], + ], + [ + 'returns keywords from description when name has forbidden content', + { + name: 'Run with people', + description: 'Trail conditions', + }, + ['trail'], + ], + ])('%#. 
%s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles special characters and whitespace', () => { + test.each([ + [ + 'extracts keywords with punctuation', + { + name: 'Trail, road, and park running!', + }, + ['trail', 'road', 'park'], + ], + [ + 'extracts keywords with multiple spaces', + { + name: 'Trail run workout', + }, + ['trail'], + ], + [ + 'extracts keywords with tabs', + { + name: 'Trail\trun\tworkout', + }, + ['trail'], + ], + [ + 'extracts keywords with newlines', + { + description: 'Trail\nrun\nmountain', + }, + ['trail', 'mountain'], + ], + [ + 'extracts keywords with hyphens', + { + name: 'Trail-running adventure', + }, + ['trail'], + ], + [ + 'extracts keywords with numbers', + { + name: '10k trail run', + }, + ['trail'], + ], + [ + 'handles leading and trailing spaces', + { + name: ' Trail run ', + }, + ['trail'], + ], + ])('%#. %s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles edge cases', () => { + test.each([ + [ + 'handles very long name with keywords', + { + name: 'This is a very long activity name that describes my amazing trail run through the park and up the mountain', + }, + ['trail', 'park', 'mountain'], + ], + [ + 'handles very long description with keywords', + { + description: + 'This is a very long description about my outdoor trail run through the park with beautiful beach views and mountain scenery', + }, + ['trail', 'outdoor', 'park', 'beach', 'mountain'], + ], + [ + 'handles single character text', + { + name: 'a', + }, + undefined, + ], + [ + 'handles keywords at boundaries', + { + name: 'trail', + description: 'park', + }, + ['trail', 'park'], + ], + [ + 'handles all keywords in one text', + { + name: 'trail road track indoor outdoor park beach mountain hill', + }, + ['trail', 'road', 'track', 'indoor', 'outdoor', 
'park', 'beach', 'mountain', 'hill'], + ], + ])('%#. %s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('maintains keyword order from KEYWORDS array', () => { + test.each([ + [ + 'returns keywords in KEYWORDS array order', + { + name: 'park trail road', + }, + ['trail', 'road', 'park'], + ], + [ + 'returns keywords in order regardless of text order', + { + name: 'mountain beach outdoor', + }, + ['outdoor', 'beach', 'mountain'], + ], + [ + 'returns combined keywords in order', + { + name: 'hill mountain', + description: 'trail park', + }, + ['mountain', 'hill', 'trail', 'park'], + ], + ])('%#. %s', (_name, input, expected) => { + const result = extractSemanticContext(input); + + expect(result).toStrictEqual(expected); + }); + }); +}); diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-semantic-context/extract-semantic-context.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-semantic-context/extract-semantic-context.ts new file mode 100644 index 0000000..f98fdc5 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-semantic-context/extract-semantic-context.ts @@ -0,0 +1,31 @@ +import extractTextSignals from '../extract-text-signals'; +import { Input } from './types'; + +/** + * Extracts semantic context from Strava activity user text. + * @param {Input} input - Strava activity data. + * @returns {string[] | undefined} Extracted semantic context or undefined if none found. 
/**
 * Collects semantic keywords from the user-written text of a Strava activity.
 *
 * The activity name is processed first, then the description. Signals from
 * both fields are concatenated in that order and are NOT de-duplicated, so a
 * keyword present in both fields appears twice in the result.
 * @param {Input} input - Activity fields holding user text (`name`, `description`).
 * @returns {string[] | undefined} Combined signals, or undefined when nothing was extracted.
 */
const extractSemanticContext = ({ name, description }: Input): string[] | undefined => {
  const collected = [name, description].flatMap((field) => {
    // A missing or empty field contributes nothing and is never passed on.
    if (!field) {
      return [];
    }

    // `extractTextSignals` may yield undefined; treat that as "no signals".
    return extractTextSignals(field) ?? [];
  });

  return collected.length === 0 ? undefined : collected;
};

export default extractSemanticContext;
+ */ +export const KNOWN_TAGS = [ + 'recovery', + 'race', + 'commute', + 'with kid', + 'long run', + 'easy', + 'workout', +] as const; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/extract-tag-signals.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/extract-tag-signals.ts new file mode 100644 index 0000000..cffc7d2 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/extract-tag-signals.ts @@ -0,0 +1,48 @@ +import { KNOWN_TAGS } from './constants'; +import { Input } from './types'; + +/** + * Extracts and normalizes tag signals from activity data. + * + * Processes Strava tags and normalizes them to known tag values. + * Tags influence mood, intensity, and scene composition. + * + * Tags are normalized to lowercase and matched against known tag list. + * Unknown tags are filtered out to ensure only safe, recognized tags are used. + * + * Strava API doesn't directly expose tags in the base activity response. + * Tags might be available in extended metadata or user-provided descriptions. + * For now, the function checks common tag indicators in the activity data. + * + * @param {Input} input - Strava activity data to extract tags from. + * @returns {string[]} Array of normalized tag strings or undefined if no valid tags found. 
/**
 * Derives normalized tag signals from activity data.
 *
 * At present the only tag source is the `commute` flag, which yields the
 * `'commute'` tag. Every candidate is lower-cased, trimmed, and kept only if
 * it appears in `KNOWN_TAGS`, so unrecognized values never leave this function.
 *
 * @param {Input} input - Strava activity data to extract tags from.
 * @returns {string[] | undefined} Recognized tags, or undefined if there are none.
 */
const extractTagSignals = ({ commute }: Input): string[] | undefined => {
  const candidates: string[] = commute ? ['commute'] : [];

  // Not implemented yet (kept from the original notes):
  // - workout type codes (e.g. 10 = Race) are activity-specific and are ignored for now;
  // - tags could later be parsed from the description or extended metadata
  //   by checking the text against each entry of KNOWN_TAGS.

  const recognized: string[] = [];

  for (const candidate of candidates) {
    const normalized = candidate.toLowerCase().trim();

    if (KNOWN_TAGS.includes(normalized as (typeof KNOWN_TAGS)[number])) {
      recognized.push(normalized);
    }
  }

  return recognized.length === 0 ? undefined : recognized;
};

export default extractTagSignals;
%s', (_name, activity, expected) => { + const result = extractTagSignals(activity); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('returns undefined when no tags present', () => { + test.each([ + ['returns undefined when commute is undefined', { commute: undefined }, undefined], + ['returns undefined for basic activity without tags', {}, undefined], + ])('%#. %s', (_name, activity, expected) => { + const result = extractTagSignals(activity); + + expect(result).toStrictEqual(expected); + }); + }); +}); diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/index.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/index.ts new file mode 100644 index 0000000..f8fd31b --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/index.ts @@ -0,0 +1 @@ +export { default } from './extract-tag-signals'; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/types.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/types.ts new file mode 100644 index 0000000..431eb4a --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-tag-signals/types.ts @@ -0,0 +1,3 @@ +export interface Input { + commute?: boolean; +} diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/constants.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/constants.ts new file mode 100644 index 0000000..ece28c1 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/constants.ts @@ -0,0 +1,11 @@ +export const KEYWORDS = [ + 'trail', + 'road', + 'track', + 'indoor', + 'outdoor', + 'park', + 'beach', + 'mountain', + 'hill', +]; diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/extract-text-signals.test.ts 
b/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/extract-text-signals.test.ts new file mode 100644 index 0000000..e0b72ec --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/extract-text-signals.test.ts @@ -0,0 +1,172 @@ +import { describe, test, expect } from 'bun:test'; + +import extractTextSignals from './extract-text-signals'; + +type Case = [string, string, string[] | undefined]; + +describe('extract-text-signals', () => { + describe('extracts keywords from valid text', () => { + test.each([ + ['finds trail keyword in simple text', 'Morning trail run', ['trail']], + ['finds road keyword in simple text', 'Easy road run', ['road']], + ['finds track keyword in simple text', 'Track workout session', ['track']], + ['finds indoor keyword in simple text', 'Indoor cycling workout', ['indoor']], + ['finds outdoor keyword in simple text', 'Outdoor adventure', ['outdoor']], + ['finds park keyword in simple text', 'Running in the park', ['park']], + ['finds beach keyword in simple text', 'Beach run at sunrise', ['beach']], + ['finds mountain keyword in simple text', 'Mountain bike ride', ['mountain']], + ['finds hill keyword in simple text', 'Hill repeats workout', ['hill']], + ['finds multiple keywords in text', 'Trail run in the mountain', ['trail', 'mountain']], + ['finds multiple keywords with mixed case', 'OUTDOOR PARK RUN', ['outdoor', 'park']], + ['extracts keywords from text with extra spaces', ' trail run ', ['trail']], + ['finds keywords in lowercase text', 'trail run', ['trail']], + ['finds keywords in uppercase text', 'TRAIL RUN', ['trail']], + ['finds keywords in mixed case text', 'Trail Run', ['trail']], + [ + 'finds all keywords when present', + 'trail road track indoor outdoor park beach mountain hill', + ['trail', 'road', 'track', 'indoor', 'outdoor', 'park', 'beach', 'mountain', 'hill'], + ], + [ + 'finds keywords in longer descriptive text', + 'Amazing trail run through 
the park with beautiful mountain views', + ['trail', 'park', 'mountain'], + ], + ['finds road and park keywords together', 'Road cycling through city park', ['road', 'park']], + [ + 'finds beach and outdoor keywords together', + 'Outdoor beach volleyball', + ['outdoor', 'beach'], + ], + [ + 'finds indoor and track keywords together', + 'Indoor track running session', + ['track', 'indoor'], + ], + [ + 'finds keywords with punctuation', + 'Trail, road, and park running!', + ['trail', 'road', 'park'], + ], + [ + 'finds keywords in text with numbers', + '10k trail run on the mountain', + ['trail', 'mountain'], + ], + [ + 'finds keywords in text with special characters', + 'Trail-run @ park #outdoor', + ['trail', 'outdoor', 'park'], + ], + [ + 'finds keywords when part of longer words', + 'mountainous trail landscape', + ['trail', 'mountain'], + ], + ['finds hill keyword in hillside', 'Running hillside trails', ['trail', 'hill']], + ])('%#. %s', (_name, text, expected) => { + const result = extractTextSignals(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('returns undefined when no keywords found', () => { + test.each([ + ['returns undefined for empty string', '', undefined], + ['returns undefined for whitespace only', ' ', undefined], + ['returns empty array for text with no keywords', 'Morning run workout', []], + [ + 'finds keywords that are part of similar words', + 'roadway pathway tracking', + ['road', 'track'], + ], + ['returns empty array for numeric text', '12345', []], + ['returns empty array for special characters only', '!@#$%^&*()', []], + [ + 'returns empty array for text with no matching keywords', + 'Evening jog around the neighborhood', + [], + ], + ['returns empty array for gibberish text', 'xyzabc defghi', []], + [ + 'returns undefined when keywords are partial', + 'trails roads tracks', + ['trail', 'road', 'track'], + ], + ])('%#. 
%s', (_name, text, expected) => { + const result = extractTextSignals(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles forbidden content gracefully', () => { + test.each([ + [ + 'returns undefined when text contains violence keywords', + 'trail run with weapon training', + undefined, + ], + [ + 'returns undefined when text contains political keywords', + 'outdoor run to the government building', + undefined, + ], + [ + 'returns undefined when text contains explicit content keywords', + 'mountain climb with explicit adult content', + undefined, + ], + [ + 'returns undefined when text contains person identifiers', + 'trail run with portrait photo', + undefined, + ], + [ + 'returns undefined when text contains typography instructions', + 'beach run display text on screen', + undefined, + ], + ])('%#. %s', (_name, text, expected) => { + const result = extractTextSignals(text); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('handles edge cases gracefully', () => { + test.each([ + ['handles text with only spaces before keyword', ' trail', ['trail']], + ['handles text with only spaces after keyword', 'trail ', ['trail']], + ['handles text with tabs', '\t\ttrail run', ['trail']], + ['handles text with newlines', 'trail\nrun\nmountain', ['trail', 'mountain']], + [ + 'handles text with mixed whitespace', + ' trail \t road \n park ', + ['trail', 'road', 'park'], + ], + [ + 'handles very long text with keywords', + 'This is a very long description about my amazing trail run that took me through the park and up the mountain and down to the beach where I enjoyed the outdoor scenery and fresh air on this beautiful road that winds through the hill country and passes by an indoor track facility', + ['trail', 'road', 'track', 'indoor', 'outdoor', 'park', 'beach', 'mountain', 'hill'], + ], + ['handles text with duplicate keywords', 'trail trail trail', ['trail']], + [ + 'handles text with keywords in different order', + 'park 
road trail', + ['trail', 'road', 'park'], + ], + ['handles single character keywords not matching', 'a b c d', []], + [ + 'handles repeated spaces between keywords', + 'trail road park', + ['trail', 'road', 'park'], + ], + ])('%#. %s', (_name, text, expected) => { + const result = extractTextSignals(text); + + expect(result).toStrictEqual(expected); + }); + }); +}); diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/extract-text-signals.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/extract-text-signals.ts new file mode 100644 index 0000000..928e68f --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-text-signals/extract-text-signals.ts @@ -0,0 +1,37 @@ +import sanitizeText from '../../sanitize-text'; +import { KEYWORDS } from './constants'; + +/** + * Extracts safe semantic signals from user-provided text fields in Strava activities. + * + * Processes text from activity name, description, gear fields etc. + * to extract safe semantic context. Never copies text verbatim. + * + * User-provided text is sanitized and processed to extract semantic + * signals that can safely influence prompt generation. Forbidden + * content is removed, and only safe, normalized signals are returned. + * + * In a future implementation, we might extract activity-related keywords + * like "trail", "road", "track", "indoor", "outdoor", etc. + * + * @param {string} text - User-provided text to extract signals from. + * @returns {string[]} Array of safe semantic signal strings or undefined if none found. 
/**
 * Extracts keyword signals from a user-provided text field.
 *
 * The text is trimmed, lower-cased and passed through `sanitizeText`; the
 * sanitized text is then scanned for each entry of `KEYWORDS` using substring
 * matching (so "mountainous" matches "mountain"). Only keywords are returned,
 * never the user's text itself.
 *
 * NOTE(review): `sanitizeText` is defined elsewhere. The accompanying tests
 * expect undefined for text containing forbidden content, which suggests it
 * returns an empty string in that case; confirm against its implementation.
 *
 * @param {string} text - User-provided text to extract signals from.
 * @returns {string[] | undefined} Undefined when the sanitized text is empty;
 *   otherwise the matched keywords in `KEYWORDS` order, each at most once
 *   (possibly an empty array when nothing matched).
 */
const extractTextSignals = (text: string): string[] | undefined => {
  const normalized = sanitizeText(text.trim().toLowerCase());

  // Empty after sanitization means "no usable text", which is reported
  // differently (undefined) from "usable text without keywords" ([]).
  if (normalized.length === 0) {
    return undefined;
  }

  return KEYWORDS.filter((keyword) => normalized.includes(keyword));
};

export default extractTextSignals;
(7:00)', + { + start_date_local: '2024-01-01T07:00:00Z', + }, + 'morning', + ], + [ + 'mid morning (8:00)', + { + start_date_local: '2024-01-01T08:00:00Z', + }, + 'morning', + ], + [ + 'late morning (9:00)', + { + start_date_local: '2024-01-01T09:00:00Z', + }, + 'morning', + ], + [ + 'morning at 5:30', + { + start_date_local: '2024-01-01T05:30:00Z', + }, + 'morning', + ], + [ + 'morning at 9:59', + { + start_date_local: '2024-01-01T09:59:59Z', + }, + 'morning', + ], + ])('%#. %s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); + + describe('classifies day activities correctly (10:00-17:00)', () => { + test.each([ + [ + 'day at exact start boundary (10:00)', + { + start_date_local: '2024-01-01T10:00:00Z', + }, + 'day', + ], + [ + 'late morning/early day (11:00)', + { + start_date_local: '2024-01-01T11:00:00Z', + }, + 'day', + ], + [ + 'noon (12:00)', + { + start_date_local: '2024-01-01T12:00:00Z', + }, + 'day', + ], + [ + 'afternoon (13:00)', + { + start_date_local: '2024-01-01T13:00:00Z', + }, + 'day', + ], + [ + 'afternoon (14:00)', + { + start_date_local: '2024-01-01T14:00:00Z', + }, + 'day', + ], + [ + 'afternoon (15:00)', + { + start_date_local: '2024-01-01T15:00:00Z', + }, + 'day', + ], + [ + 'late afternoon (16:00)', + { + start_date_local: '2024-01-01T16:00:00Z', + }, + 'day', + ], + [ + 'day at 16:59', + { + start_date_local: '2024-01-01T16:59:59Z', + }, + 'day', + ], + ])('%#. 
%s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); + + describe('classifies evening activities correctly (17:00-20:00)', () => { + test.each([ + [ + 'evening at exact start boundary (17:00)', + { + start_date_local: '2024-01-01T17:00:00Z', + }, + 'evening', + ], + [ + 'early evening (18:00)', + { + start_date_local: '2024-01-01T18:00:00Z', + }, + 'evening', + ], + [ + 'late evening (19:00)', + { + start_date_local: '2024-01-01T19:00:00Z', + }, + 'evening', + ], + [ + 'evening at 17:30', + { + start_date_local: '2024-01-01T17:30:00Z', + }, + 'evening', + ], + [ + 'evening at 19:59', + { + start_date_local: '2024-01-01T19:59:59Z', + }, + 'evening', + ], + ])('%#. %s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); + + describe('classifies night activities correctly (20:00-5:00)', () => { + test.each([ + [ + 'night at exact start boundary (20:00)', + { + start_date_local: '2024-01-01T20:00:00Z', + }, + 'night', + ], + [ + 'late evening/early night (21:00)', + { + start_date_local: '2024-01-01T21:00:00Z', + }, + 'night', + ], + [ + 'late night (22:00)', + { + start_date_local: '2024-01-01T22:00:00Z', + }, + 'night', + ], + [ + 'late night (23:00)', + { + start_date_local: '2024-01-01T23:00:00Z', + }, + 'night', + ], + [ + 'midnight (0:00)', + { + start_date_local: '2024-01-01T00:00:00Z', + }, + 'night', + ], + [ + 'early night (1:00)', + { + start_date_local: '2024-01-01T01:00:00Z', + }, + 'night', + ], + [ + 'early night (2:00)', + { + start_date_local: '2024-01-01T02:00:00Z', + }, + 'night', + ], + [ + 'early night (3:00)', + { + start_date_local: '2024-01-01T03:00:00Z', + }, + 'night', + ], + [ + 'early night (4:00)', + { + start_date_local: '2024-01-01T04:00:00Z', + }, + 'night', + ], + [ + 'night at 4:59', + { + start_date_local: '2024-01-01T04:59:59Z', + }, + 'night', + ], + ])('%#. 
%s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); + + describe('handles fallback to start_date when start_date_local missing', () => { + test.each([ + [ + 'uses start_date for morning', + { + start_date: '2024-01-01T07:00:00Z', + }, + 'morning', + ], + [ + 'uses start_date for day', + { + start_date: '2024-01-01T14:00:00Z', + }, + 'day', + ], + [ + 'uses start_date for evening', + { + start_date: '2024-01-01T18:00:00Z', + }, + 'evening', + ], + [ + 'uses start_date for night', + { + start_date: '2024-01-01T22:00:00Z', + }, + 'night', + ], + [ + 'prefers start_date_local over start_date', + { + start_date_local: '2024-01-01T07:00:00Z', + start_date: '2024-01-01T22:00:00Z', + }, + 'morning', + ], + ])('%#. %s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); + + describe('defaults to day when no timestamp available', () => { + test.each([ + ['defaults to day when both timestamps missing', {}, 'day'], + [ + 'defaults to day when start_date_local is undefined', + { + start_date_local: undefined, + }, + 'day', + ], + [ + 'defaults to day when start_date is undefined', + { + start_date: undefined, + }, + 'day', + ], + ])('%#. 
%s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); + + describe('handles different date formats', () => { + test.each([ + [ + 'handles ISO 8601 format', + { + start_date_local: '2024-01-01T07:00:00Z', + }, + 'morning', + ], + [ + 'handles ISO format with milliseconds', + { + start_date_local: '2024-01-01T07:00:00.000Z', + }, + 'morning', + ], + [ + 'handles ISO format with timezone offset', + { + start_date_local: '2024-01-01T07:00:00+00:00', + }, + 'morning', + ], + [ + 'handles different year', + { + start_date_local: '2023-12-31T18:00:00Z', + }, + 'evening', + ], + [ + 'handles leap year date', + { + start_date_local: '2024-02-29T15:00:00Z', + }, + 'day', + ], + ])('%#. %s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); + + describe('handles various activity types', () => { + test.each([ + [ + 'Run activity at morning', + { + start_date_local: '2024-01-01T07:00:00Z', + }, + 'morning', + ], + [ + 'Ride activity at day', + { + start_date_local: '2024-01-01T14:00:00Z', + }, + 'day', + ], + [ + 'Swim activity at evening', + { + start_date_local: '2024-01-01T18:00:00Z', + }, + 'evening', + ], + [ + 'VirtualRide at night', + { + start_date_local: '2024-01-01T22:00:00Z', + }, + 'night', + ], + [ + 'Walk activity at morning', + { + start_date_local: '2024-01-01T08:00:00Z', + }, + 'morning', + ], + ])('%#. 
%s', (_name, activity, expected) => { + const result = extractTimeOfDaySignals(activity); + + expect(result).toBe(expected); + }); + }); +}); diff --git a/packages/strava-activity-signals/get-strava-activity-signals/extract-time-of-day-signals/extract-time-of-day-signals.ts b/packages/strava-activity-signals/get-strava-activity-signals/extract-time-of-day-signals/extract-time-of-day-signals.ts new file mode 100644 index 0000000..09efb49 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/extract-time-of-day-signals/extract-time-of-day-signals.ts @@ -0,0 +1,53 @@ +import { StravaActivitySignalsTimeOfDay } from '../../types'; +import { CLASSIFICATIONS } from '../../constants'; +import { Input } from './types'; + +/** + * Extracts time of day signal from activity timestamps. + * + * Determines time of day (morning, day, evening, night) based on + * activity start time. Uses local time if available, otherwise UTC. + * + * Time classifications: + * - Morning: 5:00 - 10:00 + * - Day: 10:00 - 17:00 + * - Evening: 17:00 - 20:00 + * - Night: 20:00 - 5:00 + * + * @param {Input} input - Activity data to extract time from. + * @returns {StravaActivitySignalsTimeOfDay} Time of day classification. 
/**
 * Extracts the time-of-day signal from activity timestamps.
 *
 * `start_date_local` is preferred; `start_date` is used only when the local
 * timestamp is missing or empty. The hour is compared against the boundaries
 * in `CLASSIFICATIONS.TIME_OF_DAY`:
 * - morning: MORNING_START <= hour < MORNING_END
 * - day:     MORNING_END   <= hour < EVENING_START
 * - evening: EVENING_START <= hour < NIGHT_START
 * - night:   everything else
 *
 * The hour is read with `getUTCHours()`, not `getHours()`. The timestamps
 * exercised by the test suite carry the wall-clock time with a `Z` suffix,
 * so reading the hour in the host's timezone would shift the classification
 * on any machine that is not running in UTC.
 *
 * @param {Input} input - Activity data to extract time from.
 * @returns {StravaActivitySignalsTimeOfDay} Time of day classification;
 *   `'day'` when no timestamp is present or it cannot be parsed.
 */
const extractTimeSignals = ({
  start_date_local,
  start_date,
}: Input): StravaActivitySignalsTimeOfDay => {
  const startDate = start_date_local || start_date;

  if (!startDate) {
    return 'day';
  }

  // Host-timezone independent: take the hour exactly as encoded in the timestamp.
  const hour = new Date(startDate).getUTCHours();

  // An unparsable timestamp produces NaN, which would fail every range check
  // below and be reported as 'night'; use the "no timestamp" default instead.
  if (Number.isNaN(hour)) {
    return 'day';
  }

  const { MORNING_START, MORNING_END, EVENING_START, NIGHT_START } = CLASSIFICATIONS.TIME_OF_DAY;

  if (hour >= MORNING_START && hour < MORNING_END) {
    return 'morning';
  }

  if (hour >= MORNING_END && hour < EVENING_START) {
    return 'day';
  }

  if (hour >= EVENING_START && hour < NIGHT_START) {
    return 'evening';
  }

  return 'night';
};

export default extractTimeSignals;
100644 index 0000000..abd7550 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/get-strava-activity-signals.test.ts @@ -0,0 +1,73 @@ +import { describe, test, expect } from 'bun:test'; + +import getStravaActivitySignals from './get-strava-activity-signals'; +import { StravaActivity, StravaActivitySignals } from '../types'; + +type Case = [string, StravaActivity, StravaActivitySignals]; + +describe('get-activity-signals', () => { + describe('it extracts signals from valid activity', () => { + test.each([ + [ + 'valid mountain bike ride with minimal fields', + { + id: 123456, + type: 'Ride', + sport_type: 'MountainBikeRide', + distance: 28099, + moving_time: 4207, + total_elevation_gain: 516, + start_date_local: '2018-02-16T06:52:54Z', + }, + { + activityType: 'MountainBikeRide', + intensity: 'high', + elevation: 'mountainous', + timeOfDay: 'morning', + tags: undefined, + brands: undefined, + semanticContext: undefined, + }, + ], + [ + 'valid running activity with all fields', + { + id: 123456, + type: 'Run', + sport_type: 'Run', + name: 'Morning Run', + description: 'Nice run in the park', + distance: 5000, + moving_time: 1500, + total_elevation_gain: 50, + start_date_local: '2024-01-01T07:00:00Z', + gear: { name: 'Nike Shoes' }, + }, + { + activityType: 'Run', + intensity: 'medium', + elevation: 'rolling', + timeOfDay: 'morning', + tags: undefined, + brands: ['Nike Shoes'], + semanticContext: ['park'], + }, + ], + ])('%#. %s', (_name, activity, expected) => { + const signals = getStravaActivitySignals(activity); + + expect(signals).toStrictEqual(expected); + }); + }); + + describe('it throws error for invalid activity', () => { + test('throws error for invalid activity', () => { + const activity: StravaActivity = { + id: 123456, + // Missing required fields... 
+ } as StravaActivity; + + expect(() => getStravaActivitySignals(activity)).toThrow(); + }); + }); +}); diff --git a/packages/strava-activity-signals/get-strava-activity-signals/get-strava-activity-signals.ts b/packages/strava-activity-signals/get-strava-activity-signals/get-strava-activity-signals.ts new file mode 100644 index 0000000..f2e4985 --- /dev/null +++ b/packages/strava-activity-signals/get-strava-activity-signals/get-strava-activity-signals.ts @@ -0,0 +1,58 @@ +import { StravaActivitySignals, StravaActivity } from '../types'; +import validateActivity from '../validate-activity'; +import validateSignals from '../validate-signals'; +import classifyIntensity from './classify-intensity'; +import classifyElevation from './classify-elevation'; +import extractTimeSignals from './extract-time-of-day-signals'; +import extractTagSignals from './extract-tag-signals'; +import extractSemanticContext from './extract-semantic-context'; + +/** + * Extracts semantic signals from Strava activity data. + * + * Main entry point for signal extraction. Processes activity data to extract + * all semantic signals needed for prompt generation, including intensity, + * elevation, time of day, tags, and semantic context from user text. + * + * Signal extraction process: + * 1. Validates activity data + * 2. Extracts activity type from sport_type + * 3. Classifies intensity based on pace + * 4. Classifies elevation based on elevation gain + * 5. Extracts time of day from activity timestamps + * 6. Normalizes and extracts tags + * 7. Processes user text (name, description) for semantic context + * 8. 
/**
 * Extracts semantic signals from Strava activity data.
 *
 * Main entry point for signal extraction. Builds the full signal set
 * (activity type, intensity, elevation, time of day, tags, brands and
 * semantic context) and validates it before returning.
 *
 * Process:
 * 1. Validates the activity. If validation fails but the validator supplies a
 *    sanitized copy, extraction continues on that copy instead of throwing,
 *    mirroring how invalid signals are handled in step 4.
 * 2. Reads the activity type from `sport_type`, falling back to `type` and
 *    then to 'Unknown'.
 * 3. Delegates intensity, elevation, time-of-day, tag and semantic-context
 *    extraction to the dedicated helpers; `gear.name`, when present, becomes
 *    the single brand entry.
 * 4. Validates the extracted signals and returns them, or their sanitized
 *    version when validation fails but sanitization was possible.
 *
 * @param {StravaActivity} activity - Strava activity data to extract signals from.
 * @returns {StravaActivitySignals} Extracted (and, if needed, sanitized) activity signals.
 * @throws {Error} If the activity is invalid and no sanitized copy is available,
 *   or if the signals are invalid and no sanitized signals are available.
 */
const getStravaActivitySignals = (activity: StravaActivity): StravaActivitySignals => {
  const activityValidation = validateActivity(activity);
  // Use the validator's repaired copy when the original did not pass.
  const usableActivity = activityValidation.valid ? activity : activityValidation.sanitized;

  if (!usableActivity) {
    throw new Error(`Activity validation failed: ${activityValidation.errors.join(', ')}`);
  }

  const signals: StravaActivitySignals = {
    activityType: usableActivity.sport_type ?? usableActivity.type ?? 'Unknown',
    intensity: classifyIntensity(usableActivity),
    elevation: classifyElevation(usableActivity),
    timeOfDay: extractTimeSignals(usableActivity),
    tags: extractTagSignals(usableActivity),
    brands: usableActivity.gear?.name ? [usableActivity.gear.name] : undefined,
    semanticContext: extractSemanticContext(usableActivity),
  };
  const signalsValidation = validateSignals(signals);

  if (signalsValidation.valid) {
    return signals;
  }

  if (signalsValidation.sanitized) {
    return signalsValidation.sanitized;
  }

  throw new Error(`Signals validation failed: ${signalsValidation.errors.join(', ')}`);
};

export default getStravaActivitySignals;
a/packages/strava-activity-signals/package.json b/packages/strava-activity-signals/package.json new file mode 100644 index 0000000..81acd84 --- /dev/null +++ b/packages/strava-activity-signals/package.json @@ -0,0 +1,5 @@ +{ + "name": "@pace/strava-activity-signals", + "private": true, + "type": "module" +} diff --git a/packages/strava-activity-signals/sanitize-text/index.ts b/packages/strava-activity-signals/sanitize-text/index.ts new file mode 100644 index 0000000..5af3f4f --- /dev/null +++ b/packages/strava-activity-signals/sanitize-text/index.ts @@ -0,0 +1 @@ +export { default } from './sanitize-text'; diff --git a/packages/strava-activity-signals/sanitize-text/sanitize-text.test.ts b/packages/strava-activity-signals/sanitize-text/sanitize-text.test.ts new file mode 100644 index 0000000..0429401 --- /dev/null +++ b/packages/strava-activity-signals/sanitize-text/sanitize-text.test.ts @@ -0,0 +1,127 @@ +import { describe, test, expect } from 'bun:test'; + +import sanitizeText from './sanitize-text'; + +type Case = [string, string, string]; + +describe('sanitize-text', () => { + describe('sanitizes safe text correctly', () => { + test.each([ + ['returns trimmed text for simple input', ' Morning run ', 'Morning run'], + [ + 'normalizes multiple spaces to single space', + 'Morning run workout', + 'Morning run workout', + ], + ['normalizes tabs to single space', 'Morning\t\trun\tworkout', 'Morning run workout'], + ['normalizes newlines to single space', 'Morning\nrun\nworkout', 'Morning run workout'], + [ + 'normalizes mixed whitespace to single space', + 'Morning \t\n run \t workout', + 'Morning run workout', + ], + ['preserves text with special characters', 'Morning run @ 5k!', 'Morning run @ 5k!'], + ['preserves text with numbers', 'Morning run 10k', 'Morning run 10k'], + ['preserves text with punctuation', 'Morning run, very nice!', 'Morning run, very nice!'], + ['preserves text with hyphens', 'Trail-running adventure', 'Trail-running adventure'], + ['preserves 
text with apostrophes', "It's a great run", "It's a great run"], + ['handles text with only spaces at start', ' Trail run', 'Trail run'], + ['handles text with only spaces at end', 'Trail run ', 'Trail run'], + ['handles text with spaces on both ends', ' Trail run ', 'Trail run'], + ['handles single word', 'Running', 'Running'], + [ + 'handles very long text', + 'This is a very long description about my amazing morning run that took me through beautiful trails and parks', + 'This is a very long description about my amazing morning run that took me through beautiful trails and parks', + ], + ['preserves unicode characters', 'Morning run 🏃‍♂️', 'Morning run 🏃‍♂️'], + [ + 'handles text with multiple consecutive newlines', + 'Line one\n\n\nLine two', + 'Line one Line two', + ], + ['handles text with carriage returns', 'Line one\r\nLine two', 'Line one Line two'], + ['preserves uppercase letters', 'MORNING RUN', 'MORNING RUN'], + ['preserves mixed case', 'MoRnInG RuN', 'MoRnInG RuN'], + ])('%#. %s', (_name, input, expected) => { + const result = sanitizeText(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('returns empty string for invalid input', () => { + test.each([ + ['returns empty string for empty input', '', ''], + ['returns empty string for whitespace only', ' ', ''], + ['returns empty string for tabs only', '\t\t\t', ''], + ['returns empty string for newlines only', '\n\n\n', ''], + ['returns empty string for mixed whitespace only', ' \t\n ', ''], + ['returns empty string for carriage returns only', '\r\n\r\n', ''], + ])('%#. 
%s', (_name, input, expected) => { + const result = sanitizeText(input); + + expect(result).toStrictEqual(expected); + }); + }); + + describe('removes forbidden content', () => { + test.each([ + ['returns empty string for text with person keywords', 'Running with people in the park', ''], + ['returns empty string for text with face keywords', 'Morning run with portrait photo', ''], + [ + 'returns empty string for text with political keywords', + 'Run to the government building', + '', + ], + ['returns empty string for text with violence keywords', 'Running with weapon training', ''], + ['returns empty string for text with combat keywords', 'Military training run', ''], + ['returns empty string for text with sexual keywords', 'Explicit content in description', ''], + ['returns empty string for text with typography keywords', 'Display text on screen', ''], + ['returns empty string for text with write instruction', 'Write something here', ''], + [ + 'returns empty string for text with multiple forbidden keywords', + 'Government people with weapons', + '', + ], + ['returns empty string for uppercase forbidden keywords', 'PEOPLE RUNNING', ''], + ['returns empty string for mixed case forbidden keywords', 'PeOpLe running', ''], + ['returns empty string for forbidden keyword at start', 'Government building run', ''], + ['returns empty string for forbidden keyword at end', 'Morning run with people', ''], + ['returns empty string for forbidden keyword in middle', 'Great people filled run', ''], + ['returns empty string for text with man keyword', 'Man running marathon', ''], + ['returns empty string for text with woman keyword', 'Woman jogging', ''], + ['returns empty string for text with child keyword', 'Child playing', ''], + ['returns empty string for text with battle keyword', 'Battle training session', ''], + ['returns empty string for text with flag keyword', 'Running past the flag', ''], + ['returns empty string for text with army keyword', 'Army base run', ''], + 
/**
 * Sanitizes user-provided text before it is used as a semantic signal.
 *
 * Outcomes:
 * - blank input (empty or whitespace only) yields an empty string;
 * - input flagged by `checkForbiddenContent` yields an empty string, i.e. the
 *   whole text is dropped rather than partially cleaned;
 * - anything else is trimmed and every run of whitespace is collapsed into a
 *   single space.
 *
 * @param {string} text - User-provided text to sanitize.
 * @returns {string} Normalized text, or an empty string when nothing safe remains.
 */
const sanitizeText = (text: string): string => {
  const trimmed = text.trim();

  if (trimmed.length === 0) {
    return '';
  }

  // The forbidden-content check receives the untouched input.
  if (checkForbiddenContent(text)) {
    // Conservative choice: discard the text entirely instead of trying to
    // salvage the safe parts.
    return '';
  }

  return trimmed.replace(/\s+/g, ' ');
};

export default sanitizeText;
/** Elevation classification: one of the entries of `ELEVATIONS`. */
export type StravaActivitySignalsElevation = (typeof ELEVATIONS)[number];

/** Intensity classification: one of the entries of `INTENSITIES`. */
export type StravaActivitySignalsIntensity = (typeof INTENSITIES)[number];

/** Time-of-day classification: one of the entries of `TIMES_OF_DAY`. */
export type StravaActivitySignalsTimeOfDay = (typeof TIMES_OF_DAY)[number];

/**
 * Strava activity signals extracted from the Strava activity data.
 */
export interface StravaActivitySignals {
  /** Activity type from sport_type field. */
  activityType: string;
  /** Intensity classification. */
  intensity: StravaActivitySignalsIntensity;
  /** Elevation classification. */
  elevation: StravaActivitySignalsElevation;
  /** Time of day classification. */
  timeOfDay: StravaActivitySignalsTimeOfDay;
  /** Normalized tags from activity. */
  tags?: string[];
  /** Extracted brand names from gear/description (if compliant). */
  brands?: string[];
  /** Safe semantic signals extracted from user text. */
  semanticContext?: string[];
}

/**
 * Outcome of a validation step.
 *
 * `T` is the type of the value that was validated; it must be declared as a
 * type parameter because `sanitized` carries a repaired value of that type.
 */
export interface ValidationResult<T> {
  /** Whether the validation passed. */
  valid: boolean;
  /** Array of error messages if validation failed. */
  errors: string[];
  /** Sanitized version of the input if validation failed but sanitization was possible. */
  sanitized?: T;
}

/**
 * Validation result for activity data.
 */
export type StravaActivityValidationResult = ValidationResult<StravaActivity>;

/**
 * Validation result for activity signals.
 */
export type StravaActivitySignalsValidationResult = ValidationResult<StravaActivitySignals>;
/**
 * Validates activity value constraints.
 *
 * Works on a shallow copy of the activity and repairs what it can:
 * - a distance that is not greater than 0 is reported and cleared;
 * - a non-positive pace (computed by `getPaceSecondsPerKm` from `moving_time`
 *   and the already-checked distance) is reported and `moving_time` is
 *   cleared, so the sanitized copy no longer carries the value that produced
 *   the invalid pace;
 * - a negative elevation gain is reported and clamped to 0.
 *
 * @param {StravaActivity} activity - Activity to validate.
 * @returns {StravaActivityValidationResult} Result whose `sanitized` holds the
 *   repaired copy when any check failed, and `undefined` otherwise.
 * @internal
 */
const validateActivityValues = (activity: StravaActivity): StravaActivityValidationResult => {
  const errors: string[] = [];
  const sanitized: StravaActivity = { ...activity };

  // Validate distance.
  if (sanitized.distance !== undefined && sanitized.distance <= 0) {
    errors.push('Distance must be greater than 0');
    sanitized.distance = undefined;
  }

  // Validate pace (derived from distance and moving_time). Skipped when the
  // distance was just cleared above.
  if (sanitized.distance !== undefined && sanitized.moving_time !== undefined) {
    const paceSecondsPerKm = getPaceSecondsPerKm(sanitized.moving_time, sanitized.distance);

    if (paceSecondsPerKm <= 0) {
      errors.push('Pace must be greater than 0');
      // The distance is known to be positive here, so drop the moving time
      // that the invalid pace was derived from.
      sanitized.moving_time = undefined;
    }
  }

  // Validate elevation gain.
  if (sanitized.total_elevation_gain !== undefined && sanitized.total_elevation_gain < 0) {
    errors.push('Elevation gain must be non-negative');
    sanitized.total_elevation_gain = 0;
  }

  const valid = errors.length === 0;

  return {
    valid,
    errors,
    sanitized: valid ? undefined : sanitized,
  };
};

/**
 * Validates activity semantic consistency.
 *
 * Currently a single check: an activity whose `type` is 'Run' and whose pace
 * (seconds per km) is below `MAX_PACE` is reported as unrealistically fast.
 *
 * @param {StravaActivity} activity - Activity to validate.
 * @returns {StravaActivityValidationResult} Validation result (never carries `sanitized`).
 * @internal
 */
const validateActivitySemantics = (activity: StravaActivity): StravaActivityValidationResult => {
  const errors: string[] = [];

  // NOTE(review): MAX_PACE comes from ../constants; its value is not visible
  // here. The original comment cites ~2:00 min/km as the suspicious threshold.
  if (activity.distance !== undefined && activity.moving_time !== undefined) {
    const paceSecondsPerKm = getPaceSecondsPerKm(activity.moving_time, activity.distance);

    if (paceSecondsPerKm < MAX_PACE && activity.type === 'Run') {
      errors.push('Running pace is faster than realistic human limits');
    }
  }

  return {
    valid: errors.length === 0,
    errors,
  };
};

/**
 * Validates activity data.
 *
 * Checks, in order:
 * 1. Required fields: `type` and `sport_type` must be non-empty strings.
 *    Failure returns `valid: false` without a sanitized copy.
 * 2. Value constraints: distance > 0, pace > 0, elevation gain >= 0.
 *    Failure returns `valid: false` together with a sanitized copy.
 * 3. Semantic consistency: findings are treated as warnings, so the result
 *    stays `valid: true` and the warnings are listed in `errors`.
 *
 * @param {StravaActivity} activity - Activity data to validate.
 * @returns {StravaActivityValidationResult} Validation result with errors and optional sanitized activity.
 */
const validateActivity = (activity: StravaActivity): StravaActivityValidationResult => {
  const errors: string[] = [];

  if (!activity.type || typeof activity.type !== 'string') {
    errors.push('Activity type is required and must be a string');
  }

  if (!activity.sport_type || typeof activity.sport_type !== 'string') {
    errors.push('Activity sport_type is required and must be a string');
  }

  if (errors.length > 0) {
    return {
      valid: false,
      errors,
    };
  }

  const valueValidation = validateActivityValues(activity);

  if (!valueValidation.valid) {
    return {
      valid: false,
      errors: [...valueValidation.errors],
      sanitized: valueValidation.sanitized,
    };
  }

  // Semantic errors are warnings: prefer graceful degradation, so the
  // activity is still reported as valid and the warnings ride along.
  const semanticValidation = validateActivitySemantics(activity);

  return {
    valid: true,
    errors: semanticValidation.errors,
  };
};

export default validateActivity;
/**
 * Validates extracted activity signals.
 *
 * Checks, in this order (the order determines the order of `errors`):
 * 1. `activityType` is a non-empty string
 * 2. `intensity` is listed in `INTENSITIES`
 * 3. `elevation` is listed in `ELEVATIONS`
 * 4. `timeOfDay` is listed in `TIMES_OF_DAY`
 * 5. every tag is a string
 * 6. no semantic-context entry is flagged by `checkForbiddenContent`
 * 7. every brand is a string
 *
 * When any check fails, `sanitized` is a copy of the signals in which only
 * `semanticContext` has been filtered down to its non-forbidden entries; all
 * other fields are copied unchanged.
 *
 * @param {StravaActivitySignals} signals - Activity signals to validate.
 * @returns {StravaActivitySignalsValidationResult} Validation result with errors and optional sanitized signals.
 */
const validateActivitySignals = (
  signals: StravaActivitySignals,
): StravaActivitySignalsValidationResult => {
  const { activityType, intensity, elevation, timeOfDay, tags, semanticContext, brands } = signals;
  const errors: string[] = [];
  const isNotString = (value: unknown): boolean => typeof value !== 'string';

  if (!activityType || typeof activityType !== 'string') {
    errors.push('Activity type is required and must be a string');
  }

  if (!INTENSITIES.includes(intensity)) {
    errors.push(`Intensity must be one of: ${INTENSITIES.join(', ')}`);
  }

  if (!ELEVATIONS.includes(elevation)) {
    errors.push(`Elevation must be one of: ${ELEVATIONS.join(', ')}`);
  }

  if (!TIMES_OF_DAY.includes(timeOfDay)) {
    errors.push(`Time of day must be one of: ${TIMES_OF_DAY.join(', ')}`);
  }

  if (tags && tags.some((tag) => isNotString(tag))) {
    errors.push('All tags must be strings');
  }

  if (semanticContext && semanticContext.some((entry) => checkForbiddenContent(entry))) {
    errors.push('Semantic context contains forbidden content');
  }

  if (brands && brands.some((brand) => isNotString(brand))) {
    errors.push('All brands must be strings');
  }

  if (errors.length === 0) {
    return {
      valid: true,
      errors,
      sanitized: undefined,
    };
  }

  return {
    valid: false,
    errors,
    sanitized: {
      ...signals,
      semanticContext: semanticContext?.filter((entry) => !checkForbiddenContent(entry)),
    },
  };
};

export default validateActivitySignals;