Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 132 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,17 @@ mod secrets {
pub mod anthropic;
pub mod aws;
pub mod basic_auth;
pub mod digitalocean;
pub mod discord;
pub mod gitlab;
pub mod jwt;
pub mod npm;
pub mod openai;
pub mod private_key;
pub mod pypi;
pub mod slack;
pub mod stripe;
pub mod twilio;
}

/// Secret class representing a detected secret
Expand Down Expand Up @@ -122,6 +129,62 @@ fn detect_chunk(secret_owned: String, secret_types: &Option<Vec<String>>) -> Vec
}));
}

// Discord token detector
if should_run_detector("discord", secret_types) {
detector_tasks.push(Box::new({
let s = secret_owned.clone();
move || secrets::discord::detect_discord_tokens(&s)
}));
}

// PyPI token detector
if should_run_detector("pypi", secret_types) {
detector_tasks.push(Box::new({
let s = secret_owned.clone();
move || secrets::pypi::detect_pypi_tokens(&s)
}));
}

// Slack token detector
if should_run_detector("slack", secret_types) {
detector_tasks.push(Box::new({
let s = secret_owned.clone();
move || secrets::slack::detect_slack_tokens(&s)
}));
}

// Stripe access key detector
if should_run_detector("stripe", secret_types) {
detector_tasks.push(Box::new({
let s = secret_owned.clone();
move || secrets::stripe::detect_stripe_keys(&s)
}));
}

// Twilio API key detector
if should_run_detector("twilio", secret_types) {
detector_tasks.push(Box::new({
let s = secret_owned.clone();
move || secrets::twilio::detect_twilio_keys(&s)
}));
}

// GitLab token detector
if should_run_detector("gitlab", secret_types) {
detector_tasks.push(Box::new({
let s = secret_owned.clone();
move || secrets::gitlab::detect_gitlab_tokens(&s)
}));
}

// DigitalOcean API key detector
if should_run_detector("digitalocean", secret_types) {
detector_tasks.push(Box::new({
let s = secret_owned.clone();
move || secrets::digitalocean::detect_digitalocean_keys(&s)
}));
}

// Process detector tasks in batches based on CPU count
let mut all_secrets = Vec::new();
let mut task_iter = detector_tasks.into_iter();
Expand Down Expand Up @@ -169,13 +232,20 @@ fn detect_chunk(secret_owned: String, secret_types: &Option<Vec<String>>) -> Vec
/// - Private Keys (RSA, EC, DSA, OpenSSH, PGP, SSH2, PuTTY) - filter: "private_key"
/// - Basic Auth Credentials (passwords in URIs like user:pass@host) - filter: "basic_auth"
/// - NPM Tokens (npmrc authToken) - filter: "npm"
/// - Discord Bot Tokens ([M|N|O]...) - filter: "discord"
/// - PyPI Tokens (pypi-AgE...) - filter: "pypi"
/// - Slack Tokens and Webhooks (xox* or hooks.slack.com) - filter: "slack"
/// - Stripe Access Keys (sk_live/rk_live) - filter: "stripe"
/// - Twilio API Keys (AC..., SK...) - filter: "twilio"
/// - GitLab Tokens (glpat, glrt, etc.) - filter: "gitlab"
/// - DigitalOcean API Keys (dop_v1, doo_v1, dor_v1) - filter: "digitalocean"
/// - More detectors can be added here in the future
///
/// # Arguments
/// * `py` - Python context (used to release GIL during computation)
/// * `secret` - The string to check for secret patterns
/// * `secret_types` - Optional list of secret types to detect. If None, all types are detected.
/// Valid values: "aws", "openai", "anthropic", "jwt", "private_key", "basic_auth", "npm"
/// Valid values: "aws", "openai", "anthropic", "jwt", "private_key", "basic_auth", "npm", "discord", "pypi", "slack", "stripe", "twilio", "gitlab", "digitalocean"
///
/// # Returns
/// * `Vec<Secret>` - List of all secrets found (empty list if none detected)
Expand Down Expand Up @@ -248,6 +318,13 @@ mod tests {
let aws_secret = r#"secret = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY""#;
let openai = "sk-aBcDeFgHiJkLmNoPqRsTT3BlbkFJuVwXyZaBcDeFgHiJkLmN";
let anthropic = "sk-ant-api03-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-api03-ABCDEFGHIJKLMNOPQRSTUVWXY";
let discord = format!("M{}.{}.{}", "a".repeat(23), "b".repeat(6), "c".repeat(27));
let pypi = format!("pypi-AgEIcHlwaS5vcmc{}", "a".repeat(70));
let slack = "xoxb-1234567890-123456789012-abcdef123456";
let stripe = "sk_live_1234567890abcdefghijklmn";
let twilio = format!("AC{}", "a".repeat(32));
let gitlab = format!("glpat-{}", "a".repeat(20));
let digitalocean = format!("dop_v1_{}", "a".repeat(64));

Python::initialize();
Python::attach(|py| {
Expand All @@ -266,6 +343,34 @@ mod tests {
let result4 = detect(py, anthropic, None).unwrap();
assert_eq!(result4.len(), 1);
assert_eq!(result4[0].secret_type, "Anthropic API Key");

let result5 = detect(py, &discord, None).unwrap();
assert_eq!(result5.len(), 1);
assert_eq!(result5[0].secret_type, "Discord Bot Token");

let result6 = detect(py, &pypi, None).unwrap();
assert_eq!(result6.len(), 1);
assert_eq!(result6[0].secret_type, "PyPI Token");

let result7 = detect(py, slack, None).unwrap();
assert_eq!(result7.len(), 1);
assert_eq!(result7[0].secret_type, "Slack Token");

let result8 = detect(py, stripe, None).unwrap();
assert_eq!(result8.len(), 1);
assert_eq!(result8[0].secret_type, "Stripe Access Key");

let result9 = detect(py, &twilio, None).unwrap();
assert_eq!(result9.len(), 1);
assert_eq!(result9[0].secret_type, "Twilio API Key");

let result10 = detect(py, &gitlab, None).unwrap();
assert_eq!(result10.len(), 1);
assert_eq!(result10[0].secret_type, "GitLab Token");

let result11 = detect(py, &digitalocean, None).unwrap();
assert_eq!(result11.len(), 1);
assert_eq!(result11[0].secret_type, "DigitalOcean API Key");
});
}

Expand All @@ -289,12 +394,21 @@ mod tests {
#[test]
fn test_detect_line_by_line_scanning() {
// Simulates scanning a file line by line
let discord = format!("M{}.{}.{}", "a".repeat(23), "b".repeat(6), "c".repeat(27));
let pypi = format!("pypi-AgEIcHlwaS5vcmc{}", "a".repeat(70));
let lines = vec![
"# Configuration file",
"AKIAIOSFODNN7EXAMPLE", // AWS key on its own line
r#"AWS_SECRET = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY""#,
"OPENAI_KEY = sk-aBcDeFgHiJkLmNoPqRsTT3BlbkFJuVwXyZaBcDeFgHiJkLmN",
"ANTHROPIC_KEY = sk-ant-api03-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-api03-ABCDEFGHIJKLMNOPQRSTUVWXY",
&discord,
&pypi,
"SLACK_TOKEN = xoxb-1234567890-123456789012-abcdef123456",
"STRIPE_KEY = sk_live_1234567890abcdefghijklmn",
"TWILIO_KEY = ACaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"GITLAB_TOKEN = glpat-aaaaaaaaaaaaaaaaaaaa",
"DIGITALOCEAN_KEY = dop_v1_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"NOT_A_SECRET = hello_world",
];

Expand All @@ -308,12 +422,19 @@ mod tests {
}
}

// Should find all four types
assert_eq!(found_secrets.len(), 4);
// Should find all supported types
assert_eq!(found_secrets.len(), 11);
assert!(found_secrets.contains(&"AWS Access Key ID".to_string()));
assert!(found_secrets.contains(&"AWS Secret Access Key".to_string()));
assert!(found_secrets.contains(&"OpenAI Token".to_string()));
assert!(found_secrets.contains(&"Anthropic API Key".to_string()));
assert!(found_secrets.contains(&"Discord Bot Token".to_string()));
assert!(found_secrets.contains(&"PyPI Token".to_string()));
assert!(found_secrets.contains(&"Slack Token".to_string()));
assert!(found_secrets.contains(&"Stripe Access Key".to_string()));
assert!(found_secrets.contains(&"Twilio API Key".to_string()));
assert!(found_secrets.contains(&"GitLab Token".to_string()));
assert!(found_secrets.contains(&"DigitalOcean API Key".to_string()));
});
}

Expand All @@ -335,9 +456,14 @@ mod tests {
fn test_no_false_positives_with_similar_patterns() {
// Ensure similar-looking strings don't trigger false positives
let non_secrets = vec![
"AKIA123456789", // Too short for AWS key
"sk-project-name-only", // Missing T3BlbkFJ
"secret = \"short_value\"", // Too short for AWS secret
"AKIA123456789", // Too short for AWS key
"sk-project-name-only", // Missing T3BlbkFJ
"secret = \"short_value\"", // Too short for AWS secret
"xoxc-1234567890-123456789012-abcdef123456", // Invalid Slack prefix
"sk_live_1234567890abcdefghijk", // Stripe too short
"ACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // Twilio uppercase
"glpat-aaaaaaaaaaaaaaaaaaa", // GitLab too short
"dop_v1_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // DigitalOcean too short
"",
"completely_normal_text",
];
Expand Down
82 changes: 82 additions & 0 deletions src/secrets/digitalocean.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use once_cell::sync::Lazy;
use regex::Regex;

/// Compiled matcher for DigitalOcean API keys, built on first use.
///
/// A key is one of the prefixes `dop`, `doo` or `dor`, followed by `_v1_` and
/// exactly 64 characters from `a-f` / `0-9`, delimited by word boundaries on
/// both sides.
static DIGITALOCEAN_KEY_PATTERN: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"\b((?:dop|doo|dor)_v1_[a-f0-9]{64})\b";
    Regex::new(pattern).expect("Invalid regex pattern")
});

/// Collects every DigitalOcean API key found in a string
///
/// # Arguments
/// * `secret` - Text to scan for DigitalOcean key patterns
///
/// # Returns
/// * `Vec<(String, String)>` - One `(secret_type, value)` pair per match, in the
///   order the matches occur; empty when nothing matches
pub fn detect_digitalocean_keys(secret: &str) -> Vec<(String, String)> {
    DIGITALOCEAN_KEY_PATTERN
        .find_iter(secret)
        .map(|found| ("DigitalOcean API Key".to_string(), found.as_str().to_string()))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a well-formed key: `<prefix>_v1_` followed by 64 `a` characters.
    fn build_key(prefix: &str) -> String {
        format!("{prefix}_v1_{}", "a".repeat(64))
    }

    /// A bare, well-formed key is reported with the right type and value.
    #[test]
    fn test_valid_digitalocean_key() {
        let expected = build_key("dop");
        let found = detect_digitalocean_keys(&expected);
        assert!(!found.is_empty());
        let (kind, matched) = &found[0];
        assert_eq!(kind, "DigitalOcean API Key");
        assert_eq!(matched, &expected);
    }

    /// A key embedded in an assignment is still extracted verbatim.
    #[test]
    fn test_valid_digitalocean_key_in_code() {
        let key = build_key("doo");
        let code = format!("DO_KEY = '{key}'");
        let found = detect_digitalocean_keys(&code);
        assert!(!found.is_empty());
        let (_, matched) = &found[0];
        assert_eq!(*matched, key);
    }

    /// Two keys separated by a space yield two findings.
    #[test]
    fn test_multiple_digitalocean_keys() {
        let key1 = build_key("dop");
        let key2 = build_key("dor");
        let content = format!("{key1} {key2}");
        let found = detect_digitalocean_keys(&content);
        assert_eq!(found.len(), 2);
    }

    /// An unknown prefix (`don`) must not match.
    #[test]
    fn test_invalid_digitalocean_key_prefix() {
        let candidate = build_key("don");
        let found = detect_digitalocean_keys(&candidate);
        assert!(found.is_empty());
    }

    /// A 63-character body is one short of the required length.
    #[test]
    fn test_invalid_digitalocean_key_length() {
        let candidate = format!("dop_v1_{}", "a".repeat(63));
        let found = detect_digitalocean_keys(&candidate);
        assert!(found.is_empty());
    }

    /// Uppercase characters fall outside the accepted `a-f0-9` set.
    #[test]
    fn test_invalid_digitalocean_key_characters() {
        let candidate = format!("dop_v1_{}", "A".repeat(64));
        let found = detect_digitalocean_keys(&candidate);
        assert!(found.is_empty());
    }
}
76 changes: 76 additions & 0 deletions src/secrets/discord.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use once_cell::sync::Lazy;
use regex::Regex;

/// Compiled matcher for Discord bot tokens, built on first use.
///
/// Shape: one of `M`, `N` or `O`, then 23-25 characters, a `.`, 6 characters,
/// another `.`, and finally 27 characters. Every non-dot character comes from
/// the class `[a-zA-Z\d_-]`.
///
/// The pattern carries no word-boundary anchors, so a token embedded in a
/// longer run of characters still matches.
static DISCORD_TOKEN_PATTERN: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"[MNO][a-zA-Z\d_-]{23,25}\.[a-zA-Z\d_-]{6}\.[a-zA-Z\d_-]{27}";
    Regex::new(pattern).expect("Invalid regex pattern")
});

/// Collects every Discord bot token found in a string
///
/// # Arguments
/// * `secret` - Text to scan for Discord bot token patterns
///
/// # Returns
/// * `Vec<(String, String)>` - One `(secret_type, value)` pair per match, in the
///   order the matches occur; empty when nothing matches
pub fn detect_discord_tokens(secret: &str) -> Vec<(String, String)> {
    DISCORD_TOKEN_PATTERN
        .find_iter(secret)
        .map(|found| ("Discord Bot Token".to_string(), found.as_str().to_string()))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A bare token with the shortest first segment is reported verbatim.
    #[test]
    fn test_valid_discord_token() {
        let expected = format!("M{}.{}.{}", "a".repeat(23), "b".repeat(6), "c".repeat(27));
        let found = detect_discord_tokens(&expected);
        assert!(!found.is_empty());
        let (kind, matched) = &found[0];
        assert_eq!(kind, "Discord Bot Token");
        assert_eq!(matched, &expected);
    }

    /// A token with the longest first segment is extracted from an assignment.
    #[test]
    fn test_valid_discord_token_in_code() {
        let token = format!("N{}.{}.{}", "a".repeat(25), "b".repeat(6), "c".repeat(27));
        let code = format!("DISCORD_TOKEN = '{token}'");
        let found = detect_discord_tokens(&code);
        assert!(!found.is_empty());
        let (_, matched) = &found[0];
        assert_eq!(*matched, token);
    }

    /// Two tokens in one string produce two findings of the same type.
    #[test]
    fn test_multiple_discord_tokens() {
        let token1 = format!("M{}.{}.{}", "a".repeat(23), "b".repeat(6), "c".repeat(27));
        let token2 = format!("O{}.{}.{}", "d".repeat(24), "e".repeat(6), "f".repeat(27));
        let content = format!("{token1} and {token2}");
        let found = detect_discord_tokens(&content);
        assert_eq!(found.len(), 2);
        for (kind, _) in &found {
            assert_eq!(kind, "Discord Bot Token");
        }
    }

    /// A leading character outside `M`/`N`/`O` must not match.
    #[test]
    fn test_invalid_discord_token_prefix() {
        let candidate = format!("A{}.{}.{}", "a".repeat(23), "b".repeat(6), "c".repeat(27));
        let found = detect_discord_tokens(&candidate);
        assert!(found.is_empty());
    }

    /// A 5-character middle segment is one short of the required 6.
    #[test]
    fn test_invalid_discord_token_segment_length() {
        let candidate = format!("M{}.{}.{}", "a".repeat(23), "b".repeat(5), "c".repeat(27));
        let found = detect_discord_tokens(&candidate);
        assert!(found.is_empty());
    }
}
Loading