Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/audit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ jobs:
contents: read
issues: write
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- uses: actions-rust-lang/audit@v1
name: Audit Rust Dependencies
10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
rust: ${{ steps.filter.outputs.rust }}
docs: ${{ steps.filter.outputs.docs }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- uses: dorny/paths-filter@v3
id: filter
with:
Expand All @@ -51,7 +51,7 @@ jobs:
quality:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@1.91.0
with:
components: rustfmt, clippy
Expand All @@ -73,7 +73,7 @@ jobs:
needs: changes
if: needs.changes.outputs.rust == 'true'
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.91.0
Expand Down Expand Up @@ -112,7 +112,7 @@ jobs:
needs: changes
if: needs.changes.outputs.rust == 'true'
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.91.0
Expand All @@ -132,7 +132,7 @@ jobs:
needs: [changes, test, test-cross-platform]
if: needs.changes.outputs.rust == 'true'
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.91.0
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
name: CodeQL Analyze
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.91.0
Expand Down
10 changes: 2 additions & 8 deletions .github/workflows/copilot-setup-steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,9 @@ jobs:
# You can define any steps you want, and they will run before the agent starts.
# If you do not check out your code, Copilot will do this for you.
steps:
- name: Checkout code
uses: actions/checkout@v5

- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@1.91.0

- name: Install just task runner
uses: taiki-e/install-action@v2
with:
tool: just
- uses: extractions/setup-just@v3

- name: Set up Python for pre-commit
uses: actions/setup-python@v6
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.91.0
Expand Down
38 changes: 19 additions & 19 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ jobs:
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
- name: Install dist
# we specify bash to get pipefail; it guards against the `curl` command
# failing. otherwise `sh` won't catch that `curl` returned non-0
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.2/cargo-dist-installer.sh | sh"
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.3/cargo-dist-installer.sh | sh"
- name: Cache dist
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v6
with:
name: cargo-dist-cache
path: ~/.cargo/bin/dist
Expand All @@ -82,7 +82,7 @@ jobs:
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v6
with:
name: artifacts-plan-dist-manifest
path: plan-dist-manifest.json
Expand Down Expand Up @@ -120,7 +120,7 @@ jobs:
- name: enable windows longpaths
run: |
git config --global core.longpaths true
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
Expand All @@ -135,7 +135,7 @@ jobs:
run: ${{ matrix.install_dist.run }}
# Get the dist-manifest
- name: Fetch local artifacts
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: target/distrib/
Expand Down Expand Up @@ -168,7 +168,7 @@ jobs:

cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v6
with:
name: artifacts-build-local-${{ join(matrix.targets, '_') }}
path: |
Expand All @@ -185,12 +185,12 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
Expand All @@ -202,7 +202,7 @@ jobs:
shell: bash
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: target/distrib/
Expand Down Expand Up @@ -233,7 +233,7 @@ jobs:
find . -name '*.cdx.xml' | tee -a "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
- name: "Upload artifacts"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v6
with:
name: artifacts-build-global
path: |
Expand All @@ -254,19 +254,19 @@ jobs:
outputs:
val: ${{ steps.host.outputs.manifest }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: target/distrib/
Expand All @@ -279,14 +279,14 @@ jobs:
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v6
with:
# Overwrite the previous copy
name: artifacts-dist-manifest
path: dist-manifest.json
# Create a GitHub Release while uploading all files to it
- name: "Download GitHub Artifacts"
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: artifacts
Expand Down Expand Up @@ -319,14 +319,14 @@ jobs:
GITHUB_EMAIL: "admin+bot@axo.dev"
if: ${{ !fromJson(needs.plan.outputs.val).announcement_is_prerelease || fromJson(needs.plan.outputs.val).publish_prereleases }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
persist-credentials: true
repository: "EvilBit-Labs/homebrew-tap"
token: ${{ secrets.HOMEBREW_TAP_TOKEN }}
# So we have access to the formula
- name: Fetch homebrew formulae
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: Formula/
Expand Down Expand Up @@ -366,7 +366,7 @@ jobs:
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
2 changes: 1 addition & 1 deletion .github/workflows/security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
audit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6

- name: Setup Rust
uses: dtolnay/rust-toolchain@1.91.0
Expand Down
10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@ name = "stringy"
path = "src/main.rs"

[dependencies]
clap = { version = "4.5.51", features = ["derive"] }
clap = { version = "4.5.54", features = ["derive"] }
entropy = "0.4.2"
goblin = "0.10.3"
goblin = "0.10.4"
pelite = "0.10.0"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.145"
serde_json = "1.0.148"
thiserror = "2.0.17"

[dev-dependencies]
criterion = "0.8.1"
insta = "1.43.2"
tempfile = "3.23.0"
insta = "1.46.0"
tempfile = "3.24.0"

# The profile that 'dist' will build with
[profile.dist]
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,5 +218,3 @@ Licensed under Apache 2.0.
- Inspired by `strings(1)` and the need for better binary analysis tools
- Built with Rust ecosystem crates: `goblin`, `bstr`, `regex`, `rustc-demangle`
- My coworkers, for their excellent input on the original name selection


8 changes: 4 additions & 4 deletions dist-workspace.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ members = ["cargo:."]
# Config for 'dist'
[dist]
# The preferred dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.30.2"
cargo-dist-version = "0.30.3"
# CI backends to support
ci = "github"
# The installers to generate for each app
Expand Down Expand Up @@ -54,7 +54,7 @@ install-success-msg = "Successfully installed Stringy! Ready to start looking at
[dist.github]
repository = "EvilBit-Labs/Stringy"
[dist.github-action-commits]
"actions/checkout" = "v5"
"actions/download-artifact" = "v6"
"actions/upload-artifact" = "v5"
"actions/checkout" = "v6"
"actions/download-artifact" = "v7"
"actions/upload-artifact" = "v6"
"actions/attest-build-provenance" = "v3"
4 changes: 0 additions & 4 deletions docs/book.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[book]
authors = ["UncleSp1d3r <unclesp1d3r@evilbitlabs.io>"]
language = "en"
multilingual = false
src = "src"
title = "Stringy User Guide"
description = "Stringy User Guide - A smarter strings extraction tool"
Expand Down Expand Up @@ -47,9 +46,6 @@ heading-split-level = 3
enable = true
level = 1


[preprocessor.alerts]

[preprocessor.mermaid]
command = "mdbook-mermaid"

Expand Down
30 changes: 25 additions & 5 deletions docs/src/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Stringy is built as a modular Rust library with a clear separation of concerns.
## High-Level Architecture

```text
Binary File → Format Detection → Container Parsing → String Extraction → Classification → Ranking → Output
Binary File → Format Detection → Container Parsing → String Extraction → Deduplication → Classification → Ranking → Output
```

## Core Components
Expand Down Expand Up @@ -34,21 +34,35 @@ The parsers implement intelligent section prioritization:
```rust
// Example: ELF section weights
".rodata" | ".rodata.str1.*" => 10.0 // Highest priority
".comment" | ".note.*" => 9.0 // Build info, very likely strings
".comment" | ".note.*" => 9.0 // Build info, very likely strings
".data.rel.ro" => 7.0 // Read-only data
".data" => 5.0 // Writable data
".text" => 1.0 // Code sections (low priority)
```

### 2. Extraction Module (`src/extraction/`) 🚧 **Framework Ready**
### 2. Extraction Module (`src/extraction/`) **Core Complete**

Implements encoding-aware string extraction algorithms with configurable parameters.

- **ASCII/UTF-8**: Scans for printable character sequences with noise filtering
- **UTF-16**: Detects little-endian and big-endian wide strings with confidence scoring
- **Deduplication**: Canonicalizes strings while preserving complete metadata
- **Deduplication**: Groups strings by (text, encoding) keys, preserves all occurrence metadata, merges tags using set union, and calculates combined scores with occurrence-based bonuses
- **Section-Aware**: Uses container parser weights to prioritize extraction areas

#### Deduplication System

The deduplication module (`src/extraction/dedup.rs`) provides comprehensive string deduplication:

- **Grouping Strategy**: Strings are grouped by the `(text, encoding)` tuple, so the same text extracted as UTF-8 and as UTF-16 is kept as two separate entries
- **Occurrence Preservation**: All occurrence metadata (offset, RVA, section, source, tags, score, confidence) is preserved in `StringOccurrence` structures
- **Tag Merging**: Tags from all occurrences are merged using `HashSet` for uniqueness, then converted to a sorted `Vec<Tag>`
- **Combined Scoring**: Calculates combined scores using:
  - Base score: Maximum `original_score` across all occurrences
  - Occurrence bonus: `5 * (occurrences.len() - 1)` points, i.e. 5 points for each occurrence beyond the first
  - Cross-section bonus: `10` points if the string appears in sections with different names
  - Multi-source bonus: `15` points if the string appears from different `StringSource` variants
  - Confidence boost: `(max_confidence * 10.0) as i32`, where `max_confidence` is the highest confidence value

### 3. Classification Module (`src/classification/`) 🚧 **Types Defined**

Applies semantic analysis to extracted strings with comprehensive tagging system.
Expand Down Expand Up @@ -127,6 +141,12 @@ all_strings.extend(extract_symbol_strings(&container_info));

// Deduplicate while preserving all metadata
let unique_strings = deduplicate(all_strings);
// Returns Vec<CanonicalString> with:
// - Grouped by (text, encoding) key
// - All occurrences preserved in occurrences field
// - Merged tags from all occurrences
// - Combined scores with occurrence-based bonuses
// - Sorted by combined_score descending
```

### 3. Classification Phase 🚧 **Types Ready**
Expand All @@ -139,7 +159,7 @@ for string in &mut unique_strings {
source: string.source,
encoding: string.encoding,
};

string.tags = classify_string(&string.text, &context);
string.score = calculate_score(&string, &context);
}
Expand Down
Loading
Loading