From e86c1df44830e38c84058c3b0e35dc52b3e3303a Mon Sep 17 00:00:00 2001 From: Mijamind Date: Wed, 18 Feb 2026 14:47:46 +0800 Subject: [PATCH 1/2] feat: add ast-grep search across OpenViking SDK and CLI - add AST-based code search endpoint: POST /api/v1/search/ast-grep - implement VikingFS ast_grep with sg command integration, file materialization, output parsing, and error handling - expose ast_grep in FSService, local/async/sync clients, and HTTP client stack - add Python CLI ast-grep command and Rust CLI ast-grep command support - update English/Chinese API and architecture docs for ast-grep usage/examples - add tests for server API, SDK client, Python client, and CLI help for ast-grep --- crates/ov_cli/README.md | 1 + crates/ov_cli/src/client.rs | 22 ++ crates/ov_cli/src/commands/search.rs | 19 ++ crates/ov_cli/src/main.rs | 56 ++++ docs/en/api/01-overview.md | 1 + docs/en/api/03-filesystem.md | 59 ++++ docs/en/api/06-retrieval.md | 91 +++++++ docs/en/concepts/01-architecture.md | 2 +- docs/zh/api/01-overview.md | 1 + docs/zh/api/03-filesystem.md | 59 ++++ docs/zh/api/06-retrieval.md | 91 +++++++ docs/zh/concepts/01-architecture.md | 2 +- openviking/async_client.py | 22 ++ openviking/client/local.py | 21 ++ openviking/server/routers/search.py | 31 +++ openviking/service/fs_service.py | 24 +- openviking/storage/viking_fs.py | 372 +++++++++++++++++++++++++- openviking/sync_client.py | 23 ++ openviking_cli/cli/commands/search.py | 31 +++ openviking_cli/client/base.py | 14 + openviking_cli/client/http.py | 25 ++ openviking_cli/client/sync_http.py | 23 ++ tests/README.md | 4 +- tests/cli/test_cli.py | 1 + tests/client/test_search.py | 28 ++ tests/server/test_api_search.py | 43 +++ tests/server/test_http_client_sdk.py | 28 ++ 27 files changed, 1088 insertions(+), 6 deletions(-) diff --git a/crates/ov_cli/README.md b/crates/ov_cli/README.md index 12115a0f..732976d0 100644 --- a/crates/ov_cli/README.md +++ b/crates/ov_cli/README.md @@ -77,6 +77,7 @@ ov read 
viking://resources/... - `search` - Context-aware retrieval - `grep` - Content pattern search - `glob` - File glob pattern +- `ast-grep` - AST-based code search ### System - `system wait` - Wait for async processing diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 53dc3b7b..0364fd00 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -287,6 +287,28 @@ impl HttpClient { self.post("/api/v1/search/glob", &body).await } + pub async fn ast_grep( + &self, + uri: &str, + pattern: Option<&str>, + rule: Option<&str>, + language: Option<&str>, + file_glob: &str, + limit: i32, + max_file_size_kb: i32, + ) -> Result { + let body = serde_json::json!({ + "uri": uri, + "pattern": pattern, + "rule": rule, + "language": language, + "file_glob": file_glob, + "limit": limit, + "max_file_size_kb": max_file_size_kb, + }); + self.post("/api/v1/search/ast-grep", &body).await + } + // ============ Resource Methods ============ pub async fn add_resource( diff --git a/crates/ov_cli/src/commands/search.rs b/crates/ov_cli/src/commands/search.rs index 45a4d00b..abc28485 100644 --- a/crates/ov_cli/src/commands/search.rs +++ b/crates/ov_cli/src/commands/search.rs @@ -55,3 +55,22 @@ pub async fn glob( output_success(&result, output_format, compact); Ok(()) } + +pub async fn ast_grep( + client: &HttpClient, + uri: &str, + pattern: Option<&str>, + rule: Option<&str>, + language: Option<&str>, + file_glob: &str, + limit: i32, + max_file_size_kb: i32, + output_format: OutputFormat, + compact: bool, +) -> Result<()> { + let result = client + .ast_grep(uri, pattern, rule, language, file_glob, limit, max_file_size_kb) + .await?; + output_success(&result, output_format, compact); + Ok(()) +} diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 0d2ac0ce..a470db56 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -249,6 +249,28 @@ enum Commands { #[arg(short, long, default_value = "viking://")] uri: String, }, 
+ /// Run AST-based code search + AstGrep { + /// Target URI + uri: String, + /// ast-grep pattern (required when --rule is not set) + pattern: Option, + /// Rule file path or inline YAML/JSON content + #[arg(long)] + rule: Option, + /// Language hint + #[arg(short, long)] + language: Option, + /// File glob to scan + #[arg(long, default_value = "**/*")] + file_glob: String, + /// Maximum number of matches to return + #[arg(short = 'n', long, default_value = "200")] + limit: i32, + /// Skip files larger than this size (KB) + #[arg(long, default_value = "512")] + max_file_size_kb: i32, + }, /// Configuration management Config { #[command(subcommand)] @@ -404,6 +426,9 @@ async fn main() { Commands::Glob { pattern, uri } => { handle_glob(pattern, uri, ctx).await } + Commands::AstGrep { uri, pattern, rule, language, file_glob, limit, max_file_size_kb } => { + handle_ast_grep(uri, pattern, rule, language, file_glob, limit, max_file_size_kb, ctx).await + } }; if let Err(e) = result { @@ -651,3 +676,34 @@ async fn handle_glob(pattern: String, uri: String, ctx: CliContext) -> Result<() let client = ctx.get_client(); commands::search::glob(&client, &pattern, &uri, ctx.output_format, ctx.compact).await } + +async fn handle_ast_grep( + uri: String, + pattern: Option, + rule: Option, + language: Option, + file_glob: String, + limit: i32, + max_file_size_kb: i32, + ctx: CliContext, +) -> Result<()> { + if pattern.is_some() == rule.is_some() { + return Err(crate::error::Error::Client( + "Exactly one of pattern or --rule must be provided".to_string(), + )); + } + let client = ctx.get_client(); + commands::search::ast_grep( + &client, + &uri, + pattern.as_deref(), + rule.as_deref(), + language.as_deref(), + &file_glob, + limit, + max_file_size_kb, + ctx.output_format, + ctx.compact, + ) + .await +} diff --git a/docs/en/api/01-overview.md b/docs/en/api/01-overview.md index 7b1b9c33..71f6d1b9 100644 --- a/docs/en/api/01-overview.md +++ b/docs/en/api/01-overview.md @@ -303,6 +303,7 
@@ Compact JSON with status wrapper, suitable for scripting. Overrides `--output`: | POST | `/api/v1/search/search` | Context-aware search | | POST | `/api/v1/search/grep` | Pattern search | | POST | `/api/v1/search/glob` | File pattern matching | +| POST | `/api/v1/search/ast-grep` | AST-based code search | ### Relations diff --git a/docs/en/api/03-filesystem.md b/docs/en/api/03-filesystem.md index e1a762e2..4a7e7702 100644 --- a/docs/en/api/03-filesystem.md +++ b/docs/en/api/03-filesystem.md @@ -643,6 +643,65 @@ openviking glob "**/*.md" [--uri viking://resources/] --- +### ast_grep() + +Search code structure using [ast-grep](https://ast-grep.github.io/). + +**Parameters** + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| uri | str | Yes | - | Viking URI to search in | +| pattern | str | No* | - | ast-grep pattern | +| rule | str | No* | - | Rule file path or inline YAML/JSON rule content | +| language | str | No | auto by extension | Language hint for parser | +| file_glob | str | No | `"**/*"` | File glob to scan | +| limit | int | No | 200 | Max returned matches | +| max_file_size_kb | int | No | 512 | Skip files larger than this size | + +\* Exactly one of `pattern` or `rule` is required. 
+ +**Python SDK (Embedded / HTTP)** + +```python +results = client.ast_grep( + uri="viking://resources/", + pattern="def $NAME($$$ARGS):", + language="python", + file_glob="**/*.py", +) +``` + +**HTTP API** + +``` +POST /api/v1/search/ast-grep +``` + +```bash +curl -X POST http://localhost:1933/api/v1/search/ast-grep \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d '{ + "uri": "viking://resources/", + "pattern": "def $NAME($$$ARGS):", + "language": "python", + "file_glob": "**/*.py" + }' +``` + +**CLI** + +```bash +openviking ast-grep viking://resources/ 'def $NAME($$$ARGS):' \ + --language python \ + --file-glob "**/*.py" +``` + +For full response examples, see [Retrieval API](06-retrieval.md#ast_grep). + +--- + ### link() Create relations between resources. diff --git a/docs/en/api/06-retrieval.md b/docs/en/api/06-retrieval.md index b7b11ba1..fbe63297 100644 --- a/docs/en/api/06-retrieval.md +++ b/docs/en/api/06-retrieval.md @@ -424,6 +424,97 @@ openviking glob "**/*.md" [--uri viking://resources/] --- +### ast_grep() + +Search code structure using [ast-grep](https://ast-grep.github.io/). + +**Parameters** + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| uri | str | Yes | - | Viking URI to search in | +| pattern | str | No* | - | ast-grep pattern | +| rule | str | No* | - | Rule file path or inline YAML/JSON rule content | +| language | str | No | auto by extension | Language hint for parser | +| file_glob | str | No | `"**/*"` | File glob to scan | +| limit | int | No | 200 | Max returned matches | +| max_file_size_kb | int | No | 512 | Skip files larger than this size | + +\* Exactly one of `pattern` or `rule` is required.
+ +**Python SDK (Embedded / HTTP)** + +```python +results = client.ast_grep( + uri="viking://resources/", + pattern="def $NAME($$$ARGS):", + language="python", + file_glob="**/*.py", + limit=100, +) + +print(f"Found {results['count']} matches") +for match in results["matches"]: + print(f"{match['uri']}:{match['start_line']}:{match['start_col']}") + print(match["content"]) +``` + +**HTTP API** + +``` +POST /api/v1/search/ast-grep +``` + +```bash +curl -X POST http://localhost:1933/api/v1/search/ast-grep \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d '{ + "uri": "viking://resources/", + "pattern": "def $NAME($$$ARGS):", + "language": "python", + "file_glob": "**/*.py", + "limit": 100 + }' +``` + +**CLI** + +```bash +openviking ast-grep viking://resources/ 'def $NAME($$$ARGS):' \ + --language python \ + --file-glob "**/*.py" \ + --limit 100 +``` + +**Response** + +```json +{ + "status": "ok", + "result": { + "matches": [ + { + "uri": "viking://resources/app/main.py", + "language": "python", + "start_line": 10, + "start_col": 1, + "end_line": 12, + "end_col": 1, + "content": "def hello(name):\n return f\"Hello {name}\"" + } + ], + "count": 1, + "scanned_files": 3, + "skipped_files": 0, + "truncated": false + }, + "time": 0.1 +} +``` + +--- + ## Retrieval Pipeline ``` diff --git a/docs/en/concepts/01-architecture.md b/docs/en/concepts/01-architecture.md index 88a136f1..2203e0f3 100644 --- a/docs/en/concepts/01-architecture.md +++ b/docs/en/concepts/01-architecture.md @@ -67,7 +67,7 @@ The Service layer decouples business logic from the transport layer, enabling re | Service | Responsibility | Key Methods | |---------|----------------|-------------| -| **FSService** | File system operations | ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob | +| **FSService** | File system operations | ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob, ast-grep | | **SearchService** | Semantic search | search, find | |
**SessionService** | Session management | session, sessions, commit, delete | | **ResourceService** | Resource import | add_resource, add_skill, wait_processed | diff --git a/docs/zh/api/01-overview.md b/docs/zh/api/01-overview.md index 1cbdbcf9..c3a58922 100644 --- a/docs/zh/api/01-overview.md +++ b/docs/zh/api/01-overview.md @@ -304,6 +304,7 @@ openviking -o json ls viking://resources/ | POST | `/api/v1/search/search` | 上下文感知搜索 | | POST | `/api/v1/search/grep` | 模式搜索 | | POST | `/api/v1/search/glob` | 文件模式匹配 | +| POST | `/api/v1/search/ast-grep` | 基于 AST 的代码搜索 | ### 关联 diff --git a/docs/zh/api/03-filesystem.md b/docs/zh/api/03-filesystem.md index 75c94f9a..5e8309ce 100644 --- a/docs/zh/api/03-filesystem.md +++ b/docs/zh/api/03-filesystem.md @@ -643,6 +643,65 @@ openviking glob "**/*.md" [--uri viking://resources/] --- +### ast_grep() + +使用 [ast-grep](https://ast-grep.github.io/) 做代码结构搜索。 + +**参数** + +| 参数 | 类型 | 必填 | 默认值 | 说明 | +|------|------|------|--------|------| +| uri | str | 是 | - | 要搜索的 Viking URI | +| pattern | str | 否* | - | ast-grep 模式 | +| rule | str | 否* | - | 规则文件路径或内联 YAML/JSON 规则内容 | +| language | str | 否 | 按扩展名自动推断 | 语言提示 | +| file_glob | str | 否 | `"**/*"` | 要扫描的文件 glob | +| limit | int | 否 | 200 | 最多返回匹配数 | +| max_file_size_kb | int | 否 | 512 | 跳过超过该大小的文件 | + +\* `pattern` 和 `rule` 必须且只能提供一个。 + +**Python SDK (Embedded / HTTP)** + +```python +results = client.ast_grep( + uri="viking://resources/", + pattern="def $NAME($$$ARGS):", + language="python", + file_glob="**/*.py", +) +``` + +**HTTP API** + +``` +POST /api/v1/search/ast-grep +``` + +```bash +curl -X POST http://localhost:1933/api/v1/search/ast-grep \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d '{ + "uri": "viking://resources/", + "pattern": "def $NAME($$$ARGS):", + "language": "python", + "file_glob": "**/*.py" + }' +``` + +**CLI** + +```bash +openviking ast-grep viking://resources/ 'def $NAME($$$ARGS):' \ + --language python \ + --file-glob "**/*.py" +``` +
+完整响应示例见:[检索 API](06-retrieval.md#ast_grep)。 + +--- + ### link() 创建资源之间的关联。 diff --git a/docs/zh/api/06-retrieval.md b/docs/zh/api/06-retrieval.md index 0c4268a2..6cd4d740 100644 --- a/docs/zh/api/06-retrieval.md +++ b/docs/zh/api/06-retrieval.md @@ -424,6 +424,97 @@ openviking glob "**/*.md" [--uri viking://resources/] --- +### ast_grep() + +使用 [ast-grep](https://ast-grep.github.io/) 做代码结构搜索。 + +**参数** + +| 参数 | 类型 | 必填 | 默认值 | 说明 | +|------|------|------|--------|------| +| uri | str | 是 | - | 要搜索的 Viking URI | +| pattern | str | 否* | - | ast-grep 模式 | +| rule | str | 否* | - | 规则文件路径或内联 YAML/JSON 规则内容 | +| language | str | 否 | 按扩展名自动推断 | 语言提示 | +| file_glob | str | 否 | `"**/*"` | 要扫描的文件 glob | +| limit | int | 否 | 200 | 最多返回匹配数 | +| max_file_size_kb | int | 否 | 512 | 跳过超过该大小的文件 | + +\* `pattern` 和 `rule` 必须且只能提供一个。 + +**Python SDK (Embedded / HTTP)** + +```python +results = client.ast_grep( + uri="viking://resources/", + pattern="def $NAME($$$ARGS):", + language="python", + file_glob="**/*.py", + limit=100, +) + +print(f"Found {results['count']} matches") +for match in results["matches"]: + print(f"{match['uri']}:{match['start_line']}:{match['start_col']}") + print(match["content"]) +``` + +**HTTP API** + +``` +POST /api/v1/search/ast-grep +``` + +```bash +curl -X POST http://localhost:1933/api/v1/search/ast-grep \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d '{ + "uri": "viking://resources/", + "pattern": "def $NAME($$$ARGS):", + "language": "python", + "file_glob": "**/*.py", + "limit": 100 + }' +``` + +**CLI** + +```bash +openviking ast-grep viking://resources/ 'def $NAME($$$ARGS):' \ + --language python \ + --file-glob "**/*.py" \ + --limit 100 +``` + +**响应** + +```json +{ + "status": "ok", + "result": { + "matches": [ + { + "uri": "viking://resources/app/main.py", + "language": "python", + "start_line": 10, + "start_col": 1, + "end_line": 12, + "end_col": 1, + "content": "def hello(name):\n return f\"Hello {name}\"" + } + ], +
"count": 1, + "scanned_files": 3, + "skipped_files": 0, + "truncated": false + }, + "time": 0.1 +} +``` + +--- + ## 检索流程 ``` diff --git a/docs/zh/concepts/01-architecture.md b/docs/zh/concepts/01-architecture.md index 101917f3..7f42562c 100644 --- a/docs/zh/concepts/01-architecture.md +++ b/docs/zh/concepts/01-architecture.md @@ -66,7 +66,7 @@ Service 层将业务逻辑与传输层解耦,便于 HTTP Server 和 CLI 复用 | Service | 职责 | 主要方法 | |---------|------|----------| -| **FSService** | 文件系统操作 | ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob | +| **FSService** | 文件系统操作 | ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob, ast-grep | | **SearchService** | 语义搜索 | search, find | | **SessionService** | 会话管理 | session, sessions, commit, delete | | **ResourceService** | 资源导入 | add_resource, add_skill, wait_processed | diff --git a/openviking/async_client.py b/openviking/async_client.py index b4f48ed5..04dd1d73 100644 --- a/openviking/async_client.py +++ b/openviking/async_client.py @@ -295,6 +295,28 @@ async def glob(self, pattern: str, uri: str = "viking://") -> Dict: await self._ensure_initialized() return await self._client.glob(pattern, uri=uri) + async def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict: + """Code structure search powered by ast-grep.""" + await self._ensure_initialized() + return await self._client.ast_grep( + uri=uri, + pattern=pattern, + rule=rule, + language=language, + file_glob=file_glob, + limit=limit, + max_file_size_kb=max_file_size_kb, + ) + async def mv(self, from_uri: str, to_uri: str) -> None: """Move resource""" await self._ensure_initialized() diff --git a/openviking/client/local.py b/openviking/client/local.py index 46acca99..b7aaa982 100644 --- a/openviking/client/local.py +++ b/openviking/client/local.py @@ -199,6 +199,27 @@ async def glob(self, pattern: 
str, uri: str = "viking://") -> Dict[str, Any]: """File pattern matching.""" return await self._service.fs.glob(pattern, uri=uri) + async def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict[str, Any]: + """Code structure search powered by ast-grep.""" + return await self._service.fs.ast_grep( + uri=uri, + pattern=pattern, + rule=rule, + language=language, + file_glob=file_glob, + limit=limit, + max_file_size_kb=max_file_size_kb, + ) + # ============= Relations ============= async def relations(self, uri: str) -> List[Any]: diff --git a/openviking/server/routers/search.py b/openviking/server/routers/search.py index 02e20029..9f85f2b6 100644 --- a/openviking/server/routers/search.py +++ b/openviking/server/routers/search.py @@ -50,6 +50,18 @@ class GlobRequest(BaseModel): uri: str = "viking://" +class AstGrepRequest(BaseModel): + """Request model for ast-grep.""" + + uri: str + pattern: Optional[str] = None + rule: Optional[str] = None + language: Optional[str] = None + file_glob: str = "**/*" + limit: int = 200 + max_file_size_kb: int = 512 + + @router.post("/find") async def find( request: FindRequest, @@ -122,3 +134,22 @@ async def glob( service = get_service() result = await service.fs.glob(request.pattern, uri=request.uri) return Response(status="ok", result=result) + + +@router.post("/ast-grep") +async def ast_grep( + request: AstGrepRequest, + _: bool = Depends(verify_api_key), +): + """Code structure search via ast-grep.""" + service = get_service() + result = await service.fs.ast_grep( + uri=request.uri, + pattern=request.pattern, + rule=request.rule, + language=request.language, + file_glob=request.file_glob, + limit=request.limit, + max_file_size_kb=request.max_file_size_kb, + ) + return Response(status="ok", result=result) diff --git a/openviking/service/fs_service.py 
b/openviking/service/fs_service.py index 72e5526d..28b804ef 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -3,7 +3,7 @@ """ File System Service for OpenViking. -Provides file system operations: ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob. +Provides file system operations: ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob, ast-grep. """ from typing import Any, Dict, List, Optional @@ -122,3 +122,25 @@ async def glob(self, pattern: str, uri: str = "viking://") -> Dict: """File pattern matching.""" viking_fs = self._ensure_initialized() return await viking_fs.glob(pattern, uri=uri) + + async def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict[str, Any]: + """Code structure search powered by ast-grep.""" + viking_fs = self._ensure_initialized() + return await viking_fs.ast_grep( + uri=uri, + pattern=pattern, + rule=rule, + language=language, + file_glob=file_glob, + limit=limit, + max_file_size_kb=max_file_size_kb, + ) diff --git a/openviking/storage/viking_fs.py b/openviking/storage/viking_fs.py index 3f3f173e..c5adef68 100644 --- a/openviking/storage/viking_fs.py +++ b/openviking/storage/viking_fs.py @@ -14,15 +14,19 @@ import asyncio import json +import shutil +import subprocess +import tempfile from dataclasses import dataclass, field from datetime import datetime -from pathlib import PurePath +from pathlib import Path, PurePath from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from pyagfs import AGFSClient from openviking.storage.vikingdb_interface import VikingDBInterface from openviking.utils.time_utils import format_simplified, get_current_timestamp +from openviking_cli.exceptions import InvalidArgumentError, ProcessingError, UnavailableError from openviking_cli.utils.logger import get_logger from 
openviking_cli.utils.uri import VikingURI @@ -31,6 +35,41 @@ logger = get_logger(__name__) +_SG_LANGUAGE_BY_SUFFIX = { + ".py": "python", + ".js": "javascript", + ".jsx": "jsx", + ".ts": "typescript", + ".tsx": "tsx", + ".go": "go", + ".rs": "rust", + ".java": "java", + ".kt": "kotlin", + ".kts": "kotlin", + ".c": "c", + ".h": "c", + ".cc": "cpp", + ".cpp": "cpp", + ".cxx": "cpp", + ".hpp": "cpp", + ".cs": "csharp", + ".rb": "ruby", + ".php": "php", + ".swift": "swift", + ".scala": "scala", + ".lua": "lua", + ".json": "json", + ".yaml": "yaml", + ".yml": "yaml", + ".toml": "toml", + ".html": "html", + ".css": "css", + ".scss": "scss", + ".vue": "vue", + ".svelte": "svelte", + ".sql": "sql", +} + # ========== Dataclass ========== @@ -212,6 +251,337 @@ async def glob(self, pattern: str, uri: str = "viking://") -> Dict: matches.append(f"{base_uri}/{rel_path}") return {"matches": matches, "count": len(matches)} + async def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict[str, Any]: + """Search code structure with ast-grep.""" + if bool(pattern) == bool(rule): + raise InvalidArgumentError("Exactly one of 'pattern' or 'rule' must be provided") + if not file_glob: + raise InvalidArgumentError("'file_glob' cannot be empty") + if limit <= 0: + raise InvalidArgumentError("'limit' must be a positive integer") + if max_file_size_kb <= 0: + raise InvalidArgumentError("'max_file_size_kb' must be a positive integer") + if shutil.which("sg") is None: + raise UnavailableError("ast-grep", "missing required binary: sg") + + entries = await self.tree(uri, output="original", show_all_hidden=False) + + skipped_files = 0 + max_file_size_bytes = max_file_size_kb * 1024 + candidates: List[Dict[str, Any]] = [] + + for entry in entries: + if entry.get("isDir"): + continue + rel_path = entry.get("rel_path", "") + if not 
rel_path or not PurePath(rel_path).match(file_glob): + continue + + size = int(entry.get("size", 0) or 0) + if size > max_file_size_bytes: + skipped_files += 1 + continue + + resolved_language = language or self._infer_sg_language(rel_path) + if resolved_language is None: + skipped_files += 1 + continue + + candidates.append( + { + "rel_path": rel_path, + "uri": entry.get("uri", str(VikingURI(uri).join(rel_path))), + "language": resolved_language, + } + ) + + if not candidates: + return { + "matches": [], + "count": 0, + "scanned_files": 0, + "skipped_files": skipped_files, + "truncated": False, + } + + matches: List[Dict[str, Any]] = [] + scanned_files_total = 0 + with tempfile.TemporaryDirectory(prefix="ov_ast_grep_") as tmp_dir: + scan_root = Path(tmp_dir) / "scan_root" + scan_root.mkdir(parents=True, exist_ok=True) + rule_path = self._prepare_ast_rule_file(rule, tmp_dir) + + groups: Dict[str, List[Dict[str, Any]]] = {} + for candidate in candidates: + candidate_language = language or candidate["language"] + groups.setdefault(candidate_language, []).append(candidate) + + for group_language, group_candidates in groups.items(): + path_map = await self._materialize_ast_grep_files(scan_root, group_candidates) + if not path_map: + skipped_files += len(group_candidates) + continue + + scanned_files = len(path_map) + scanned_files_total += scanned_files + try: + raw_output = await asyncio.to_thread( + self._run_ast_grep_scan, + file_paths=list(path_map.keys()), + pattern=pattern, + rule_path=rule_path, + language=group_language, + cwd=str(scan_root), + ) + except ProcessingError: + raise + except Exception as exc: + raise ProcessingError(f"ast-grep execution failed: {exc}") from exc + + parsed_matches = self._parse_ast_grep_output(raw_output, path_map) + matches.extend(parsed_matches) + skipped_files += len(group_candidates) - scanned_files + + matches.sort( + key=lambda m: ( + m.get("uri", ""), + int(m.get("start_line", 0) or 0), + int(m.get("start_col", 0) or 0), + ) 
+ ) + total_matches = len(matches) + truncated = total_matches > limit + if truncated: + matches = matches[:limit] + + return { + "matches": matches, + "count": total_matches, + "scanned_files": scanned_files_total, + "skipped_files": skipped_files, + "truncated": truncated, + } + + async def _materialize_ast_grep_files( + self, scan_root: Path, candidates: List[Dict[str, Any]] + ) -> Dict[str, Dict[str, Any]]: + path_map: Dict[str, Dict[str, Any]] = {} + for candidate in candidates: + local_file = (scan_root / candidate["rel_path"]).resolve() + local_file.parent.mkdir(parents=True, exist_ok=True) + try: + content = await self.read_file(candidate["uri"]) + except Exception: + continue + local_file.write_text(content, encoding="utf-8") + path_map[str(local_file)] = candidate + path_map[candidate["rel_path"]] = candidate + return path_map + + def _prepare_ast_rule_file(self, rule: Optional[str], tmp_dir: str) -> Optional[str]: + if not rule: + return None + rule_path = Path(rule).expanduser() + if rule_path.exists() and rule_path.is_file(): + return str(rule_path.resolve()) + + generated_rule = Path(tmp_dir) / "ast_rule.yml" + generated_rule.write_text(rule, encoding="utf-8") + return str(generated_rule.resolve()) + + def _infer_sg_language(self, rel_path: str) -> Optional[str]: + return _SG_LANGUAGE_BY_SUFFIX.get(Path(rel_path).suffix.lower()) + + def _run_ast_grep_scan( + self, + *, + file_paths: List[str], + pattern: Optional[str], + rule_path: Optional[str], + language: Optional[str], + cwd: Optional[str] = None, + ) -> str: + commands = self._build_ast_grep_commands( + file_paths=file_paths, + pattern=pattern, + rule_path=rule_path, + language=language, + ) + attempted_errors: List[str] = [] + for command in commands: + try: + proc = subprocess.run( + command, + cwd=cwd, + capture_output=True, + text=True, + check=False, + ) + except FileNotFoundError as exc: + raise UnavailableError("ast-grep", "missing required binary: sg") from exc + + if proc.returncode == 
0: + return proc.stdout + + stderr = (proc.stderr or "").strip() + attempted_errors.append(f"{' '.join(command[:4])}: {stderr}") + + details = "; ".join(attempted_errors[-4:]) + raise ProcessingError(f"ast-grep command failed: {details}") + + def _build_ast_grep_commands( + self, + *, + file_paths: List[str], + pattern: Optional[str], + rule_path: Optional[str], + language: Optional[str], + ) -> List[List[str]]: + command_variants: List[List[str]] = [] + base_options = ["--json"] + if pattern is not None: + base_options.extend(["--pattern", pattern]) + if rule_path is not None: + base_options.extend(["--rule", rule_path]) + + language_options: List[List[str]] = [[]] + if language: + language_options = [["--lang", language], ["-l", language], []] + + for lang_option in language_options: + command_variants.append(["sg", "scan", *base_options, *lang_option, *file_paths]) + command_variants.append(["sg", *base_options, *lang_option, *file_paths]) + + return command_variants + + def _parse_ast_grep_output( + self, raw_output: str, path_map: Dict[str, Dict[str, Any]] + ) -> List[Dict[str, Any]]: + payloads = self._decode_ast_grep_payloads(raw_output) + results: List[Dict[str, Any]] = [] + + for payload in payloads: + record = payload + if isinstance(payload, dict) and isinstance(payload.get("matches"), list): + for match in payload["matches"]: + normalized = self._normalize_ast_match(match, path_map) + if normalized is not None: + results.append(normalized) + continue + if isinstance(payload, dict) and isinstance(payload.get("results"), list): + for match in payload["results"]: + normalized = self._normalize_ast_match(match, path_map) + if normalized is not None: + results.append(normalized) + continue + + normalized = self._normalize_ast_match(record, path_map) + if normalized is not None: + results.append(normalized) + + return results + + def _decode_ast_grep_payloads(self, raw_output: str) -> List[Dict[str, Any]]: + text = (raw_output or "").strip() + if not text: + 
return [] + + try: + payload = json.loads(text) + except json.JSONDecodeError: + payload = None + + if isinstance(payload, list): + return [item for item in payload if isinstance(item, dict)] + if isinstance(payload, dict): + return [payload] + + decoded: List[Dict[str, Any]] = [] + for line in text.splitlines(): + line = line.strip() + if not line: + continue + try: + item = json.loads(line) + except json.JSONDecodeError: + continue + if isinstance(item, dict): + decoded.append(item) + return decoded + + def _normalize_ast_match( + self, match: Dict[str, Any], path_map: Dict[str, Dict[str, Any]] + ) -> Optional[Dict[str, Any]]: + if not isinstance(match, dict): + return None + + file_path = str(match.get("file") or match.get("path") or "") + if not file_path: + return None + normalized_path = str(Path(file_path).resolve()) + + meta = path_map.get(file_path) or path_map.get(normalized_path) + if meta is None: + return None + + range_info = match.get("range", {}) if isinstance(match.get("range"), dict) else {} + start = range_info.get("start", {}) if isinstance(range_info.get("start"), dict) else {} + end = range_info.get("end", {}) if isinstance(range_info.get("end"), dict) else {} + + start_line = self._extract_position_value(start, "line", default=0, add_one=True) + start_col = self._extract_position_value(start, "column", default=0, add_one=True) + end_line = self._extract_position_value(end, "line", default=0, add_one=True) + end_col = self._extract_position_value(end, "column", default=0, add_one=True) + + if start_line == 0: + start_line = self._extract_int(match.get("line"), default=0) + if end_line == 0: + end_line = start_line + + text = match.get("text", "") + if not text: + lines = match.get("lines") + if isinstance(lines, list): + text = "\n".join(str(line) for line in lines) + elif isinstance(lines, str): + text = lines + else: + text = str(match.get("content", "")) + + return { + "uri": meta["uri"], + "language": match.get("language", 
meta.get("language")), + "start_line": start_line, + "start_col": start_col, + "end_line": end_line, + "end_col": end_col, + "content": text, + } + + def _extract_position_value( + self, position: Dict[str, Any], key: str, default: int = 0, add_one: bool = False + ) -> int: + raw_value = position.get(key) + if raw_value is None: + return default + value = self._extract_int(raw_value, default=default) + return value + 1 if add_one else value + + def _extract_int(self, value: Any, default: int = 0) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default + async def _batch_fetch_abstracts( self, entries: List[Dict[str, Any]], diff --git a/openviking/sync_client.py b/openviking/sync_client.py index 632c9d7a..1df685af 100644 --- a/openviking/sync_client.py +++ b/openviking/sync_client.py @@ -171,6 +171,29 @@ def glob(self, pattern: str, uri: str = "viking://") -> Dict: """File pattern matching""" return run_async(self._async_client.glob(pattern, uri)) + def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict: + """Code structure search powered by ast-grep.""" + return run_async( + self._async_client.ast_grep( + uri=uri, + pattern=pattern, + rule=rule, + language=language, + file_glob=file_glob, + limit=limit, + max_file_size_kb=max_file_size_kb, + ) + ) + def mv(self, from_uri: str, to_uri: str) -> None: """Move resource""" return run_async(self._async_client.mv(from_uri, to_uri)) diff --git a/openviking_cli/cli/commands/search.py b/openviking_cli/cli/commands/search.py index cf3b956e..456f2170 100644 --- a/openviking_cli/cli/commands/search.py +++ b/openviking_cli/cli/commands/search.py @@ -84,3 +84,34 @@ def glob_command( ) -> None: """Run file glob pattern search.""" run(ctx, lambda client: client.glob(pattern, uri=uri)) + + @app.command("ast-grep") + def ast_grep_command( + 
ctx: typer.Context, + uri: str = typer.Argument(..., help="Target URI"), + pattern: Optional[str] = typer.Argument(None, help="ast-grep pattern"), + rule: Optional[str] = typer.Option( + None, "--rule", help="Rule file path or inline YAML/JSON rule content" + ), + language: Optional[str] = typer.Option(None, "--language", "-l", help="Language hint"), + file_glob: str = typer.Option("**/*", "--file-glob", help="File glob to scan"), + limit: int = typer.Option(200, "--limit", "-n", help="Maximum number of matches"), + max_file_size_kb: int = typer.Option( + 512, + "--max-file-size-kb", + help="Skip files larger than this size (KB)", + ), + ) -> None: + """Run AST-based code search with ast-grep.""" + run( + ctx, + lambda client: client.ast_grep( + uri=uri, + pattern=pattern, + rule=rule, + language=language, + file_glob=file_glob, + limit=limit, + max_file_size_kb=max_file_size_kb, + ), + ) diff --git a/openviking_cli/client/base.py b/openviking_cli/client/base.py index b72d4f3a..2c104d58 100644 --- a/openviking_cli/client/base.py +++ b/openviking_cli/client/base.py @@ -157,6 +157,20 @@ async def glob(self, pattern: str, uri: str = "viking://") -> Dict[str, Any]: """File pattern matching.""" ... + @abstractmethod + async def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict[str, Any]: + """Code structure search powered by ast-grep.""" + ... 
+ # ============= Relations ============= @abstractmethod diff --git a/openviking_cli/client/http.py b/openviking_cli/client/http.py index a8ae3726..58e9b4a5 100644 --- a/openviking_cli/client/http.py +++ b/openviking_cli/client/http.py @@ -439,6 +439,31 @@ async def glob(self, pattern: str, uri: str = "viking://") -> Dict[str, Any]: ) return self._handle_response(response) + async def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict[str, Any]: + """Code structure search powered by ast-grep.""" + response = await self._http.post( + "/api/v1/search/ast-grep", + json={ + "uri": uri, + "pattern": pattern, + "rule": rule, + "language": language, + "file_glob": file_glob, + "limit": limit, + "max_file_size_kb": max_file_size_kb, + }, + ) + return self._handle_response(response) + # ============= Relations ============= async def relations(self, uri: str) -> List[Any]: diff --git a/openviking_cli/client/sync_http.py b/openviking_cli/client/sync_http.py index fdfa450f..8ace5588 100644 --- a/openviking_cli/client/sync_http.py +++ b/openviking_cli/client/sync_http.py @@ -150,6 +150,29 @@ def glob(self, pattern: str, uri: str = "viking://") -> Dict: """File pattern matching.""" return run_async(self._async_client.glob(pattern, uri)) + def ast_grep( + self, + uri: str, + pattern: Optional[str] = None, + rule: Optional[str] = None, + language: Optional[str] = None, + file_glob: str = "**/*", + limit: int = 200, + max_file_size_kb: int = 512, + ) -> Dict: + """Code structure search powered by ast-grep.""" + return run_async( + self._async_client.ast_grep( + uri=uri, + pattern=pattern, + rule=rule, + language=language, + file_glob=file_glob, + limit=limit, + max_file_size_kb=max_file_size_kb, + ) + ) + # ============= File System ============= def ls( diff --git a/tests/README.md b/tests/README.md index 
f5e18366..af123341 100644 --- a/tests/README.md +++ b/tests/README.md @@ -135,12 +135,12 @@ Tests for the OpenViking HTTP server API and AsyncHTTPClient SDK. | `test_api_resources.py` | Resource management | `add_resource()` with/without wait, file not found, custom target URI, `wait_processed()` | | `test_api_filesystem.py` | Filesystem endpoints | `ls` root/simple/recursive, `mkdir`, `tree`, `stat`, `rm`, `mv` | | `test_api_content.py` | Content endpoints | `read`, `abstract`, `overview` | -| `test_api_search.py` | Search endpoints | `find` with target_uri/score_threshold, `search` with session, `grep` case-insensitive, `glob` | +| `test_api_search.py` | Search endpoints | `find` with target_uri/score_threshold, `search` with session, `grep` case-insensitive, `glob`, `ast-grep` | | `test_api_sessions.py` | Session endpoints | Create, list, get, delete session; add messages; compress; extract | | `test_api_relations.py` | Relations endpoints | Get relations, link single/multiple targets, unlink | | `test_api_observer.py` | Observer endpoints | Queue, VikingDB, VLM, system observer status | | `test_error_scenarios.py` | Error handling | Invalid JSON, missing fields, not found, wrong content type, invalid URI format | -| `test_http_client_sdk.py` | AsyncHTTPClient SDK E2E | Health, add resource, wait, ls, mkdir, tree, session lifecycle, find, full workflow (real HTTP server) | +| `test_http_client_sdk.py` | AsyncHTTPClient SDK E2E | Health, add resource, wait, ls, mkdir, tree, session lifecycle, find, ast-grep, full workflow (real HTTP server) | ### session/ diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 8c92c3c8..a84f88cb 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -106,6 +106,7 @@ def test_cli_help_smoke(): ["search", "--help"], ["grep", "--help"], ["glob", "--help"], + ["ast-grep", "--help"], # serve ["serve", "--help"], # system diff --git a/tests/client/test_search.py b/tests/client/test_search.py index ff661a25..3100d13b 
100644 --- a/tests/client/test_search.py +++ b/tests/client/test_search.py @@ -74,3 +74,31 @@ async def test_search(self, client_with_resource_sync): result = await client.search(query="sample", target_uri=parent_uri) assert hasattr(result, "resources") + + async def test_ast_grep(self, client_with_resource_sync, monkeypatch): + client, uri = client_with_resource_sync + + async def fake_ast_grep(**kwargs): + return { + "matches": [ + { + "uri": "viking://resources/sample.py", + "language": "python", + "start_line": 1, + "start_col": 1, + "end_line": 1, + "end_col": 5, + "content": "def f():", + } + ], + "count": 1, + "scanned_files": 1, + "skipped_files": 0, + "truncated": False, + } + + monkeypatch.setattr(client._client._service.fs, "ast_grep", fake_ast_grep) + parent_uri = "/".join(uri.split("/")[:-1]) + "/" + result = await client.ast_grep(uri=parent_uri, pattern="def $NAME($$$ARGS):") + assert isinstance(result, dict) + assert result["count"] == 1 diff --git a/tests/server/test_api_search.py b/tests/server/test_api_search.py index 64ad6c1a..27823605 100644 --- a/tests/server/test_api_search.py +++ b/tests/server/test_api_search.py @@ -117,3 +117,46 @@ async def test_glob(client_with_resource): ) assert resp.status_code == 200 assert resp.json()["status"] == "ok" + + +async def test_ast_grep(client, service): + async def fake_ast_grep(**kwargs): + return { + "matches": [ + { + "uri": "viking://resources/sample.md", + "language": "markdown", + "start_line": 1, + "start_col": 1, + "end_line": 1, + "end_col": 8, + "content": "# Sample", + } + ], + "count": 1, + "scanned_files": 1, + "skipped_files": 0, + "truncated": False, + } + + service.fs.ast_grep = fake_ast_grep + + resp = await client.post( + "/api/v1/search/ast-grep", + json={"uri": "viking://resources/", "pattern": "$X"}, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + assert body["result"]["count"] == 1 + + +async def test_ast_grep_invalid_arguments(client): + 
resp = await client.post( + "/api/v1/search/ast-grep", + json={"uri": "viking://resources/"}, + ) + assert resp.status_code == 400 + body = resp.json() + assert body["status"] == "error" + assert body["error"]["code"] == "INVALID_ARGUMENT" diff --git a/tests/server/test_http_client_sdk.py b/tests/server/test_http_client_sdk.py index 3fb88cdd..197e2176 100644 --- a/tests/server/test_http_client_sdk.py +++ b/tests/server/test_http_client_sdk.py @@ -121,6 +121,34 @@ async def test_sdk_find(http_client): assert hasattr(result, "total") +async def test_sdk_ast_grep(http_client): + client, svc = http_client + + async def fake_ast_grep(**kwargs): + return { + "matches": [ + { + "uri": "viking://resources/sample.py", + "language": "python", + "start_line": 1, + "start_col": 1, + "end_line": 1, + "end_col": 5, + "content": "def x():", + } + ], + "count": 1, + "scanned_files": 1, + "skipped_files": 0, + "truncated": False, + } + + svc.fs.ast_grep = fake_ast_grep + result = await client.ast_grep(uri="viking://resources/", pattern="def $NAME($$$ARGS):") + assert isinstance(result, dict) + assert result["count"] == 1 + + # =================================================================== # Full workflow # =================================================================== From da43071880ff439e395cc975868b924ede38a6a1 Mon Sep 17 00:00:00 2001 From: Mijamind Date: Wed, 18 Feb 2026 15:40:09 +0800 Subject: [PATCH 2/2] fix lint error and warning --- tests/server/test_api_search.py | 34 ++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/tests/server/test_api_search.py b/tests/server/test_api_search.py index 27823605..12bda34e 100644 --- a/tests/server/test_api_search.py +++ b/tests/server/test_api_search.py @@ -3,7 +3,10 @@ """Tests for search endpoints: find, search, grep, glob.""" +import shutil + import httpx +import pytest async def test_find_basic(client_with_resource): @@ -66,9 +69,7 @@ async def 
test_search_basic(client_with_resource): async def test_search_with_session(client_with_resource): client, uri = client_with_resource # Create a session first - sess_resp = await client.post( - "/api/v1/sessions", json={"user": "test"} - ) + sess_resp = await client.post("/api/v1/sessions", json={"user": "test"}) session_id = sess_resp.json()["result"]["session_id"] resp = await client.post( @@ -160,3 +161,30 @@ async def test_ast_grep_invalid_arguments(client): body = resp.json() assert body["status"] == "error" assert body["error"]["code"] == "INVALID_ARGUMENT" + + +async def test_ast_grep_real_engine(client, service): + if shutil.which("sg") is None: + pytest.skip("ast-grep binary 'sg' is not installed") + + file_uri = "viking://resources/ast_grep_real/sub/sample.py" + await service.viking_fs.write_file( + file_uri, + "def greet(name):\n return f'hello {name}'\n", + ) + + resp = await client.post( + "/api/v1/search/ast-grep", + json={ + "uri": "viking://resources/ast_grep_real/", + "pattern": "def $NAME($$$ARGS): $$$BODY", + "file_glob": "**/*.py", + "limit": 10, + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + assert body["result"]["count"] >= 1 + assert body["result"]["scanned_files"] >= 1 + assert any(m["uri"] == file_uri for m in body["result"]["matches"])