diff --git a/python/src/magika/magika.py b/python/src/magika/magika.py index 120efad4..0c616f49 100644 --- a/python/src/magika/magika.py +++ b/python/src/magika/magika.py @@ -182,7 +182,11 @@ def scan_directory( collected_paths: List[Union[str, os.PathLike]] = [] # Use rglob('*') for recursive scan, glob('*') for single directory - glob_pattern = path_obj.rglob("*") if recursive_scan else path_obj.glob("*") + glob_pattern = ( + sorted(path_obj.rglob("*")) + if recursive_scan + else sorted(path_obj.glob("*")) + ) for item in glob_pattern: # We only want files, not sub-directories themselves diff --git a/python/tests/test_magika_python_module.py b/python/tests/test_magika_python_module.py index d31b7dfa..2ac1aa61 100644 --- a/python/tests/test_magika_python_module.py +++ b/python/tests/test_magika_python_module.py @@ -74,10 +74,20 @@ def test_magika_module_with_explicit_model_dir() -> None: def test_magika_module_with_basic_tests_by_directory() -> None: - tests_paths = utils.get_directory_test_dir() + tests_paths = utils.get_directory_tests_files_dir() m = Magika() - _ = m.scan_directory(tests_paths) + + # Only scan direct children of tests_data/directory. + # Expected output is "directory" content type. + results = m.scan_directory(tests_paths) + direct_children = sorted([p for p in tests_paths.glob("*")]) + check_results_vs_expected_results(direct_children, results) + + # Scan all files recursively. Expected output is content type of each file. + results = m.scan_directory(tests_paths, recursive_scan=True) + all_files = sorted([p for p in tests_paths.rglob("*") if p.is_file()]) + check_results_vs_expected_results(all_files, results) def test_magika_module_with_basic_tests_by_paths() -> None: diff --git a/python/tests/utils.py b/python/tests/utils.py index 8a38eac1..4796b20e 100644 --- a/python/tests/utils.py +++ b/python/tests/utils.py @@ -42,8 +42,8 @@ def get_basic_tests_files_dir() -> Path: return tests_files_dir -def get_directory_test_dir() -> Path: - tests_files_dir = get_tests_data_dir() / "scan_directory" +def get_directory_tests_files_dir() -> Path: + tests_files_dir = get_tests_data_dir() / "directory" assert tests_files_dir.is_dir() return tests_files_dir diff --git a/tests_data/directory/txt/complex-sentence.txt b/tests_data/directory/txt/complex-sentence.txt new file mode 100644 index 00000000..9b6a0c83 --- /dev/null +++ b/tests_data/directory/txt/complex-sentence.txt @@ -0,0 +1 @@ +This is yet another simple test, it includes one simple sentence, but it is not as trivial as other simpler tests. \ No newline at end of file diff --git a/tests_data/scan_directory/code2.py b/tests_data/scan_directory/code2.py deleted file mode 100644 index a2931d6d..00000000 --- a/tests_data/scan_directory/code2.py +++ /dev/null @@ -1,32 +0,0 @@ -import random -import time - -class NumberGuesser: - def __init__(self, limit=100): - self.limit = limit - self.target = random.randint(1, limit) - self.attempts = 0 - - def guess(self, user_input): - self.attempts += 1 - try: - val = int(user_input) - except ValueError: - return "Please enter a valid integer." - - if val < self.target: - return "Too low!" - elif val > self.target: - return "Too high!" - else: - return f"Correct! It took you {self.attempts} tries." - -def main(): - game = NumberGuesser() - print("I'm thinking of a number between 1 and 100.") - # Simulation of a game loop - for i in range(5): - print(f"Simulation guess {i}: {game.guess(i * 20)}") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/tests_data/scan_directory/test.json b/tests_data/scan_directory/test.json deleted file mode 100644 index df29c36a..00000000 --- a/tests_data/scan_directory/test.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "project": { - "id": 1024, - "name": "MagikaTest", - "isActive": true, - "tags": ["python", "cpp", "automation"], - "metadata": { - "created_at": "2023-10-27T10:00:00Z", - "author": "dev_user", - "version": 1.2 - } - }, - "employees": [ - { - "id": 1, - "name": "Alice Smith", - "role": "Lead Developer", - "skills": ["Rust", "C++"] - }, - { - "id": 2, - "name": "Bob Jones", - "role": "QA Engineer", - "skills": ["Python", "Selenium"] - } - ], - "settings": { - "retry_attempts": 5, - "timeout_ms": 3000 - } -} \ No newline at end of file diff --git a/tests_data/scan_directory/test.rs b/tests_data/scan_directory/test.rs deleted file mode 100644 index 3ea8704d..00000000 --- a/tests_data/scan_directory/test.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::collections::HashMap; - -struct User { - username: String, - email: String, - sign_in_count: u64, - active: bool, -} - -impl User { - fn new(username: String, email: String) -> User { - User { - username, - email, - sign_in_count: 1, - active: true, - } - } - - fn deactivate(&mut self) { - self.active = false; - println!("User {} has been deactivated.", self.username); - } -} - -fn main() { - let mut users = HashMap::new(); - let user1 = User::new(String::from("rust_fan"), String::from("rust@example.com")); - - users.insert("u1", user1); - - match users.get_mut("u1") { - Some(u) => u.deactivate(), - None => println!("User not found"), - } -} \ No newline at end of file diff --git a/tests_data/scan_directory/test.ts b/tests_data/scan_directory/test.ts deleted file mode 100644 index 5773bc3a..00000000 --- a/tests_data/scan_directory/test.ts +++ /dev/null @@ -1,36 +0,0 @@ -interface Task { - id: number; - title: string; - completed: boolean; - completedAt?: Date; // Optional property -} - -class TaskManager { - private tasks: Task[] = []; - - addTask(title: string): void { - const newTask: Task = { - id: this.tasks.length + 1, - title: title, - completed: false - }; - this.tasks.push(newTask); - } - - completeTask(id: number): void { - const task = this.tasks.find(t => t.id === id); - if (task) { - task.completed = true; - task.completedAt = new Date(); - console.log(`Task '${task.title}' marked as done.`); - } - } - - listTasks(): Task[] { - return this.tasks; - } -} - -const manager = new TaskManager(); -manager.addTask("Learn TypeScript"); -manager.completeTask(1); \ No newline at end of file