diff --git a/.asanignore b/.asanignore deleted file mode 100644 index d04dc703..00000000 --- a/.asanignore +++ /dev/null @@ -1,21 +0,0 @@ -# Ignore known harmless leaks (exit, TLS keys, system libs) -leak:libc -leak:libstdc++ -leak:libgcc -leak:__cxa -leak:__tls_get_addr -leak:malloc -leak:epoll -leak:poll -leak:kqueue -leak:fork -leak:exec - -# Ignore sanitizer internals -race:__sanitizer - -# Ignore undefined behavior triggered inside system libraries -# Useful if UBSAN reports issues inside glibc or third-party libraries -undefined:libc -undefined:libstdc++ -undefined:malloc diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 805adbc5..36b9928d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y python3 python3-pip - pip3 install -r requirements.txt || true + pip3 install -r requirements-test.txt || true - name: Build webserv run: make diff --git a/.gitignore b/.gitignore index 8347c4c8..b0f069f6 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ compile_commands.json __pycache__/ *.pyc *.pyo +.pytest_cache/ # --- Virtual environments --- .venv/ @@ -51,3 +52,8 @@ Thumbs.db test_webserv/**/__pycache__/ test_webserv/**/*.pyc test_webserv/**/*.pyo + +# --- Doxygen docs --- + +/docs/html/ +/docs/doxygen.warnings diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index d03599d1..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,669 +0,0 @@ -# Contributing to Webserv - -Thanks for being part of the team! This document outlines our workflow, standards, and best practices to ensure smooth collaboration. - ---- - -## Branching Strategy - -Our Git branching model keeps the codebase **stable**, **collaborative**, and **easy to maintain**. - -- **`main`**: - Stable, production-ready code only. - No direct pushes. Only PRs from `dev` are merged after review and validation. - -- **`dev`**: - Latest working version. 
- All new features, fixes, and experiments integrate here first. - -- **Feature branches**: - Created from `dev`. - Naming: `feature/` or `bugfix/`. - PRs must target `dev`, not `main`. - -> **Note:** TODO CI would automatically open a PR from `dev` to `main` when `dev` is green. - -### Typical Workflow - -```bash -git checkout dev -git pull origin dev -git checkout -b feature/ -# Work on your feature... -git add . -git commit -m "feature: implement basic GET request handling" -git push origin feature/ -# Open a PR from feature/ → dev -``` - ---- - -## 📄 Issues & Pull Requests - -We use GitHub templates: - -- **[Bug / Feature Requests](.github/ISSUE_TEMPLATE/issue_template)** -- **[Pull Requests](.github/PULL_REQUEST_TEMPLATE/pull_request_template.md)** - - -### PR Title Convention - -```text -: short description -``` - -Example: `http: support chunked transfer decoding`. - ---- - -## Pre-Push Checklist - -- `make` must succeed without warnings. -- `./webserv` must start without errors. -- Run : - - `make asan` - - `make tsan` - - `make ubsan` - -Open a PR only when all local tests pass. - ---- - -## Commit Guidelines - -Good commits = good collaboration. - -### Commit Structure - -```text -: short summary (max 50 chars) - -Optional longer explanation (wrap at 72 chars). -``` - -- **Module**: `http`, `server`, `config`, `utils`, etc. -- **Short Summary**: Clear and specific. -- **Longer Explanation** (optional): Extra context, tricky parts, fixes. - -### Rules - -- Fix commits must mention the bug (`cgi: fix PATH_INFO handling`). -- One commit = one purpose. -- Imperative mode: “implement”, not “implemented”. -- No WIP commits. Before opening a PR, clean up your commit history using: (`git rebase -i origin/dev`). 
- ---- - -## Pull Requests - -### How to do a Pull Request - -**1.** Start from dev - -```bash -git checkout dev -git pull origin dev -``` - -**2.** Create your feature branch from `dev` - -```bash -git checkout -b feature/my-feature -``` - -**3.** Work and commit -Make your changes, then commit them following the commit guidelines: - -```bash -git add . -git commit -m "module: short, meaningful description" -``` -Repeat as needed. - -**4.** Push your branch - -```bash -git push origin feature/my-feature -``` - -**5.** Open the Pull Request on GitHub - -- Go to your repo on GitHub. - - You’ll see a prompt: “Compare & pull request” — click it -Or go to Pull Requests → New Pull Request - -- Set the base to dev -- Set the compare to feature/my-feature - -- Fill in: - - Title: follow the format: module: short description - - Description: - - What was changed - - Why it was needed - - Anything specific to review/test - -- Submit as a Draft if it's not finished yet, or as a regular PR if it's ready - -**6.** After Review - -- Make changes if requested. -- Push new commits: they are automatically added to the same PR -- Once approved, you or the reviewer can Squash & Merge the PR into dev - -### Opening a PR - -- Always target `dev`, **never** `main`. -- Open a **Draft PR** for early feedback. -- Mark **Ready for Review** once finalized. -- Use the PR template. -- Follow commit/PR title conventions. - -### A PR must - -- Contain one logical change. -- Include a clear title and detailed description: - - What changed? - - Why was it needed? - - Anything special to watch during review. -- Pass local builds/tests. -- Be cleaned up (no debug prints, no commented-out code). -- Be rebased on the latest `dev`. - ---- - -### Code Review - -**1.** You open a Pull Request (PR) -- Once you’ve pushed your feature branch and opened a PR targeting dev, it enters review phase. 
- -**2.** Teammates are notified -- Anyone watching the repo or specifically requested as a reviewer gets notified. -- You can explicitly assign reviewers via the PR sidebar. - -**3.** Reviewers leave comments -- Reviewers can: - - Approve the PR - - Request changes - - Comment (neutral feedback) - -- They can: - - Leave inline comments on specific lines - - Leave general comments at the top or bottom - - Suggest changes with GitHub's suggestion UI - - Discuss architecture, bugs, naming, clarity, etc. - -**4.** You respond to feedback -- You can: - - Reply to comments - - Commit fixes and push to the same branch — the PR auto-updates - - Mark comments as resolved when done - - GitHub tracks which comments are resolved vs. still active. - -**5.** Once approved, you merge -- After: - - All reviewers approve - - CI passes - - You’ve rebased & cleaned history - -- Then: - - Use Squash & Merge or Rebase & Merge - - Delete the feature branch if no longer needed - -#### Tips for Efficient Review -- Be responsive to feedback -- Keep changes small and scoped -- Avoid "monster PRs" -- Use Draft PRs early to gather feedback -- Address every comment, even if just to explain why no change is needed -- At least **one teammate** must review and approve. -- Clarify anything that’s ambiguous. - ---- - -## Merge Strategies: Rebase & Merge vs Squash & Merge - -When collaborating on GitHub, choosing the right merge strategy affects your repository's history clarity, bisectability, and maintainability. Here's a clear comparison between **Rebase & Merge** and **Squash & Merge**, with guidance for when to use each. - ---- - -### Rebase & Merge - -#### What it does - -* Replays each commit from the PR branch **on top of** the target branch (e.g., `dev`). -* **Preserves** all individual commits. -* Avoids merge commits. 
- -#### Resulting history - -``` -Before: -main --- A --- B - \ - C --- D (feature) - -After: -main --- A --- B --- C' --- D' (C and D rebased on B) -``` - -#### Pros and Cons - -| Pros | Cons | -| -------------------------------------------- | ------------------------------------------------------- | -| Clean, linear history | Rewrites commit SHAs (not suitable for shared branches) | -| Preserves logical commit structure | Requires clean, well-structured commits | -| Great for detailed `git blame` and bisecting | | - -#### Use when - -* The PR has clean, meaningful commits -* You want to preserve granular history -* Your team follows good commit hygiene - -#### Example Workflow - -```bash -git checkout dev -git pull origin dev -git checkout -b feature/add-auth -echo "// implement auth" > auth.cpp -git add auth.cpp -git commit -m "auth: implement token-based authentication" -git push origin feature/add-auth -# Open a PR to dev -# After review, rebase interactively to clean history: -git rebase -i dev -git push --force-with-lease -# On GitHub: Use "Rebase & Merge" to merge the PR -``` - ---- - -### Squash & Merge - -#### What it does - -* Squashes **all PR commits into one**. -* Commits that were WIP, fixups, etc., are flattened. 
- -#### Resulting history - -``` -Before: -main --- A --- B - \ - C --- D (feature) - -After: -main --- A --- B --- E (one squashed commit) -``` - -#### Pros and Cons - -| Pros | Cons | -| ------------------------------------ | ------------------------------------------- | -| Extremely clean and concise history | Loses individual commit information | -| Easy to revert a single change | Harder to trace how a feature was developed | -| Great when PR contains noisy commits | | - -#### Use when - -* PR has WIP/fix commits -* You care more about trunk clarity than commit history -* You want one commit per feature/fix - -#### Example Workflow - -```bash -git checkout dev -git pull origin dev -git checkout -b bugfix/fix-upload-path -echo "// fix path bug" > upload.cpp -git add upload.cpp -git commit -m "fix: wrong upload directory" -echo "// debug" >> upload.cpp -git commit -am "debug: temporary logging" -echo "// cleanup" >> upload.cpp -git commit -am "cleanup: remove debug" -git push origin bugfix/fix-upload-path -# Open a PR to dev -# On GitHub: Use "Squash & Merge" and write a clean commit message -``` - ---- - -### Summary Table - -| Feature | Rebase & Merge | Squash & Merge | -| ---------------------------- | -------------------- | -------------- | -| Preserves individual commits | Yes | No | -| Cleans messy commit history | No | Yes | -| Easy to revert feature | Maybe (many commits) | Yes (1 commit) | -| Great for `git blame` | Yes | Depends | -| Suitable for WIP commits | No | Yes | -| Maintains linear history | Yes | Yes | - ---- - -### Best Practices - -* Rebase locally to keep up-to-date: `git pull --rebase` -* Clean up commits before PR: `git rebase -i` -* Use squash for small fixes, doc updates, or messy PRs -* Use rebase when you want full commit fidelity - ---- - -### Suggested Strategy for `webserv` - -| PR Type | Suggested Merge | -| ----------------- | --------------- | -| Clean feature PR | Rebase & Merge | -| Bugfix with noise | Squash & Merge | -| 
Docs/Chores | Squash & Merge | - -Maintain a balance: clear main branch history with squash, but keep fidelity where it helps future devs understand changes. - - ---- - -### Cleanup Rules - -- No commented-out code. -- Always run `make clean && make fclean` before pushing. -- Make sure `.gitignore` correctly excludes build artifacts and editor files. - ---- - -## clang-tidy Usage and Best Practices - -`clang-tidy` is a powerful static analysis tool for C++ that helps catch bugs, enforce coding standards, and suggest modern best practices. It integrates cleanly into CI/CD pipelines and local development workflows. - ---- - -### What is clang-tidy? - -`clang-tidy` is part of the LLVM/Clang toolchain. It performs static analysis on C++ code using a set of configurable checks. It supports: - -* Bug detection (memory issues, undefined behavior, etc.) -* Style enforcement (naming conventions, brace usage, etc.) -* Modernization (C++11–C++20 idioms) -* Performance improvements (loop transformations, unneeded copies) - ---- - -### Project Integration - -In the Webserv project: - -* We enforce best practices via `clang-tidy` -* Configuration is located in the root of the repository: [`.clang-tidy`](.clang-tidy) - ---- - -### Configuration Overview (`.clang-tidy`) - -```yaml -Checks: > - -*, - bugprone-*, - modernize-*, - performance-*, - readability-*, - clang-analyzer-*, - cppcoreguidelines-*, - misc-* - -WarningsAsErrors: '*' - -HeaderFilterRegex: '.*' - -FormatStyle: file - -CheckOptions: - - key: modernize-use-nullptr.NullMacros - value: 'NULL' - - key: modernize-use-override.CheckSpelling - value: true - - key: readability-identifier-naming.VariableCase - value: lower_case - - key: readability-identifier-naming.ClassCase - value: PascalCase - - key: readability-identifier-naming.FunctionCase - value: camelCase - - key: readability-identifier-naming.PrivateMemberPrefix - value: _ -``` - ---- - -### Enforced Rule Categories - -The configuration above enables the 
following major rule sets: - -* **`bugprone-*`**: Detects code that is likely to be incorrect or prone to bugs (e.g., uninitialized values, copy-paste errors). -* **`modernize-*`**: Promotes modern C++ usage (e.g., `nullptr`, `override`, range-based loops, smart pointers). -* **`performance-*`**: Highlights inefficient code patterns and suggests optimized alternatives (e.g., unnecessary copies). -* **`readability-*`**: Improves clarity and style consistency (e.g., naming, magic numbers, redundant expressions). -* **`clang-analyzer-*`**: Static path-sensitive analysis to detect dead code, memory leaks, and undefined behavior. -* **`cppcoreguidelines-*`**: Enforces the C++ Core Guidelines to encourage safe, maintainable, and modern C++. -* **`misc-*`**: A miscellaneous set of useful checks not covered by other categories. - -In addition: - -* **All warnings are treated as errors** -* **Naming conventions** are strictly enforced - ---- - -### Naming Conventions Enforced - -`clang-tidy` enforces identifier naming across your codebase using the `readability-identifier-naming` checks. The enforced rules include: - -| Entity Type | Convention | Example | -| --------------- | ------------ | ---------------------- | -| Variables | `lower_case` | `timeout`, `buffer_id` | -| Functions | `camelCase` | `handleRequest()` | -| Classes/Structs | `PascalCase` | `HttpServer`, `Config` | -| Private Members | `_` suffix | `connection_`, `port_` | - -These conventions help maintain a consistent, readable codebase. Violations will be flagged and must be corrected before submission. - ---- - -### How to Run clang-tidy Locally - -#### Prerequisites - -* `clang-tidy` installed (typically via `clang-tools` or `llvm` package) -* Compilation database (`compile_commands.json`) in your build directory - -Generate the compilation database (with CMake): - -```bash -cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -S . 
-B build -``` - -Or if using CMake presets: - -```bash -cmake --preset debug -``` - -Then symlink or copy it to project root: - -```bash -ln -s build/compile_commands.json . -``` - -#### Manual Run - -```bash -clang-tidy src/Server.cpp -- -Iinclude -``` - -> Use `-- -Iinclude` to pass include paths to the compiler properly. - -#### Run on all files (parallel) - -```bash -find src/ -name '*.cpp' | xargs -P$(nproc) -n1 clang-tidy -- -Iinclude -``` - ---- - -### Fixing Issues - -Use fix mode to auto-apply some suggestions: - -```bash -clang-tidy src/Server.cpp -fix -- -Iinclude -``` - -Always review changes with `git diff` before committing. - ---- - -### CI Integration - -GitHub Actions automatically runs `clang-tidy` on pushes to `dev` and `main`. If any issue is found: - -* CI will fail -* You must fix all warnings before merging or submitting a PR - -Use `clang-tidy` locally to catch issues early. - ---- - -### IDE Integration - -Using an IDE that supports `clang-tidy` greatly improves your workflow: - -### Visual Studio Code (VSCode) - -* Requires extensions: `clangd` or `LLVM`, and optionally `CodeLLDB` -* Make sure `compile_commands.json` is generated -* Set up `c_cpp_properties.json` or use `clangd` with `--compile-commands-dir` -* Configure diagnostics via `.clang-tidy` - -### Other Editors - -* Most modern C++ IDEs (Eclipse CDT, Qt Creator, etc.) 
can integrate with `clang-tidy` -* Look for settings to point to `.clang-tidy` and the compilation database - -Benefits of IDE integration: - -* Instant feedback while coding -* Fix suggestions as you type -* Fewer CI surprises -* Encourages early cleanup - ---- - -### Development Tips - -* Use `.clang-tidy` overrides via command line: `-checks=*,-some-check` -* For language version issues: `--extra-arg=-std=c++20` -* Add `-p build` if `compile_commands.json` is in a subdirectory -* Use `clang-tidy -export-fixes fixes.yaml` to export fix suggestions - ---- - -### Summary - -* `clang-tidy` enforces consistency and quality -* All warnings are treated as errors -* Used in CI and should be part of local workflow -* IDE integration makes it faster and easier to use -* Configured to match the Webserv style guide -* Covers bug-prone patterns, modern C++ practices, performance, readability, and naming -* Enforces strict identifier naming conventions to ensure clean and uniform code - ---- - -For full documentation: [https://clang.llvm.org/extra/clang-tidy/](https://clang.llvm.org/extra/clang-tidy/) - - ---- - -## Code Style - -Follow the **[Modern C++ Style Guide](STYLEGUIDE.md)** based on **LLVM style**, adapted for us: - -- 4 spaces indentation, no tabs. -- Max 100 columns. -- K&R braces (`if (...) {`). -- `*` and `&` bind to type (`int* ptr`). -- Includes sorted alphabetically but logical grouping preserved. -- No `using namespace std;` at file scope. -- Use smart pointers (`std::unique_ptr`, `std::shared_ptr`) over raw pointers. -- Follow Rule of 0 / Rule of 5. -- Prefer exceptions and `std::optional` / `std::expected` for errors. -- Doxygen comments for all public entities. 
- ---- - -### Documentation - -The project documentation is automatically generated using **Doxygen** and deployed to **GitHub Pages**: - -- [📚 View Online Documentation](https://to0nsa.github.io/webserv/) - -Documentation is generated from the `include/`, `src/`, and `tests/` directories, and can be regenerated manually using Doxygen. - -- [📄 Local Documentation Guide](docs/README.md) — Explains the structure, regeneration process, and GitHub Pages deployment. - -> Documentation includes call graphs and caller graphs if [Graphviz](https://graphviz.gitlab.io/) is installed. - -#### Documentation Style - -All public classes, functions, and modules must follow the project's [Doxygen Style Guide](DOXYGENSTYLEGUIDE.md). - -This guide defines: - -- Mandatory sections (`@brief`, `@details`, `@param`, `@return`, etc.) -- File headers for all `.hpp`, `.cpp`, and `.tpp` files -- Logical module grouping (`@defgroup`, `@ingroup`) -- Formatting rules and tag order -- Use of present tense and alignment of tags -- Handling of private/internal documentation (`@internal`) - ---- - -## Editor Configuration - -We ship an [`.editorconfig`](.editorconfig): - -- UTF-8 encoding -- 4-space indentation -- Unix (LF) line endings -- End files with newline -- No trailing whitespace (Markdown exempt) -- 100 characters recommended line limit - -Configure your editor accordingly. - ---- - -## Git Attributes - -Our [`.gitattributes`](.gitattributes) ensures: - -- LF line endings everywhere -- Correct binary file handling (e.g., `.png`, `.jpg`, `.gif`) - ---- - -## Git Ignore - -Our [`.gitignore`](.gitignore) excludes: - -- Build artifacts (`build/`, `objs/`, `bin/`) -- Compiler outputs (`*.o`, `*.out`, `*.exe`, `*.a`) -- IDE configs (`.vscode/`, `.idea/`, `*.swp`) -- OS files (`.DS_Store`, `Thumbs.db`) -- Logs (`*.log`) - -**Never** commit ignored files. 
- ---- diff --git a/DOXYGENSTYLEGUIDE.md b/DOXYGENSTYLEGUIDE.md index 6509119f..9101f363 100644 --- a/DOXYGENSTYLEGUIDE.md +++ b/DOXYGENSTYLEGUIDE.md @@ -1,84 +1,67 @@ # Doxygen Style Guide for Modern C++ Projects (Webserv) -This guide defines how documentation must be written across all files in the Webserv project. -It aligns with the Doxyfile configuration, CI integration, and public documentation tooling. +This guide defines **mandatory documentation rules** for the Webserv project. It is strict by design to ensure consistent, maintainable, and navigable documentation across the entire codebase. --- -## 0. Documentation Structure +## 0. Scope & Enforcement -Documentation is automatically generated from: +* Applies to **all** `.hpp`, `.cpp`, `.tpp`, `.ipp` files in `include/`, `src/`, `tests/`. +* Public API **must** be fully documented. +* Pull requests will be **rejected** if: -* `include/`: public headers -* `src/`: implementation files -* `tests/`: documented test helpers -* `README.md`: used as the main page via `USE_MDFILE_AS_MAINPAGE` - -All files are parsed recursively, and `.md` files are included with full Markdown rendering. + * Any public class/function is undocumented. + * Tag order or format does not follow this guide. + * File headers are missing. --- -## 1. General Doxygen Practices +## 1. General Practices -* All `.hpp`, `.cpp`, and `.tpp` files must start with a `@file` header -* Use `/** */` for documentation blocks (not `///` or `//`) -* Use English and present tense ("Initializes", "Returns") -* Leave a blank line between `@brief` and `@details` -* Use structured tags (`@param`, `@return`, etc.) consistently -* Prefer complete documentation on all public headers, types, and methods +* Always use `/** */` for documentation blocks (never `///` or `//`). +* Write in **English** and **present tense** (e.g., "Initializes", not "Initialized"). +* Leave **one blank line** between `@brief` and `@details`. +* Keep **line length < 100 columns**. 
+* Use **noun phrases** for `@brief`. +* Document **private/internal methods** when behavior is non-obvious (mark with `@internal`). --- -## 2. Mandatory Tags for Public API - -Use the following tags in this order: +## 2. Tag Order (Mandatory) -1. `@brief`: short summary -2. `@details`: long explanation (optional) -3. `@tparam`: template parameters (if any) -4. `@param`: function arguments -5. `@return`: function return value -6. `@retval`: distinct return codes -7. `@throws`: thrown exceptions -8. `@note`: side information or caveats -9. `@warning`: must-know risk -10. `@todo`: pending feature or task +1. `@brief` – short summary (≤ 1 sentence) +2. `@details` – extended description (optional) +3. `@tparam` – template parameters +4. `@param` – parameters (aligned vertically) +5. `@return` – return value +6. `@retval` – distinct return codes (optional) +7. `@throws` – exceptions thrown +8. `@note` – additional info +9. `@warning` – important risk +10. `@todo` – pending task/feature --- -## 3. Example Template Documentation - -```cpp -/** - * @brief Allocates memory from an arena. - * - * @tparam T Type to allocate. - * @param size Number of elements. - * @return Pointer to the allocated memory. - */ -template -T* arenaAlloc(std::size_t size); -``` - ---- - -## 4. File Header Format +## 3. File Header Format ```cpp /** * @file Server.cpp * @brief Implements the Server class. * - * @details Provides TCP server functionality. + * @details Provides TCP server functionality for Webserv. + * @ingroup core */ ``` +* **Must** be the first comment in the file. +* Include `@ingroup` for module classification. + --- -## 5. Modules and Grouping +## 4. Modules & Grouping -Use `@defgroup` and `@ingroup` for logical documentation structure. -These are aliased in the Doxyfile to ensure visibility. +Use `@defgroup` and `@ingroup` to organize documentation. 
```cpp /** @@ -95,17 +78,17 @@ class HttpRequest {}; /** @} */ ``` -Suggested groups: +**Allowed groups:** -* `http`: HTTP parsing and routing -* `socket`: socket abstraction -* `config`: configuration and parsing -* `core`: event loop and dispatcher -* `utils`: shared helpers +* `http` – HTTP parsing and routing +* `socket` – socket abstraction +* `config` – configuration and parsing +* `core` – event loop and dispatcher +* `utils` – shared helpers --- -## 6. Class and Member Documentation +## 5. Class & Member Documentation ```cpp /** @@ -116,17 +99,56 @@ Suggested groups: */ class Server { public: - /// @brief Starts the server loop. + /** @brief Starts the server loop. */ void start(); private: - int port_; ///< Port number the server uses. + int port_; ///< Port number used by the server. }; ``` +* Use `///<` for **inline member comments**. +* Use `/** */` for **methods**. + +--- + +## 6. Function Documentation + +```cpp +/** + * @brief Binds a socket to a port. + * + * @param port Port to bind. + * @return `true` if successful. + * @throws std::runtime_error If binding fails. + */ +bool bindSocket(int port); +``` + +* All params **must** be documented. +* If exceptions are thrown, **must** include `@throws`. + +--- + +## 7. Template & Concepts Documentation + +For C++20 concepts or constrained templates: + +```cpp +/** + * @brief Allocates memory from an arena. + * + * @tparam T Type to allocate. + * @param size Number of elements. + * @return Pointer to allocated memory. + */ +template <typename T> requires std::is_default_constructible_v<T> +T* arenaAlloc(std::size_t size); +``` + --- -## 7. Enum and Struct Documentation +## 8. Enum & Struct Documentation ```cpp /** @@ -142,9 +164,7 @@ enum class HttpMethod { --- -## 8. Internal or Private Code - -Use `@internal` (aliased in Doxyfile) to exclude internal code from public output. +## 9. 
Internal/Private Code ```cpp /** @@ -155,53 +175,36 @@ static std::map parseQueryInternal(const std::string& query); ``` ---- +* Mark with `@internal` to exclude from public output. -## 9. Formatting Rules +--- -* Use `/** */` style for all documentation blocks -* Align `@param`, `@return`, and `@throws` blocks -* Leave exactly one space after each `*` -* Keep line length under 100 columns -* Use present tense and noun phrases for `@brief` +## 10. Cross-Referencing -Example: +Use `@ref` to link related docs: ```cpp -/** - * @brief Binds a socket to a port. - * - * @param port Port to bind. - * @return `true` if successful. - */ -bool bindSocket(int port); +/** See also: @ref bindSocket */ ``` --- -## 10. Graphviz and Source Display - -The following are enabled by the Doxyfile: - -* `HAVE_DOT = YES`: enables all graph generation -* `CALL_GRAPH`, `CALLER_GRAPH`: call/caller relationships -* `SOURCE_BROWSER = YES`: annotated source files in HTML -* `VERBATIM_HEADERS = YES`: headers shown with structure - -Install Graphviz to ensure graph rendering. - ---- - -## 11. Final Checklist +## 11. 
Final Pre-Merge Checklist Before merging: * [ ] Each file has a `@file` header + * [ ] All public classes and methods are documented + * [ ] All parameters and return values are explained + * [ ] Internal helpers use `@internal` + * [ ] All tags are ordered and formatted + * [ ] Logical modules are defined and grouped + * [ ] No undocumented public entities remain --- diff --git a/Doxyfile b/Doxyfile index 2edd36ec..9dcc44d9 100644 --- a/Doxyfile +++ b/Doxyfile @@ -1,19 +1,19 @@ # --- Project Information --- PROJECT_NAME = Webserv -PROJECT_BRIEF = Lightweight HTTP/1.1 server written in modern C++ +PROJECT_BRIEF = "Lightweight HTTP/1.1 server written in modern C++" OUTPUT_DIRECTORY = docs CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English -_inputENCODING = UTF-8 +INPUT_ENCODING = UTF-8 # --- Input Files --- -INPUT = include/ src/ tests/ README.md +INPUT = include/ src/ README.md RECURSIVE = YES FILE_PATTERNS = *.cpp *.hpp *.tpp *.md USE_MDFILE_AS_MAINPAGE = README.md -EXCLUDE_PATTERNS = /.git/ /build/ +EXCLUDE_PATTERNS = **/.git/** **/build/** # --- Source Code Parsing --- @@ -30,7 +30,7 @@ EXTRACT_PRIVATE = YES EXTRACT_STATIC = YES EXTRACT_LOCAL_METHODS = YES EXTRACT_ANON_NSPACES = YES -IN_lineINHERITED_MEMB = YES +INLINE_INHERITED_MEMB = NO # --- Output Format --- @@ -38,33 +38,44 @@ GENERATE_HTML = YES HTML_OUTPUT = html HTML_FILE_EXTENSION = .html GENERATE_TREEVIEW = YES -HTML_DYNAMIC_SECTIONS = YES +HTML_DYNAMIC_SECTIONS = YES DISABLE_INDEX = NO FULL_PATH_NAMES = YES +TOC_INCLUDE_HEADINGS = 0 +SEPARATE_MEMBER_PAGES = YES + +# Shorten displayed paths in HTML and include graphs +STRIP_FROM_PATH = . 
+STRIP_FROM_INC_PATH = include # --- Markdown and Style --- MARKDOWN_SUPPORT = YES AUTOLINK_SUPPORT = YES +JAVADOC_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = YES # --- Documentation Layout --- SHOW_INCLUDE_FILES = YES SOURCE_BROWSER = YES -IN_lineSOURCES = NO +INLINE_SOURCES = YES VERBATIM_HEADERS = YES STRIP_CODE_COMMENTS = NO REFERENCED_BY_RELATION = YES REFERENCES_RELATION = YES REFERENCES_LINK_SOURCE = YES +HIDE_UNDOC_MEMBERS = YES +HIDE_UNDOC_CLASSES = YES # --- Graphs and Diagrams --- HAVE_DOT = YES -DOT_NUM_THREADS = 4 +DOT_NUM_THREADS = 1 CALL_GRAPH = YES CALLER_GRAPH = YES -DOT_MULTI_TARGETS = YES +DOT_MULTI_TARGETS = NO +DOT_GRAPH_MAX_NODES = 100 CLASS_DIAGRAMS = YES DOT_IMAGE_FORMAT = svg INTERACTIVE_SVG = YES @@ -77,22 +88,24 @@ DIRECTORY_GRAPH = YES # --- Sorting and Indexing --- -SORT_MEMBER_DOCS = YES -SORT_BRIEF_DOCS = YES +MEMBER_ORDER = GROUPED +SORT_MEMBER_DOCS = NO +SORT_BRIEF_DOCS = NO SORT_GROUP_NAMES = YES SORT_BY_SCOPE_NAME = YES # --- Aliases --- -ALIASES += "ingroup=\ingroup " -ALIASES += "defgroup=\defgroup " -ALIASES += "internal=\internal " +ALIASES += "ingroup=\ingroup " +ALIASES += "defgroup=\defgroup " +ALIASES += "internal=\internal " # --- Warnings --- WARN_IF_UNDOCUMENTED = YES WARN_IF_DOC_ERROR = YES WARN_NO_PARAMDOC = YES +WARN_LOGFILE = docs/doxygen.warnings # --- Misc --- @@ -101,4 +114,3 @@ GENERATE_LATEX = NO GENERATE_MAN = NO GENERATE_RTF = NO GENERATE_XML = NO -MULTI_lineCPP_IS_BRIEF = YES \ No newline at end of file diff --git a/DoxygenLayout.xml b/DoxygenLayout.xml new file mode 100644 index 00000000..26a114bd --- /dev/null +++ b/DoxygenLayout.xml @@ -0,0 +1,265 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Makefile b/Makefile index 00eb270f..9877d678 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ # By: irychkov +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2025/08/09 20:53:27 by nlouis #+# #+# # -# Updated: 2025/08/18 13:03:13 by irychkov ### ########.fr # +# Updated: 2025/08/19 15:24:47 by irychkov ### ########.fr # # # # **************************************************************************** # @@ -53,7 +53,7 @@ SRCS_CORE := \ src/core/Server.cpp \ src/core/main.cpp \ src/core/server_utils.cpp \ - src/core/webserv.cpp + src/core/runWebserv.cpp SRCS_HTTP := \ src/http/HttpRequest.cpp \ @@ -207,4 +207,4 @@ help: @echo " $(GREEN)make help$(RESET) → Show this help message 📚" .PHONY: all clean fclean re test format help install_test_deps --include $(DEPS) +-include $(DEPS) \ No newline at end of file diff --git a/Makefile.to0nsa b/Makefile.to0nsa new file mode 100644 index 00000000..a0110a42 --- /dev/null +++ b/Makefile.to0nsa @@ -0,0 +1,212 @@ +# **************************************************************************** # +# # +# ::: :::::::: # +# Makefile :+: :+: :+: # +# +:+ +:+ +:+ # +# By: nlouis +#+ +:+ +#+ # +# +#+#+#+#+#+ +#+ # +# Created: 2025/08/09 20:53:27 by nlouis #+# #+# # +# Updated: 2025/08/18 20:05:41 by nlouis ### ########.fr # +# # +# **************************************************************************** # + +# Compiler settings +CXX := c++ +CXXFLAGS := -Wall -Wextra -Werror -I include -std=c++20 \ + -O3 -DNDEBUG -flto -march=native +# We use -flto (link time optimization) for better performance. +# -O3 is for optimization, -DNDEBUG disables debug assertions. 
+ +# Executable name and output directory +NAME := webserv +BINDIR := bin +TARGET := $(BINDIR)/$(NAME) + +# Object dir (each .o gets its .d dependency file next to it) +OBJDIR := objs + +# ANSI color escapes used by the recipe echo messages +GREEN := \033[0;32m +CYAN := \033[0;36m +YELLOW := \033[1;33m +RED := \033[0;31m +RESET := \033[0m + +.DEFAULT_GOAL := all + +SRCS_CONFIG := \ + src/config/Config.cpp \ + src/config/normalizeConfig.cpp \ + src/config/validateConfig.cpp + +SRCS_CONFIG_PARSER := \ + src/config/parser/ConfigParseError.cpp \ + src/config/parser/ConfigParser.cpp \ + src/config/parser/directive_handler_table.cpp + +SRCS_CONFIG_TOKENIZER := \ + src/config/tokenizer/Tokenizer.cpp \ + src/config/tokenizer/token.cpp + +SRCS_CORE := \ + src/core/Location.cpp \ + src/core/Server.cpp \ + src/core/main.cpp \ + src/core/server_utils.cpp \ + src/core/runWebserv.cpp + +SRCS_HTTP := \ + src/http/HttpRequest.cpp \ + src/http/HttpRequestParser.cpp \ + src/http/HttpResponse.cpp \ + src/http/handleCgi.cpp \ + src/http/methodsHandler/handleDelete.cpp \ + src/http/methodsHandler/handleGet/generateAutoindex.cpp \ + src/http/methodsHandler/handleGet/handleGet.cpp \ + src/http/methodsHandler/handlePost/handleMultipartForm.cpp \ + src/http/methodsHandler/handlePost/handlePost.cpp \ + src/http/requestRouter.cpp \ + src/http/responseBuilder.cpp + +SRCS_NETWORK := \ + src/network/SocketManager.cpp \ + src/network/SocketManagerRequest.cpp \ + src/network/SocketManagerResponse.cpp \ + src/network/SocketManagerTimeouts.cpp \ + src/network/SocketManagerUtils.cpp + +SRCS_UTILS := \ + src/utils/Logger.cpp \ + src/utils/errorUtils.cpp \ + src/utils/filesystemUtils.cpp \ + src/utils/htmlUtils.cpp \ + src/utils/printInfo.cpp \ + src/utils/stringUtils.cpp \ + src/utils/urlUtils.cpp + +SRCS := \ + $(SRCS_CONFIG) \ + $(SRCS_CONFIG_PARSER) \ + $(SRCS_CONFIG_TOKENIZER) \ + $(SRCS_CORE) \ + $(SRCS_HTTP) \ + $(SRCS_NETWORK) \ + $(SRCS_UTILS) + +HEADERS := \ + include/config/Config.hpp \ + include/config/normalizeConfig.hpp \ + 
include/config/parser/ConfigParseError.hpp \ + include/config/parser/ConfigParser.hpp \ + include/config/parser/directive_handler_table.hpp \ + include/config/tokenizer/Tokenizer.hpp \ + include/config/tokenizer/token.hpp \ + include/config/validateConfig.hpp \ + include/core/Location.hpp \ + include/core/Server.hpp \ + include/core/server_utils.hpp \ + include/core/runWebserv.hpp \ + include/http/HttpRequest.hpp \ + include/http/HttpRequestParser.hpp \ + include/http/HttpResponse.hpp \ + include/http/Url.hpp \ + include/http/handleCgi.hpp \ + include/http/methodsHandler.hpp \ + include/http/requestRouter.hpp \ + include/http/responseBuilder.hpp \ + include/network/SocketManager.hpp \ + include/utils/Logger.hpp \ + include/utils/errorUtils.hpp \ + include/utils/filesystemUtils.hpp \ + include/utils/htmlUtils.hpp \ + include/utils/printInfo.hpp \ + include/utils/stringUtils.hpp \ + include/utils/urlUtils.hpp + +# ---- Derived files ----------------------------------------------------------- + +# Map each .cpp in $(SRCS) to its corresponding .o in $(OBJDIR). +# Note: patsubst here does NOT perform any filesystem wildcard expansion; +# it only performs a string substitution on the fixed list in $(SRCS). +# The result is therefore deterministic, unlike $(wildcard) or $(shell find ...). 
+OBJS := $(patsubst src/%.cpp,$(OBJDIR)/%.o,$(SRCS)) +DEPS := $(OBJS:.o=.d) + +# ---- Rules ------------------------------------------------------------------ + +all: $(TARGET) + +$(TARGET): $(OBJS) + @mkdir -p $(BINDIR) + @$(CXX) $(CXXFLAGS) $^ -o $@ + @echo "$(CYAN)🚀 Built executable:$(RESET) $(TARGET)" + +$(OBJDIR)/%.o: src/%.cpp + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) -MMD -MP -c $< -o $@ + @echo "$(GREEN)🛠️ Compiled:$(RESET) $<" + +# Cleaning: clean removes $(OBJDIR); fclean also removes the binary, $(BINDIR) and build/ +clean: + @rm -rf $(OBJDIR) + @echo "$(YELLOW)🧹 Cleaned object and dependency files.$(RESET)" + +fclean: clean + @rm -rf $(TARGET) $(BINDIR) build + @echo "$(YELLOW)🗑️ Completely removed executables, binaries, build/.$(RESET)" + +re: fclean all + +# Python test dependencies: create .venv and install requirements-test.txt into it +install_test_deps: + @echo "$(CYAN)📦 Creating isolated Python venv for tests...$(RESET)" + @python3 -m venv .venv || { \ + echo "$(RED)python3-venv is missing. Install it: sudo apt-get install -y python3-venv$(RESET)"; \ + exit 1; \ + } + @.venv/bin/python -m pip install --upgrade pip + @.venv/bin/pip install -r requirements-test.txt + +# Tests: start the server in the background (PID saved to .webserv_test.pid), run run_test.py, then kill the server on both success and failure +test: all install_test_deps + @echo "$(CYAN)🧪 Launching web server in background...$(RESET)" + @./$(TARGET) ./test_webserv/tester/config/tester.conf & echo $$! 
> .webserv_test.pid + @sleep 1 + @echo "$(CYAN)🧪 Running Python test suite...$(RESET)" + @.venv/bin/python run_test.py || { \ + echo "$(RED)❌ Tests failed.$(RESET)"; \ + kill `cat .webserv_test.pid` >/dev/null 2>&1 || true; \ + rm -f .webserv_test.pid; \ + exit 1; \ + } + @echo "$(CYAN)🧹 Shutting down test server...$(RESET)" + @kill `cat .webserv_test.pid` >/dev/null 2>&1 || true + @rm -f .webserv_test.pid + @echo "$(GREEN)🏆 All tests passed successfully!$(RESET)" + +# Formatting: run clang-format in place on the explicit $(SRCS) and $(HEADERS) lists (no find/globs) +format: + @echo "$(CYAN)🎨 Formatting source files...$(RESET)" + @clang-format -i $(SRCS) $(HEADERS) + +# Help: print a summary of the available targets +help: + @echo "$(CYAN)📦 Build Targets:$(RESET)" + @echo " $(GREEN)make$(RESET) → Build the project 🚀" + @echo " $(GREEN)make re$(RESET) → Clean and rebuild everything 🔁" + @echo "" + @echo "$(CYAN)🧪 Test Targets:$(RESET)" + @echo " $(GREEN)make test$(RESET) → Build, install Python deps, run server + Python tests 🧪" + @echo "" + @echo "$(CYAN)🧹 Cleaning Targets:$(RESET)" + @echo " $(GREEN)make clean$(RESET) → Remove object and dependency files 🧹" + @echo " $(GREEN)make fclean$(RESET)→ Remove everything including binaries and build dirs 🗑️" + @echo "" + @echo "$(CYAN)🧹 Code Quality Targets:$(RESET)" + @echo " $(GREEN)make format$(RESET)→ Format all source files 🎨" + @echo "" + @echo "$(CYAN)📚 Other:$(RESET)" + @echo " $(GREEN)make help$(RESET) → Show this help message 📚" + +.PHONY: all clean fclean re test format help install_test_deps +-include $(DEPS) diff --git a/docs/DOCS.md b/docs/DOCS.md deleted file mode 100644 index d4656a15..00000000 --- a/docs/DOCS.md +++ /dev/null @@ -1,37 +0,0 @@ -# Documentation - -This directory contains the generated project documentation. - -## Structure - -- `html/` — The HTML output generated by [Doxygen](https://www.doxygen.nl/). 
- - Main entry point: [`index.html`](html/index.html) - -## How to regenerate documentation - -If you modify or add code comments, you can regenerate the documentation locally: - -```bash -doxygen Doxyfile -``` - -This will update the `html/` folder automatically. - -## GitHub Pages - -The contents of `docs/html/` are published automatically via GitHub Pages. - -The documentation is automatically regenerated and deployed whenever a `git push` is made to the `main` branch. - -You can view the live documentation at: - -**[https://to0nsa.github.io/webserv/](https://to0nsa.github.io/webserv/)** - ---- - -## Notes - -- The documentation is generated from `include/`, `src/`, and `tests/` directories. -- Graphs (call graphs, caller graphs) are included if [Graphviz](https://graphviz.gitlab.io/) is installed. - ---- diff --git a/docs/doxygen.warnings b/docs/doxygen.warnings new file mode 100644 index 00000000..42d052e2 --- /dev/null +++ b/docs/doxygen.warnings @@ -0,0 +1,201 @@ +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:16: warning: the name 'HttpRequestHandler.hpp' supplied as the argument in the \file statement is not an input file +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:263: warning: no matching file member found for +const std::unordered_map< std::string, ServerHandler > & directive::serverHandlers +Possible candidates: + 'const std::unordered_map< std::string, ServerHandler > & serverHandlers()' at line 263 of file /home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:362: warning: no matching file member found for +const std::unordered_map< std::string, LocationHandler > & directive::locationHandlers +Possible candidates: + 'const std::unordered_map< std::string, LocationHandler > & locationHandlers()' at line 362 of file /home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp 
+/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:247: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:249: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:253: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:255: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:341: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:349: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:353: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:355: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:355: warning: found tag while expecting +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:359: warning: end of comment block while expecting command +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:359: warning: end of comment block while expecting command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:23: warning: unable to resolve reference to 'hasHeader' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:24: warning: unable to resolve reference to 'getHeader' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:25: warning: unable to resolve reference to '_contentLength' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:27: warning: unable to resolve reference to '_parseError' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:28: 
warning: unable to resolve reference to '_matchedServerIndex' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:34: warning: unable to resolve reference to 'setMatchedServerIndex' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:41: warning: unable to resolve reference to 'getParseErrorCode' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:43: warning: unable to resolve reference to 'getContentLength' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:45: warning: unable to resolve reference to 'getBody' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:46: warning: unable to resolve reference to 'getQuery' for \ref command +/home/toonsa/myProjects/webserv/include/http/HttpRequest.hpp:47: warning: unable to resolve reference to 'getPath' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:31: warning: unable to resolve reference to 'generateAutoindex:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:36: warning: unable to resolve reference to 'handleGet:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:38: warning: unable to resolve reference to 'handlePost:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:40: warning: unable to resolve reference to 'handleDelete:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:44: warning: unable to resolve reference to 'generateAutoindex:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:46: warning: unable to resolve reference to 'handleMultipartForm:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:260: warning: unable to resolve reference to 'processDirectory' for \ref command 
+/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:262: warning: unable to resolve reference to 'serveFile' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:270: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:372: warning: unable to resolve reference to 'listDirectoryEntries' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:382: warning: unable to resolve reference to 'htmlHeader' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:382: warning: unable to resolve reference to 'renderRow' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:383: warning: unable to resolve reference to 'htmlFooter' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:460: warning: unable to resolve reference to 'handleUrlEncodedForm' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:461: warning: unable to resolve reference to 'handleRawBody' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:181: warning: unable to resolve reference to 'unlinkFile' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:184: warning: unable to resolve reference to 'generateDeleteHtml' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:190: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:35: warning: unable to resolve reference to 'redirectOnDirectorySlash' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:39: warning: 
unable to resolve reference to 'redirectOnConfigured' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:41: warning: unable to resolve reference to 'validateRequestMethod' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:43: warning: unable to resolve reference to 'dispatchByMethod' for \ref command +/home/toonsa/myProjects/webserv/include/http/responseBuilder.hpp:20: warning: unable to resolve reference to 'generateSuccess' for \ref command +/home/toonsa/myProjects/webserv/include/http/responseBuilder.hpp:21: warning: unable to resolve reference to 'generateSuccessFile' for \ref command +/home/toonsa/myProjects/webserv/include/http/responseBuilder.hpp:22: warning: unable to resolve reference to 'generateError' for \ref command +/home/toonsa/myProjects/webserv/include/http/responseBuilder.hpp:24: warning: unable to resolve reference to 'generateRedirect' for \ref command +/home/toonsa/myProjects/webserv/include/http/responseBuilder.hpp:27: warning: unable to resolve reference to 'getDefaultMessage' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:236: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:273: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:327: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:400: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:247: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:249: warning: Unsupported xml/html tag found 
+/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:253: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:255: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:341: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:349: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:353: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:355: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:355: warning: found tag while expecting +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:359: warning: end of comment block while expecting command +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:359: warning: end of comment block while expecting command +/home/toonsa/myProjects/webserv/src/config/validateConfig.cpp:355: warning: unable to resolve reference to 'validateUniquePorts' for \ref command +/home/toonsa/myProjects/webserv/src/http/handleCgi.cpp:298: warning: argument 'path' of command @param is not found in the argument list of anonymous_namespace{handleCgi.cpp}::prepareCgiTempFiles(CgiProcess &cgi, const HttpRequest &req, int &bodyFd, int &outputFd) +/home/toonsa/myProjects/webserv/src/http/handleCgi.cpp:298: warning: argument 'errorCode' of command @param is not found in the argument list of anonymous_namespace{handleCgi.cpp}::prepareCgiTempFiles(CgiProcess &cgi, const HttpRequest &req, int &bodyFd, int &outputFd) +/home/toonsa/myProjects/webserv/src/http/handleCgi.cpp:298: warning: The following parameters of anonymous_namespace{handleCgi.cpp}::prepareCgiTempFiles(CgiProcess &cgi, const 
HttpRequest &req, int &bodyFd, int &outputFd) are not documented: + parameter 'cgi' + parameter 'req' + parameter 'bodyFd' + parameter 'outputFd' +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:181: warning: unable to resolve reference to 'unlinkFile' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:184: warning: unable to resolve reference to 'generateDeleteHtml' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:190: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:22: warning: unable to resolve reference to 'listDirectoryEntries' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:25: warning: unable to resolve reference to 'escapeUriComponent' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:28: warning: unable to resolve reference to 'formatMTimeUTC' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:29: warning: unable to resolve reference to 'renderRow' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:31: warning: unable to resolve reference to 'htmlHeader' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:31: warning: unable to resolve reference to 'htmlFooter' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:103: warning: unable to resolve reference to 'DirEntry' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:119: warning: unable to resolve reference to 'DirEntry' for \ref command 
+/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:287: warning: unable to resolve reference to 'escapeUriComponent' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:347: warning: unable to resolve reference to 'htmlHeader' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:347: warning: unable to resolve reference to 'renderRow' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:372: warning: unable to resolve reference to 'listDirectoryEntries' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:382: warning: unable to resolve reference to 'htmlHeader' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:382: warning: unable to resolve reference to 'renderRow' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:383: warning: unable to resolve reference to 'htmlFooter' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:19: warning: unable to resolve reference to 'detectMimeType' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:21: warning: unable to resolve reference to 'serveFile' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:23: warning: unable to resolve reference to 'processDirectory' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:107: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:111: warning: unable to resolve reference to 'detectMimeType' for \ref command 
+/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:125: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:196: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:260: warning: unable to resolve reference to 'processDirectory' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:262: warning: unable to resolve reference to 'serveFile' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:270: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:408: warning: unable to resolve reference to 'handleUrlEncodedForm' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:410: warning: unable to resolve reference to 'handleRawBody' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:460: warning: unable to resolve reference to 'handleUrlEncodedForm' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:461: warning: unable to resolve reference to 'handleRawBody' for \ref command +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:24: warning: unable to resolve reference to 'redirectOnDirectorySlash' for \ref command +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:26: warning: unable to resolve reference to 'findLocation' for \ref command +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:28: warning: unable to resolve reference to 'redirectOnConfigured' for \ref command +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:30: warning: unable to resolve reference to 
'validateRequestMethod' for \ref command +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:32: warning: unable to resolve reference to 'dispatchByMethod' for \ref command +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:137: warning: Found unknown command '@complexity' +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:243: warning: unable to resolve reference to 'validateRequestMethod' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:35: warning: unable to resolve reference to 'redirectOnDirectorySlash' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:39: warning: unable to resolve reference to 'redirectOnConfigured' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:41: warning: unable to resolve reference to 'validateRequestMethod' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:43: warning: unable to resolve reference to 'dispatchByMethod' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:163: warning: unable to resolve reference to 'shouldKeepAlive' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:236: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:273: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:327: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:400: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerRequest.cpp:20: warning: unable to resolve reference to 'receiveFromClient' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerRequest.cpp:22: warning: unable to 
resolve reference to 'parseAndQueueRequests' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerRequest.cpp:24: warning: unable to resolve reference to 'processPendingRequests' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerRequest.cpp:24: warning: unable to resolve reference to 'handleCgiRequest' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerRequest.cpp:26: warning: unable to resolve reference to 'handleCgiPollEvents' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerRequest.cpp:28: warning: unable to resolve reference to 'handleRequestErrorIfAny' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerResponse.cpp:20: warning: unable to resolve reference to 'logResponseStatus' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerResponse.cpp:21: warning: unable to resolve reference to 'sendFileResponse' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerResponse.cpp:22: warning: unable to resolve reference to 'sendRawResponse' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerResponse.cpp:23: warning: unable to resolve reference to 'sendResponse' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerTimeouts.cpp:20: warning: unable to resolve reference to 'isHeaderTimeout:' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerTimeouts.cpp:21: warning: unable to resolve reference to 'isBodyTimeout:' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerTimeouts.cpp:22: warning: unable to resolve reference to 'isSendTimeout:' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerTimeouts.cpp:23: warning: unable to resolve reference to 'isIdleTimeout:' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerTimeouts.cpp:24: warning: unable to resolve reference to 
'checkClientTimeouts:' for \ref command +/home/toonsa/myProjects/webserv/src/network/SocketManagerTimeouts.cpp:26: warning: unable to resolve reference to 'checkRequestLimits:' for \ref command +error: md5 hash does not match for two different runs of /home/toonsa/myProjects/webserv/docs/html/group__url__utils_gabf2641feed820a4fcaf2699a86d1a4f7_gabf2641feed820a4fcaf2699a86d1a4f7_icgraph.dot ! +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:236: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:273: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:327: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:400: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:181: warning: unable to resolve reference to 'unlinkFile' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:184: warning: unable to resolve reference to 'generateDeleteHtml' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleDelete.cpp:190: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:372: warning: unable to resolve reference to 'listDirectoryEntries' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:382: warning: unable to resolve reference to 'htmlHeader' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:382: warning: unable to resolve reference to 'renderRow' for \ref command 
+/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/generateAutoindex.cpp:383: warning: unable to resolve reference to 'htmlFooter' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:107: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:125: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:196: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:260: warning: unable to resolve reference to 'processDirectory' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:262: warning: unable to resolve reference to 'serveFile' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:270: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:408: warning: unable to resolve reference to 'handleUrlEncodedForm' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:410: warning: unable to resolve reference to 'handleRawBody' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:460: warning: unable to resolve reference to 'handleUrlEncodedForm' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handlePost/handlePost.cpp:461: warning: unable to resolve reference to 'handleRawBody' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:35: warning: unable to resolve reference to 'redirectOnDirectorySlash' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:39: 
warning: unable to resolve reference to 'redirectOnConfigured' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:41: warning: unable to resolve reference to 'validateRequestMethod' for \ref command +/home/toonsa/myProjects/webserv/include/http/requestRouter.hpp:43: warning: unable to resolve reference to 'dispatchByMethod' for \ref command +error: md5 hash does not match for two different runs of /home/toonsa/myProjects/webserv/docs/html/group__url__utils_gabf2641feed820a4fcaf2699a86d1a4f7_gabf2641feed820a4fcaf2699a86d1a4f7_icgraph.dot ! +/home/toonsa/myProjects/webserv/src/http/handleCgi.cpp:298: warning: argument 'path' of command @param is not found in the argument list of anonymous_namespace{handleCgi.cpp}::prepareCgiTempFiles(CgiProcess &cgi, const HttpRequest &req, int &bodyFd, int &outputFd) +/home/toonsa/myProjects/webserv/src/http/handleCgi.cpp:298: warning: argument 'errorCode' of command @param is not found in the argument list of anonymous_namespace{handleCgi.cpp}::prepareCgiTempFiles(CgiProcess &cgi, const HttpRequest &req, int &bodyFd, int &outputFd) +/home/toonsa/myProjects/webserv/src/http/handleCgi.cpp:298: warning: The following parameters of anonymous_namespace{handleCgi.cpp}::prepareCgiTempFiles(CgiProcess &cgi, const HttpRequest &req, int &bodyFd, int &outputFd) are not documented: + parameter 'cgi' + parameter 'req' + parameter 'bodyFd' + parameter 'outputFd' +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:107: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:125: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/methodsHandler/handleGet/handleGet.cpp:196: warning: unable to resolve reference to 'HttpResponse:' for \ref command +/home/toonsa/myProjects/webserv/src/http/requestRouter.cpp:137: warning: Found unknown 
command '@complexity' +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:247: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:249: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:253: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:255: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:341: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:349: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:353: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:355: warning: Unsupported xml/html tag found +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:355: warning: found tag while expecting +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:359: warning: end of comment block while expecting command +/home/toonsa/myProjects/webserv/src/config/parser/directive_handler_table.cpp:359: warning: end of comment block while expecting command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:217: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:236: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:273: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:327: warning: unable to resolve reference to 'initializeResponse' for \ref command 
+/home/toonsa/myProjects/webserv/src/http/responseBuilder.cpp:400: warning: unable to resolve reference to 'initializeResponse' for \ref command +/home/toonsa/myProjects/webserv/README.md:294: warning: Found unknown command '\r' +/home/toonsa/myProjects/webserv/README.md:294: warning: Found unknown command '\r' diff --git a/include/config/Config.hpp b/include/config/Config.hpp index 9936f9cb..afe1a426 100644 --- a/include/config/Config.hpp +++ b/include/config/Config.hpp @@ -3,35 +3,60 @@ /* ::: :::::::: */ /* Config.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/04/30 10:34:50 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:16:52 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:43:23 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file Config.hpp + * @brief Declares the Config aggregate for parsed virtual servers. + * + * @details Owns the collection of @ref Server objects produced by the parser + * and subsequently normalized/validated before runtime use. + * + * @ingroup config + */ + #pragma once #include "core/Server.hpp" #include +/** + * @brief Top-level configuration aggregate. + * + * @details Simple container holding all parsed @ref Server instances from the + * configuration input. Acts as the handoff object between parsing and + * later normalization/validation stages. 
+ * + * @ingroup config + */ class Config { public: - //////////////////////////////// - // --- Constructor + //=== Construction & Special Members ===================================== + + /** @name Construction & special members */ + ///@{ Config() = default; ~Config() = default; Config(const Config&) = default; Config& operator=(const Config&) = default; Config(Config&&) noexcept = default; Config& operator=(Config&&) noexcept = default; + ///@} + + //=== Public API ========================================================== - ////////////////// - // --- Public API + /** @name Public API */ + ///@{ void addServer(const Server& server); const std::vector& getServers() const; std::vector& getServers(); + ///@} private: - std::vector _servers; ///< List of all parsed servers + std::vector _servers; ///< List of all parsed servers. }; diff --git a/include/config/normalizeConfig.hpp b/include/config/normalizeConfig.hpp index 60766b65..82904401 100644 --- a/include/config/normalizeConfig.hpp +++ b/include/config/normalizeConfig.hpp @@ -1,17 +1,29 @@ /* ************************************************************************** */ /* */ /* ::: :::::::: */ -/* normalizeConfig.hpp :+: :+: :+: */ +/* normalizeConfig.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/11 21:02:21 by nlouis #+# #+# */ -/* Updated: 2025/05/20 22:37:22 by nlouis ### ########.fr */ +/* Updated: 2025/08/18 15:58:50 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file normalizeConfig.hpp + * @brief Declares normalization utilities for parsed configuration. + * + * @details Applies canonical defaults and fills in missing values after parsing, + * so downstream components can rely on predictable, complete settings. + * Typical defaults include body size limits, error pages, roots, indices, + * and allowed methods. 
+ * + * @ingroup config_normalizing + */ + #pragma once class Config; -void normalizeConfig(Config& config); \ No newline at end of file +void normalizeConfig(Config& config); diff --git a/include/config/parser/ConfigParseError.hpp b/include/config/parser/ConfigParseError.hpp index 30f49cb5..28d2322a 100644 --- a/include/config/parser/ConfigParseError.hpp +++ b/include/config/parser/ConfigParseError.hpp @@ -6,27 +6,53 @@ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/06 13:13:44 by nlouis #+# #+# */ -/* Updated: 2025/05/21 21:37:09 by nlouis ### ########.fr */ +/* Updated: 2025/08/18 12:01:14 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file ConfigParseError.hpp + * @brief Declares parse/validation error types for the config subsystem. + * + * @details Provides a single exception class used for tokenizer, parser, and + * validator errors. The class formats a human-friendly message and + * optionally appends a contextual source line for diagnostics. + * + * @ingroup config_parse_error + */ + #pragma once #include #include +/** + * @brief Base exception for configuration processing errors. + * + * @details Carries a formatted message and an optional context line (e.g., + * the offending source line). All config-related errors (tokenizer, + * parser, unexpected token, validation) alias this class. + * + * @ingroup config_parse_error + */ class ConfigParseError : public std::exception { public: + /** @name Construction & interface */ + ///@{ ConfigParseError(const std::string& message, const std::string& context = ""); - const char* what() const noexcept override; + ///@} protected: - std::string _context; ///< Optional contextual line - std::string _fullMessage; ///< Formatted error with context + std::string _context; ///< Optional contextual source line shown after the message. + std::string _fullMessage; ///< Final formatted message (message + context arrow). 
}; -using SyntaxError = ConfigParseError; +/** @brief Alias for syntax errors (tokenizer/parser). @ingroup config_parse_error */ +using SyntaxError = ConfigParseError; +/** @brief Alias for unexpected token errors. @ingroup config_parse_error */ using UnexpectedToken = ConfigParseError; -using TokenizerError = ConfigParseError; +/** @brief Alias for tokenizer-specific errors. @ingroup config_parse_error */ +using TokenizerError = ConfigParseError; +/** @brief Alias for configuration validation errors.@ingroup config_parse_error */ using ValidationError = ConfigParseError; diff --git a/include/config/parser/ConfigParser.hpp b/include/config/parser/ConfigParser.hpp index 7bad1c35..5dc31e1e 100644 --- a/include/config/parser/ConfigParser.hpp +++ b/include/config/parser/ConfigParser.hpp @@ -3,13 +3,24 @@ /* ::: :::::::: */ /* ConfigParser.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/05 15:06:31 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:09:36 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:43:48 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file ConfigParser.hpp + * @brief Declares the ConfigParser for syntactic/semantic directive parsing. + * + * @details Consumes a token stream from @ref Tokenizer and builds a concrete + * configuration model (@ref Config, @ref Server, @ref Location) by + * dispatching recognized directives to handler tables. + * + * @ingroup config_parsing + */ + #pragma once #include "config/Config.hpp" // for Config @@ -23,25 +34,38 @@ #include // for string #include // for vector +/** + * @brief Syntactic parser for the webserv configuration language. + * + * @details Validates block structure (`server` → `location`) and delegates + * directive semantics to the handler tables. Produces a fully + * constructed @ref Config tree or throws on errors. 
+ * + * @ingroup config_parsing + */ class ConfigParser { public: - ////////////////// - // --- Public API + //=== Public API ========================================================== + /** @name Public API */ + ///@{ ConfigParser(std::string source); Config parseConfig(); + ///@} private: - //////////////////////////// - // --- Server Block Parsing + //=== Server Block Parsing =============================================== + /** @name Server block parsing */ + ///@{ Server parseServer(); void parseServerDirective(Server& server); Location parseLocation(); void parseLocationDirective(Location& location); + ///@} - //////////////////////// - // --- Token Navigation - + //=== Token Navigation ==================================================== + /** @name Token navigation */ + ///@{ const Token& current() const; const Token& previous() const; const Token& peek(std::size_t offset = 1) const; @@ -52,13 +76,20 @@ class ConfigParser { void expect(TokenType expected, const std::string& context); Token expectOneOf(std::initializer_list types, const std::string& context); std::vector collectArgs(std::span validTypes); + ///@} - ///////////////////// - // --- Error Context + //=== Error Context ======================================================= + /** @name Error context */ + ///@{ std::string getLineSnippet() const; + ///@} private: - Tokenizer _tokenizer; ///< Tokenizer instance used to produce the token stream. - std::vector _tokens; ///< Flattened list of tokens extracted from the source input. + //=== Internal State ====================================================== + /** @name Internal state */ + ///@{ + Tokenizer _tokenizer; ///< Produces the token stream from the raw source. + std::vector _tokens; ///< Flattened token list consumed by the parser. std::size_t _pos = 0; ///< Current index in the token stream. 
+ ///@} }; diff --git a/include/config/parser/directive_handler_table.hpp b/include/config/parser/directive_handler_table.hpp index 778c0da6..8a8505a5 100644 --- a/include/config/parser/directive_handler_table.hpp +++ b/include/config/parser/directive_handler_table.hpp @@ -6,10 +6,23 @@ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/08 17:10:31 by nlouis #+# #+# */ -/* Updated: 2025/05/21 10:45:39 by nlouis ### ########.fr */ +/* Updated: 2025/08/18 12:36:11 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file directive_handler_table.hpp + * @brief Declares handler maps for server/location directives. + * + * @details Provides the callable signatures and lookup tables used by + * @ref ConfigParser to apply parsed directives to @ref Server + * and @ref Location instances. Each entry maps a directive + * keyword (e.g., "listen", "root") to a function that mutates + * the target object or throws on invalid arguments. + * + * @ingroup config_parsing + */ + #pragma once #include @@ -22,14 +35,35 @@ namespace directive { +/** + * @brief Handler signature for server-level directives. + * + * @param s Target server to mutate. + * @param args Directive arguments as strings (already token-collected). + * @param line Source line for diagnostics. + * @param column Source column for diagnostics. + * @param ctx Contextual source snippet to append in error messages. + * + * @ingroup config_parsing + */ using ServerHandler = std::function& args, int line, - int column, const std::string& getLineSnippet)>; + int column, const std::string& ctx)>; -using LocationHandler = - std::function& args, int line, int column, - const std::string& getLineSnippet)>; +/** + * @brief Handler signature for location-level directives. + * + * @param loc Target location to mutate. + * @param args Directive arguments as strings (already token-collected). + * @param line Source line for diagnostics. 
+ * @param column Source column for diagnostics. + * @param ctx Contextual source snippet to append in error messages. + * + * @ingroup config_parsing + */ +using LocationHandler = std::function& args, + int line, int column, const std::string& ctx)>; const std::unordered_map& serverHandlers(); const std::unordered_map& locationHandlers(); -} // namespace directive \ No newline at end of file +} // namespace directive diff --git a/include/config/tokenizer/Tokenizer.hpp b/include/config/tokenizer/Tokenizer.hpp index 99c298de..98394ee6 100644 --- a/include/config/tokenizer/Tokenizer.hpp +++ b/include/config/tokenizer/Tokenizer.hpp @@ -3,13 +3,25 @@ /* ::: :::::::: */ /* Tokenizer.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/04 13:17:47 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:13:55 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:46:40 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file Tokenizer.hpp + * @brief Declares the Tokenizer for config lexical analysis. + * + * @details Converts raw configuration text into a typed token stream with + * accurate line/column offsets for diagnostics. Recognizes keywords, + * identifiers, numbers with single-letter size suffixes, strings, + * punctuation, and skips comments/whitespace/BOM. + * + * @ingroup config_tokenizing + */ + #pragma once #include "token.hpp" // for Token, TokenType @@ -17,79 +29,124 @@ #include // for string #include // for vector +/** + * @brief Stateful lexer for the configuration language. + * + * @details Produces a vector of @ref Token from an input string. The lexer is + * single-pass and records source positions for error reporting. It is + * intentionally strict (e.g., rejects multi-letter numeric suffixes). 
+ * + * @ingroup config_tokenizing + */ class Tokenizer { public: - //////////////////// - // --- Constructors + //=== Construction & Special Members ===================================== + + /** @name Construction & special members */ + ///@{ explicit Tokenizer(std::string input); Tokenizer(const Tokenizer&) = delete; Tokenizer& operator=(const Tokenizer&) = delete; Tokenizer(Tokenizer&&) noexcept = default; Tokenizer& operator=(Tokenizer&&) noexcept = default; + ///@} - //////////////// - // --- Main API + //=== Main API ============================================================ + + /** @name Main API */ + ///@{ [[nodiscard]] std::vector tokenize(); + ///@} + + //=== Token Access Helpers =============================================== - //////////////////// - // --- Token Access + /** @name Token access helpers */ + ///@{ std::string extractLine(std::size_t offset) const; + ///@} private: - ///////////////////////// - // --- Core Cursor Logic + //=== Core Cursor Logic =================================================== + + /** @name Core cursor logic */ + ///@{ bool isAtEnd() const noexcept; bool match(char expected) noexcept; unsigned char peek() const noexcept; unsigned char peekNext() const noexcept; unsigned char advance() noexcept; + ///@} + + //=== Classification Logic =============================================== - //////////////////////////// - // --- Classification Logic + /** @name Classification logic */ + ///@{ inline bool isIdentifierStart(unsigned char c) const; inline bool isIdentifierChar(unsigned char c) const; + ///@} + + //=== High-Level Parsers ================================================== - ////////////////////////// - // --- High-Level Parsers + /** @name High-level parsers */ + ///@{ void skipUtf8BOM(); void skipWhitespaceAndComments(); TokenType resolveKeywordType(const std::string& word); void scanIdentifier(); void validateIdentifier(std::size_t start); Token parseIdentifierOrKeyword(); + ///@} - ///////////////////////////// - 
// --- Number & Unit Parsing + //=== Number & Unit Parsing ============================================== + + /** @name Number & unit parsing */ + ///@{ void scanDigits(); void scanOptionalUnitSuffix(); Token parseNumberOrUnit(); + ///@} + + //=== String Parsing ====================================================== - ////////////////////// - // --- String Parsing + /** @name String parsing */ + ///@{ void throwUnterminatedString(const std::string& reason); Token parseStringLiteral(); + ///@} + + //=== Whitespace & Comment Helpers ======================================= - //////////////////////////////////// - // --- Whitespace & Comment Helpers + /** @name Whitespace & comment helpers */ + ///@{ void skipCR(); void skipNewline(); void skipOtherWhitespace(); void skipHashComment(); + ///@} - ////////////////////// - // --- Token Dispatch + //=== Token Dispatch ====================================================== + + /** @name Token dispatch */ + ///@{ bool looksLikeIpAddress() const; void dispatchToken(); + ///@} + + //=== Token Creation ====================================================== - //////////////////////////////////// - // --- Token Creation + /** @name Token creation */ + ///@{ Token makeToken(TokenType type, const std::string& value) const; + ///@} + + //=== Internal State ====================================================== - ////////////////////// - // --- Internal State + /** @name Internal state */ + ///@{ std::string _input; ///< Raw input string to tokenize. std::vector _tokens; ///< Accumulated list of emitted tokens. std::size_t _pos = 0; ///< Current byte offset in the input. int _line = 1; ///< Current line number (1-based). int _column = 1; ///< Current column number (1-based). 
+ ///@} }; diff --git a/include/config/tokenizer/token.hpp b/include/config/tokenizer/token.hpp index d17930d2..88a9db30 100644 --- a/include/config/tokenizer/token.hpp +++ b/include/config/tokenizer/token.hpp @@ -3,18 +3,33 @@ /* ::: :::::::: */ /* token.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/03 00:55:06 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:15:00 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:46:18 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file token.hpp + * @brief Token and TokenType declarations for the config lexer. + * + * @details Defines the token enum and POD struct used by the Tokenizer to + * represent lexemes with their source location (line/column/offset). + * + * @ingroup config_tokenizing + */ + #pragma once #include // for size_t #include // for string +/** + * @brief Token categories recognized by the configuration lexer. + * + * @ingroup config_tokenizing + */ enum class TokenType { IDENTIFIER, ///< A generic identifier (directive name or argument) NUMBER, ///< A numeric literal (may include optional size suffix) @@ -40,11 +55,19 @@ enum class TokenType { KEYWORD_CGI_EXTENSION ///< `cgi_extension` directive }; +/** + * @brief Plain-old-data token with source coordinates. + * + * @details Carries the token type, lexical value, and its position within the + * original input (line, column, byte offset) for diagnostics. + * + * @ingroup config_tokenizing + */ struct Token { TokenType type; ///< Type of the token (identifier, keyword, etc.) 
std::string value; ///< Lexical string value of the token - int line; ///< Line number where the token begins - int column; ///< Column offset (zero-based) + int line; ///< Line number where the token begins (1-based) + int column; ///< Column number where the token begins (1-based) std::size_t offset; ///< Byte offset in the original input string Token(TokenType t, const std::string& v, int l, int c, std::size_t o); diff --git a/include/config/validateConfig.hpp b/include/config/validateConfig.hpp index a9a467ad..bca6227b 100644 --- a/include/config/validateConfig.hpp +++ b/include/config/validateConfig.hpp @@ -1,15 +1,26 @@ /* ************************************************************************** */ /* */ /* ::: :::::::: */ -/* validateConfig.hpp :+: :+: :+: */ +/* validateConfig.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/05 00:11:16 by nlouis #+# #+# */ -/* Updated: 2025/05/20 23:11:03 by nlouis ### ########.fr */ +/* Updated: 2025/08/18 14:05:46 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file validateConfig.hpp + * @brief Declares validation for normalized configuration. + * + * @details Performs semantic checks after parsing/normalization to guarantee + * a runnable configuration (e.g., port/host collisions, invalid paths, + * duplicate server names on same listen, illegal method sets, etc.). 
+ * + * @ingroup config_validating + */ + #pragma once class Config; diff --git a/include/core/Location.hpp b/include/core/Location.hpp index cbefa78c..0ab417c4 100644 --- a/include/core/Location.hpp +++ b/include/core/Location.hpp @@ -6,28 +6,59 @@ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/02 13:45:05 by nlouis #+# #+# */ -/* Updated: 2025/05/24 15:05:43 by nlouis ### ########.fr */ +/* Updated: 2025/08/15 23:15:01 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file Location.hpp + * @brief Declares the Location class for route-specific configuration. + * + * @details Represents a configuration block tied to a specific URL path within + * a virtual server. Each Location defines its own root, allowed methods, + * redirection rules, upload storage, index files, and CGI settings. + * This granularity allows per-path customization of behavior and routing. + * + * @ingroup location_component + */ + #pragma once -#include -#include -#include -#include +#include // std::map for CGI interpreter mapping +#include // std::set for allowed HTTP methods +#include // std::string for paths and configuration values +#include // std::vector for index and CGI extension lists +/** + * @brief Encapsulates configuration for a single URL path. + * + * @details Models a location block inside a server configuration. 
This includes: + * - Path matching rules + * - File system root for serving content + * - Directory listing behavior + * - HTTP method restrictions + * - Optional redirection + * - CGI execution parameters + * - Upload handling + * + * @ingroup location_component + */ class Location { public: - //////////////////////////////// - // --- Constructor + //=== Construction & Special Members ===================================== + + /** @name Construction & special members */ + ///@{ Location(); ~Location() = default; Location(const Location& other) = default; Location& operator=(const Location& other) = default; + ///@} - /////////////////// - // --- Setters --- + //=== Configuration Setters ============================================== + + /** @name Configuration setters */ + ///@{ void setPath(const std::string& path); void setRoot(const std::string& root); void setAutoindex(bool enabled); @@ -38,9 +69,12 @@ class Location { void setAllowedMethods(const std::vector& methods); void addIndexFile(const std::string& idx); void addCgiInterpreter(const std::string& ext, const std::string& path); + ///@} + + //=== Queries (Getters) =================================================== - /////////////// - // --- Getters + /** @name Queries (getters) */ + ///@{ const std::string& getPath() const; const std::set& getMethods() const; const std::string& getRoot() const; @@ -55,9 +89,12 @@ class Location { const std::vector& getCgiExtensions() const; std::string getCgiInterpreter(const std::string& ext) const; const std::map& getCgiInterpreterMap() const; + ///@} - ///////////////////// - // --- Logic helpers + //=== Logic & Matching Helpers ============================================ + + /** @name Logic helpers */ + ///@{ bool hasAllowedMethods() const; bool isMethodAllowed(const std::string& method) const; bool matchesPath(const std::string& uri) const; @@ -65,18 +102,19 @@ class Location { bool isUploadEnabled() const; bool isCgiRequest(const std::string& uri) const; 
std::string getEffectiveIndexPath() const; + ///@} private: - std::string _path; ///< URL path this location matches. - std::set _methods; ///< Set of allowed HTTP methods. - std::string _root; ///< Root directory for file serving. - bool _autoindex; ///< Whether to enable directory listing. - std::string _redirect; ///< Redirection target URL. - int _return_code; ///< HTTP status code for redirection. - std::string _upload_store; ///< Directory for uploaded files. - std::vector _index_files; ///< Ordered list of index files. - std::vector _cgi_extensions; ///< Ordered list of CGI extensions. - std::map _cgi_interpreters; -}; + //=== Data Members ======================================================== -/** @} */ + std::string _path; ///< URL path this location matches. + std::set _methods; ///< Allowed HTTP methods. + std::string _root; ///< Root directory for file serving. + bool _autoindex; ///< Enable/disable directory listing. + std::string _redirect; ///< Target URL for redirection. + int _return_code; ///< HTTP status code for redirection. + std::string _upload_store; ///< Directory for file uploads. + std::vector _index_files; ///< List of default index files. + std::vector _cgi_extensions; ///< File extensions for CGI execution. + std::map _cgi_interpreters; ///< CGI extension → interpreter mapping. 
+}; diff --git a/include/core/Server.hpp b/include/core/Server.hpp index 0f1fab74..3799caad 100644 --- a/include/core/Server.hpp +++ b/include/core/Server.hpp @@ -3,10 +3,10 @@ /* ::: :::::::: */ /* Server.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/04/30 09:37:06 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:27:56 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:47:28 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ @@ -19,7 +19,7 @@ * error pages, configure body size limits, and contain multiple location * blocks with their own routing rules and behavior. * - * @ingroup config + * @ingroup server_component */ #pragma once @@ -40,195 +40,60 @@ * `server` block in the configuration file and participates in request * routing based on host and port matching. * - * @ingroup config + * @ingroup server_component */ class Server { private: - int _port; ///< Port to listen on (0–65535). - std::string _host; ///< IP address to bind (e.g., "0.0.0.0"). - std::vector _server_names; ///< List of server name aliases (Host-based routing). - std::map _error_pages; ///< Maps HTTP error codes to custom error page paths. - size_t _client_max_body_size; ///< Maximum allowed body size per request (bytes). - std::vector _locations; ///< Set of location blocks defined for this server. + //=== Data ================================================================ + + int _port; ///< Port to listen on (0–65535). + std::string _host; ///< Bind address (e.g., "0.0.0.0"). + std::vector _server_names; ///< Host-based routing aliases. + std::map _error_pages; ///< HTTP code → error page path. + std::size_t _client_max_body_size; ///< Max request body size (bytes). + std::vector _locations; ///< Location blocks for this server. 
public: - /////////////////// - // --- Constructor - /** - * @brief Constructs a Server instance with default settings. - * - * @details Initializes default values for host (`"0.0.0.0"`), port (`80`), - * client max body size (`1 MiB`), and empty location/error blocks. - * Intended to be populated via configuration parsing. - * - * @ingroup config - */ + //=== Construction & Special Members ===================================== + + /** @name Construction & special members */ + ///@{ Server(); ~Server() = default; Server(const Server& other) = default; Server& operator=(const Server& other) = default; + ///@} + + //=== Configuration Setters ============================================== - /////////////// - // --- Setters - /** - * @brief Sets the port number this server will listen on. - * - * @details This port must be in the range [0, 65535]. It determines which TCP port - * the server binds to for accepting incoming connections. Typically set via - * the `listen` directive in the configuration file. Validation is done before calling. - * - * @param port The TCP port number to bind to. - * @ingroup config - */ + /** @name Configuration setters */ + ///@{ void setPort(int port) noexcept; - /** - * @brief Sets the IP address to bind this server to. - * - * @details The host determines which network interface(s) the server will listen on. - * A value of `"0.0.0.0"` binds to all interfaces. This is typically configured - * via the `host` directive in the configuration file. No validation is done here. - * - * @param host The IP address to bind (e.g., "127.0.0.1" or "0.0.0.0"). - * @ingroup config - */ void setHost(std::string_view host) noexcept; - /** - * @brief Adds a server name alias for this virtual host. - * - * @details Server names are used to match the `Host` header of incoming HTTP requests. - * Multiple names can be added to support name-based virtual hosting. - * This method appends without deduplication. 
- * - * @param name The server name to add (e.g., "example.com"). - * @ingroup config - */ void addServerName(std::string_view name); - /** - * @brief Sets a custom error page for a specific HTTP status code. - * - * @details Associates an HTTP error code (e.g., 404, 500) with a file path - * that will be served when that error occurs. Overrides the default - * error response. Multiple codes can share the same file path. - * - * @param code The HTTP error status code to override. - * @param path The file path to serve as the custom error page. - * @ingroup config - */ void setErrorPage(int code, const std::string& path); - /** - * @brief Sets the maximum allowed size for the HTTP request body. - * - * @details Used to limit the size of incoming requests, particularly for - * POST and PUT methods. If a request exceeds this size, the server - * should reject it with a 413 Payload Too Large response. - * - * @param size Maximum body size in bytes. - * @ingroup config - */ - void setClientMaxBodySize(size_t size) noexcept; - /** - * @brief Adds a location block to this server. - * - * @details Appends a new `Location` object representing a URI-matching block - * with its own configuration. Locations define routing rules and behavior - * for specific URI prefixes under this server. - * - * @param location The `Location` instance to add. - * @ingroup config - */ + void setClientMaxBodySize(std::size_t size) noexcept; void addLocation(const Location& location); + ///@} - /////////////// - // --- Getters - /** - * @brief Sets the port number for this server. - * - * @details This defines which TCP port the server should bind to for incoming - * connections. The value must be in the valid range [0, 65535], and - * is typically configured via the `listen` directive in the config file. - * Validation is performed before this function is called. - * - * @param port The TCP port to bind to. 
- * @ingroup config - */ - int getPort() const noexcept; - /** - * @brief Returns the configured host IP address for this server. - * - * @details This address determines which local interface(s) the server binds to. - * A value of `"0.0.0.0"` means it will accept connections on all interfaces. - * Typically set via the `host` directive in the configuration file. - * - * @return Reference to the host IP address string. - * @ingroup config - */ - const std::string& getHost() const noexcept; - /** - * @brief Returns the list of server name aliases for this virtual host. - * - * @details These names are used to match the `Host` header in incoming HTTP requests. - * If none match, the first declared server for the host:port is used as default. - * Configured via the `server_name` directive. - * - * @return Reference to the list of server names. - * @ingroup config - */ - const std::vector& getServerNames() const noexcept; - const std::string getDefaultServerName() const; - /** - * @brief Returns the mapping of HTTP error codes to custom error pages. - * - * @details This map associates specific HTTP status codes (e.g., 404, 500) - * with file paths to serve instead of default error messages. - * Configured via the `error_page` directive. - * - * @return Reference to the map of error codes to file paths. - * @ingroup config - */ + //=== Queries (Getters) =================================================== + + /** @name Queries (getters) */ + ///@{ + int getPort() const noexcept; + const std::string& getHost() const noexcept; + const std::vector& getServerNames() const noexcept; + const std::string getDefaultServerName() const; const std::map& getErrorPages() const noexcept; - /** - * @brief Returns the maximum allowed size for the request body. - * - * @details This limit applies to the content length of incoming HTTP requests, - * including POST uploads. If exceeded, the server should return - * a 413 Payload Too Large error. 
Configured via the `client_max_body_size` directive. - * - * @return The maximum request body size in bytes. - * @ingroup config - */ - size_t getClientMaxBodySize() const noexcept; - /** - * @brief Returns the list of location blocks defined for this server. - * - * @details Each location block defines a URI prefix and associated behavior - * (e.g., root, methods, CGI, redirects). During request handling, the - * server selects the best-matching location based on the URI. - * - * @return Reference to the list of `Location` objects. - * @ingroup config - */ - const std::vector& getLocations() const noexcept; - /** - * @brief Returns a mutable reference to the server's location blocks. - * - * @details Allows in-place modification of the list of `Location` objects, - * typically used during configuration parsing to populate new routes. - * Use with care to avoid breaking routing logic. - * - * @return Reference to the list of `Location` objects. - * @ingroup config - */ - std::vector& getLocations() noexcept; - /** - * @brief Checks whether the server matches the given server name. - * - * @details Compares the provided name against the configured server names - * for this virtual host. Used during request routing based on the - * `Host` header in the HTTP request. - * - * @param name The server name to check (case-sensitive). - * @return `true` if the name matches one of the configured server names. 
- * @ingroup config - */ + std::size_t getClientMaxBodySize() const noexcept; + const std::vector& getLocations() const noexcept; + std::vector& getLocations() noexcept; + ///@} + + //=== Matching & Predicates ============================================== + + /** @name Matching & predicates */ + ///@{ bool hasServerName(std::string_view name) const noexcept; + ///@} }; diff --git a/include/core/webserv.hpp b/include/core/runWebserv.hpp similarity index 62% rename from include/core/webserv.hpp rename to include/core/runWebserv.hpp index 2f6e6b12..218e32e0 100644 --- a/include/core/webserv.hpp +++ b/include/core/runWebserv.hpp @@ -1,15 +1,24 @@ /* ************************************************************************** */ /* */ /* ::: :::::::: */ -/* webserv.hpp :+: :+: :+: */ +/* runWebserv.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/20 22:47:01 by nlouis #+# #+# */ -/* Updated: 2025/05/20 22:47:24 by nlouis ### ########.fr */ +/* Updated: 2025/08/14 15:21:52 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file runWebserv.hpp + * @brief Public entry point for starting the Webserv server. + * + * @details Declares the `runWebserv()` function, which bootstraps the + * configuration loading, validation, and event loop execution. 
+ * @ingroup entrypoint + */ + #pragma once -int runWebserv(int argc, char** argv); \ No newline at end of file +int runWebserv(int argc, char** argv); // documented at ./src/core/webserv.cpp \ No newline at end of file diff --git a/include/core/server_utils.hpp b/include/core/server_utils.hpp index 70a3e7db..e6801df9 100644 --- a/include/core/server_utils.hpp +++ b/include/core/server_utils.hpp @@ -3,23 +3,20 @@ /* ::: :::::::: */ /* server_utils.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/02 20:29:58 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:26:54 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:47:04 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ /** * @file server_utils.hpp - * @brief Provides utility functions for selecting the correct Server instance. + * @brief Declares the server-selection utility for virtual host resolution. * - * @details Contains logic for virtual host resolution based on the request's - * Host header and destination port. These functions are used during HTTP - * request routing to determine which configured server block should handle - * the incoming connection. - * - * @ingroup config + * @details Contains the function declaration for selecting the appropriate + * `Server` instance based on the listening port and the HTTP `Host` header. + * @ingroup server_component */ #pragma once @@ -28,13 +25,6 @@ #include // for vector class Server; -/** - * @brief Selects the best matching server for a given port and Host header. - * - * @param servers List of all servers loaded from config. - * @param port The port on which the connection was accepted. - * @param host_name The Host header value from the request (e.g. "localhost"). - * @return Reference to the selected Server.
- */ -const Server& findMatchingServer(const std::vector& servers, int port, - const std::string& host_name); +const Server& +findMatchingServer(const std::vector& servers, int port, + const std::string& host_name); // documented at src/core/server_utils.cpp diff --git a/include/doxygen_groups.hpp b/include/doxygen_groups.hpp new file mode 100644 index 00000000..c6c6a14b --- /dev/null +++ b/include/doxygen_groups.hpp @@ -0,0 +1,207 @@ +/** + * @file doxygen_groups.hpp + * @brief Declares main Doxygen documentation groups for Webserv modules. + * + * @details This header contains only `@defgroup` declarations. + * It is not included in the build for logic, only for documentation purposes. + * All source files should use `@ingroup ` to link to these groups. + */ + +/** + * @defgroup config Configuration Parsing + * @brief Configuration structures, parsing, normalization, and validation. + * + * @details Handles reading, tokenizing, and parsing the Webserv configuration file(s), + * validating directives, and preparing normalized configuration objects + * for use by the core server. + */ + +/** + * @defgroup config_tokenizing Tokenizing + * @ingroup config + * @brief Lexical analysis of configuration text into tokens. + * + * @details Converts raw configuration input into a token stream (types, values, positions) + * consumed by the parser. + */ + +/** + * @defgroup config_parsing Parsing + * @ingroup config + * @brief Syntactic parsing and directive dispatch. + * + * @details Builds in-memory config objects from tokens and applies directive handlers + * to populate `Server`/`Location` structures. + */ + +/** + * @defgroup config_parse_error Parsing Errors + * @ingroup config + * @brief Error types and helpers for tokenizer/parser diagnostics. + * + * @details Structured exceptions carrying human-friendly messages and context snippets + * for syntax/lexing errors. 
+ */ + +/** + * @defgroup config_normalizing Normalizing + * @ingroup config + * @brief Post-parse defaulting and canonicalization. + * + * @details Fills in default values (methods, error pages, indices, sizes) and + * normalizes paths/settings for predictable downstream behavior. + */ + +/** + * @defgroup config_validation Validating + * @ingroup config + * @brief Configuration validation passes. + * + * @details Static checks for structure, duplicates, domain/port uniqueness, allowed methods, + * CGI mappings, and filesystem preconditions (roots, upload stores). + */ + +/** + * @defgroup core Core Components + * @brief Main server components, control flow, and application entry point. + * + * @details Contains the event loop, server initialization, and runtime orchestration. + * Ties together configuration, networking, and HTTP processing. + */ + +/** + * @defgroup server_component Server Component + * @ingroup core + * @brief Main Server class and its implementation. + * + * @details Contains the `Server` class, which represents a single + * virtual server block in Webserv. + * + * Files: + * - `core/Server.hpp` — class declaration. + * - `core/Server.cpp` — method implementations. + * - `core/server_utils.hpp` — findMatchingServer() + * - `core/server_utils.cpp` — findMatchingServer() + */ + +/** + * @defgroup location_component Location Component + * @ingroup core + * @brief Path-specific routing and configuration within a server. + * + * @details Contains the `Location` class, representing a configuration + * block bound to a specific URI path inside a server. + * Each location can define: + * - A document root. + * - Allowed HTTP methods. + * - Index files and autoindexing. + * - Redirection rules. + * - CGI execution settings. + * - Upload directory configuration. + * + * Files: + * - `core/Location.hpp` — class declaration. + * - `core/Location.cpp` — method implementations. 
+ */ + +/** + * @defgroup entrypoint Application Entrypoint + * @ingroup core + * @brief Program entry and top-level orchestration. + * + * @details Contains the `main()` function and the high-level startup/shutdown + * logic for Webserv, including: + * - `runWebserv.hpp` — high-level declarations. + * - `runWebserv.cpp` — main orchestration functions. + * - `main.cpp` — program entry point. + */ + +/** + * @defgroup http HTTP Protocol + * @brief HTTP request/response parsing and handling. + * + * @details Includes classes and functions for parsing HTTP requests, + * building HTTP responses, routing, and applying HTTP rules. + */ + +/** + * @defgroup request_handler Request Handling + * @ingroup http + * @brief Functions and utilities for handling HTTP requests. + */ + +/** + * @defgroup socket_manager Network & Socket Abstraction + * @brief Non-blocking I/O, socket management, and connection handling. + * + * @details Provides abstractions over system calls for listening sockets, + * client connections, and readiness-based multiplexing using poll/kqueue/epoll. + */ + +/** + * @defgroup utils Utility Functions + * @brief Shared helper routines for common operations. + * + * @details Provides generic helper functions reused across modules, including + * string manipulation, date/time handling, and filesystem operations. + */ + +/** + * @defgroup filesystem_utils Filesystem Utilities + * @ingroup utils + * @brief Path manipulation, safe file handling, and directory management. + * + * @details Contains helpers for: + * - Normalizing and joining paths. + * - Mapping URIs to filesystem paths. + * - Sanitizing filenames for uploads. + * - Creating directories recursively. + * - Enforcing upload root boundaries. + * These utilities are designed to prevent directory traversal, + * enforce security constraints, and support Webserv’s upload and + * static file-serving features. 
+ */ + +/** + * @defgroup html_utils HTML Utilities + * @ingroup utils + * @brief Safe HTML encoding and related helpers. + * + * @details Functions for escaping or manipulating HTML content so that + * untrusted input can be embedded safely in a page without being + * interpreted as markup. Prevents common injection vulnerabilities + * like cross-site scripting (XSS) by replacing reserved characters + * with their corresponding HTML entities. + */ + +/** + * @defgroup string_utils String Utilities + * @ingroup utils + * @brief String manipulation, parsing, and formatting helpers. + * + * @details Provides reusable string-related routines including: + * - Case conversion (uppercase/lowercase). + * - Whitespace trimming. + * - Delimited join operations. + * - Size parsing with suffix multipliers (e.g., KiB, MiB). + * - Human-readable byte formatting. + * - Integer parsing with detailed error reporting. + * These utilities are used throughout Webserv for configuration + * parsing, logging, and data presentation. + */ + +/** + * @defgroup url_utils URL Utilities + * @ingroup utils + * @brief Helpers for percent-decoding, form decoding, and safe filename extraction. + * + * @details Provides functions for: + * - Decoding percent-encoded sequences in URIs. + * - Handling `application/x-www-form-urlencoded` form data. + * - Parsing key/value pairs from form bodies. + * - Extracting and validating safe filenames from URI segments. + * + * These functions are typically used during HTTP request parsing, + * particularly for processing query strings, form submissions, + * and safe handling of uploaded filenames. 
+ */ diff --git a/include/http/HttpRequest.hpp b/include/http/HttpRequest.hpp index dd0a69ab..949f016f 100644 --- a/include/http/HttpRequest.hpp +++ b/include/http/HttpRequest.hpp @@ -3,13 +3,54 @@ /* ::: :::::::: */ /* HttpRequest.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/11 12:31:10 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:21:30 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 09:04:29 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file HttpRequest.hpp + * @brief Immutable-ish HTTP/1.x request model with header map and parsed URL. + * + * @ingroup http + * + * @details + * Represents a single HTTP/1.x request after (or while) being parsed by + * @ref HttpRequestParser. The object stores: + * - Request line fields: method, path (raw target), and HTTP version. + * - Header fields in a case-preserving map (lookups are done via @ref hasHeader + * and @ref getHeader). + * - Optional message body and the tracked @ref _contentLength. + * - A parsed @ref Url object and extracted query string (text after `?`). + * - A parser status code (@ref _parseError) and the selected virtual server + * index (@ref _matchedServerIndex) for routing (see `requestRouter.*`). + * - The resolved `Host` header value for vhost matching. + * + * Typical lifecycle in this codebase: + * 1. Bytes arrive on a socket and are parsed into @ref HttpRequest + * by `HttpRequestParser.*`. + * 2. `requestRouter.*` sets @ref setMatchedServerIndex and may refine path/URL. + * 3. Method handlers (`handleGet.cpp`, `handlePost.cpp`, `handleDelete.cpp`, + * plus `handleMultipartForm.cpp` / `handleCgi.*`) consume this object. + * 4. `responseBuilder.*` uses fields (method, headers, body, URL) to craft an + * @ref HttpResponse. 
+ * + * @par Invariants + * - If @ref getParseErrorCode returns non‑zero, the request is considered + * malformed and the server should generate an appropriate error response. + * - @ref getContentLength equals the numeric value derived from the + * `Content-Length` header if present; handlers must validate coherence + * with @ref getBody. + * - @ref getQuery is the substring of the request-target after `?` + * (empty if absent). The raw @ref getPath is not percent-decoded. + * + * @note This type is not thread-safe; owning code must synchronize externally. + * @see HttpRequestParser, Url, HttpResponse, responseBuilder, requestRouter, + * handleGet, handlePost, handleDelete, handleCgi + */ + #pragma once #include "http/Url.hpp" // for Url @@ -17,27 +58,58 @@ #include // for map #include // for string +/** + * @brief Encapsulates a client HTTP request. + * + * @details Represents the full request received from a client connection, + * including the request line (method, target path, and protocol version), + * headers, body payload, and parsed URL components. + * + * The class provides read/write accessors for all relevant fields, + * error codes for parse validation, and utility methods such as + * `printRequest()` for debugging. It is the central data structure + * produced by the @ref HttpRequestParser and consumed by the router + * and method handlers during request processing. + * + * @ingroup http + */ class HttpRequest { - private: - std::string _method; - std::string _path; - std::string _version; - std::map _headers; - std::string _body; - std::size_t _contentLength{0}; - std::string /* _uri; */ _query; ///< extracted from URI after '?' - Url _url; - int _parseError{0}; - int _matchedServerIndex; - std::string _host; + //=== Data ================================================================ + + std::string _method; ///< Request method (GET, POST, etc.) 
+ std::string _path; ///< Request path (normalized URI path) + std::string _version; ///< HTTP version string (e.g. HTTP/1.1) + std::map _headers; ///< Request headers (case-normalized keys) + std::string _body; ///< Request body payload + std::size_t _contentLength{0}; ///< Declared Content-Length, if present + std::string _query; ///< Query string (after '?') + Url _url; ///< Fully parsed URL (scheme, host, etc.) + int _parseError{0}; ///< Parse error code (0 if none) + int _matchedServerIndex; ///< Index of matched server block + std::string _host; ///< Normalized host from request public: + //=== Construction & Special Members ===================================== + + /** @name Construction & special members */ + ///@{ HttpRequest(void); ~HttpRequest(void); + ///@} + + //=== Debug & Utilities =================================================== + /** @name Debug & utilities */ + ///@{ + /** @brief Prints a human-readable dump of the request (for debugging). */ void printRequest(void) const; + ///@} + //=== Queries (Getters) =================================================== + + /** @name Queries (getters) */ + ///@{ const std::string& getMethod(void) const; const std::string& getPath(void) const; const std::string& getVersion(void) const; @@ -45,21 +117,33 @@ class HttpRequest { const std::map& getHeaders() const; const std::string& getBody(void) const; std::size_t getContentLength(void) const; - /* const std::string& getUri(void) const; */ const std::string& getQuery() const; - int getParseErrorCode(void) const; + const std::string& getQuery() const; + int getParseErrorCode(void) const; + int getMatchedServerIndex() const; + const std::string& getHost() const; + ///@} + + //=== Mutators (Setters) ================================================== + + /** @name Mutators (setters) */ + ///@{ + void setMethod(const std::string& method); + void setPath(const std::string& path); + void setVersion(const std::string& version); + void setHeader(const std::string& key, 
const std::string& value); + void setBody(const std::string& body); + void setContentLength(size_t len); + void setUrl(const Url& url); + void setQuery(const std::string& query); + void setParseErrorCode(int error); + void setMatchedServerIndex(int index); + void setHost(const std::string& host); + ///@} + + //=== Predicates ========================================================== - void setMethod(const std::string& method); - void setPath(const std::string& path); - void setVersion(const std::string& version); - void setHeader(const std::string& key, const std::string& value); - void setBody(const std::string& body); - void setContentLength(size_t len); - void setUrl(const Url& url); - void setQuery(const std::string& query); - void setParseErrorCode(int error); - bool hasHeader(const std::string& key) const; - int getMatchedServerIndex() const; - void setMatchedServerIndex(int index); - void setHost(const std::string& host); - const std::string& getHost() const; + /** @name Predicates */ + ///@{ + bool hasHeader(const std::string& key) const; + ///@} }; diff --git a/include/http/HttpRequestParser.hpp b/include/http/HttpRequestParser.hpp index 2e238b32..55406d41 100644 --- a/include/http/HttpRequestParser.hpp +++ b/include/http/HttpRequestParser.hpp @@ -1,3 +1,35 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* HttpRequestParser.hpp :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: nlouis +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/08/19 09:33:41 by nlouis #+# #+# */ +/* Updated: 2025/08/19 09:35:32 by nlouis ### ########.fr */ +/* */ +/* ************************************************************************** */ + +/** + * @file HttpRequestParser.hpp + * @brief Declares the HttpRequestParser utility class. + * + * @details Provides a static interface for parsing raw HTTP request strings + * into structured @ref HttpRequest objects. 
The parser extracts and + * validates the request line, headers, and body while handling + * connection-specific details such as consumed byte counts. + * + * The main entry point is @ref HttpRequestParser::parse, which: + * - Populates an @ref HttpRequest with parsed fields. + * - Selects the matching @ref Server configuration from the list + * of available virtual hosts on the same port. + * - Returns any parsing error code and the number of bytes consumed + * from the raw input buffer. + * + * The class is non-instantiable and only exposes static methods. + * + * @ingroup http + */ #pragma once @@ -7,15 +39,40 @@ class HttpRequest; class Server; +/** + * @class HttpRequestParser + * @brief Utility class for parsing raw HTTP requests. + * + * @details Provides a static method @ref parse that converts a raw HTTP + * request string into a structured @ref HttpRequest object. + * This includes: + * - Extracting the request line (method, path, version). + * - Parsing headers into a normalized map. + * - Capturing the body (if any). + * - Identifying the matching @ref Server configuration + * from a list of virtual hosts bound to the same port. + * - Returning parsing errors and the number of bytes consumed + * from the raw input buffer. + * + * The class is non-instantiable and non-copyable: it only + * exposes static parsing functionality and deletes its + * constructors and operators. 
+ * + * @ingroup http + */ class HttpRequestParser { public: + //=== Parsing API ======================================================== static bool parse(HttpRequest& req, const std::string& raw_req, std::vector serversOnPort, int& errorCode, std::size_t& consumedBytes); private: - HttpRequestParser() = delete; - ~HttpRequestParser() = delete; - HttpRequestParser(const HttpRequestParser& org) = delete; - HttpRequestParser& operator=(const HttpRequestParser& other) = delete; + //=== Non-instantiable utility class ===================================== + + HttpRequestParser() = delete; ///< Deleted: non-instantiable. + ~HttpRequestParser() = delete; ///< Deleted: prevents accidental instantiation. + HttpRequestParser(const HttpRequestParser& org) = delete; ///< Deleted: non-copyable. + HttpRequestParser& + operator=(const HttpRequestParser& other) = delete; ///< Deleted: non-assignable. }; diff --git a/include/http/HttpResponse.hpp b/include/http/HttpResponse.hpp index 1da119a1..70fb1c23 100644 --- a/include/http/HttpResponse.hpp +++ b/include/http/HttpResponse.hpp @@ -3,52 +3,105 @@ /* ::: :::::::: */ /* HttpResponse.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/11 10:55:37 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:20:00 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 09:13:33 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file HttpResponse.hpp + * @brief Declares the HttpResponse class. + * + * @details Represents an outgoing HTTP response, including status line, + * headers, body payload, and optional file-backed or CGI-generated + * content. Provides setters for constructing responses and utilities + * such as `toHttpString()` for serialization to the raw wire format. 
+ * + * This class is produced by response builders (see @ref ResponseBuilder), + * handlers (GET/POST/DELETE/CGI), and consumed by the networking layer + * when writing responses back to clients. + * + * @ingroup http + */ + #pragma once #include // for streamsize #include // for map #include // for string +/** + * @brief Represents an HTTP response to be sent back to the client. + * + * @details Encapsulates all elements of an HTTP response: + * - **Status line**: numeric status code and reason phrase. + * - **Headers**: arbitrary key–value pairs such as `Content-Type` or `Content-Length`. + * - **Body**: inline string payload, file-backed content, or CGI-generated output. + * - **Metadata**: HTTP version, connection management, and offsets for CGI output. + * + * Provides a clear API for constructing responses via setters, and utilities + * for serializing them (`toHttpString`) or checking response type (file-backed + * vs. CGI temporary file). + * + * Typical workflow: + * - Built by method handlers (GET/POST/DELETE, CGI) or the @ref ResponseBuilder. + * - Sent by the networking layer after request handling. + * + * @ingroup http + */ class HttpResponse { private: - int _status_code; - std::string _status_message; - std::map _headers; - std::string _body; - std::string _http_version; - std::string _connection_header; - std::string _file_path; - std::string _cgi_temp_file; - std::streamsize _cgiBodyOffset; + //=== Data ================================================================ + + int _status_code; ///< Numeric HTTP status code (e.g., 200, 404). + std::string _status_message; ///< Reason phrase (e.g., "OK", "Not Found"). + std::map _headers; ///< Response headers (case-preserving keys). + std::string _body; ///< Response body payload (may be empty). + std::string _http_version; ///< Request HTTP version for keep-alive logic. + std::string _connection_header; ///< Client "Connection" header snapshot. 
+ std::string _file_path; ///< If set, path to file-backed body to stream. + std::string _cgi_temp_file; ///< Temp file produced by CGI (for cleanup). + std::streamsize _cgiBodyOffset; ///< Byte offset where CGI body begins in temp file. public: + //=== Construction & Special Members ===================================== + + /** @name Construction & special members */ + ///@{ HttpResponse(void); ~HttpResponse(void); HttpResponse(const HttpResponse& other) = default; HttpResponse& operator=(const HttpResponse& other) = default; + ///@} + + //=== Mutators (Setters) ================================================== + + /** @name Mutators (setters) */ + ///@{ + void setStatus(int code, const std::string& message); + void setHeader(const std::string& key, const std::string& value); + void setBody(const std::string& body); + void setRequestMeta(const std::string& version, const std::string& conn); + void setFilePath(const std::string& path); + void setCgiBodyOffset(std::streamsize offset); + void setCgiTempFile(const std::string& temp_file); + ///@} + + //=== Queries (Getters) =================================================== - void setStatus(int code, const std::string& message); - void setHeader(const std::string& key, const std::string& value); - void setBody(const std::string& body); - void setRequestMeta(const std::string& version, const std::string& conn); - bool isConnectionClose(void) const; - std::string toHttpString(void) const; - void setFilePath(const std::string& path); - const std::string& getFilePath() const; - bool isFileResponse() const; - int getStatusCode(void) const; - const std::string& getStatusMessage(void) const; + /** @name Queries (getters) */ + ///@{ + std::string toHttpString(void) const; + bool isConnectionClose(void) const; + const std::string& getFilePath() const; + bool isFileResponse() const; + int getStatusCode(void) const; + const std::string& getStatusMessage(void) const; const std::map& getHeaders(void) const; - void 
setCgiBodyOffset(std::streamsize offset); std::streamsize getCgiBodyOffset() const; - void setCgiTempFile(const std::string& temp_file); const std::string& getCgiTempFile() const; bool isCgiTempFile() const; + ///@} }; diff --git a/include/http/Url.hpp b/include/http/Url.hpp index ce387158..a0819bf3 100644 --- a/include/http/Url.hpp +++ b/include/http/Url.hpp @@ -1,19 +1,55 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* Url.hpp :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: nlouis +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/08/19 09:43:12 by nlouis #+# #+# */ +/* Updated: 2025/08/19 09:45:33 by nlouis ### ########.fr */ +/* */ +/* ************************************************************************** */ + +/** + * @file Url.hpp + * @brief Declares the Url POD for parsed URL components. + * + * @details Lightweight structure holding parts of a parsed URL + * (scheme, userinfo, host, port, path, query, fragment). + * Used by the HTTP request parser to expose normalized URL + * fields to the router and handlers. + * + * @ingroup http + */ + #pragma once #include +/** + * @brief Parsed URL components. + * + * @details Simple aggregate type (POD) storing the pieces of a URL. + * All fields are plain strings as parsed; no validation or + * normalization is performed here beyond what the parser supplies. + * + * @ingroup http + */ struct Url { - std::string scheme; - std::string user; - std::string password; - std::string host; - std::string port; - std::string path; - std::string query; - std::string fragment; + std::string scheme; ///< URL scheme (e.g., "http", "https"); empty if absent. + std::string user; ///< User part of userinfo (before ':'); empty if absent. + std::string password; ///< Password part of userinfo (after ':'); empty if absent. + std::string host; ///< Hostname or IP (without port). + std::string port; ///< Decimal port as string (e.g., "80"); empty if default/absent. 
+ std::string path; ///< Path starting with '/', e.g. "/index.html"; may be empty. + std::string query; ///< Raw query string without leading '?'; may be empty. + std::string fragment; ///< Fragment without leading '#'; may be empty. - Url() = default; - Url(const Url&) = default; - ~Url() = default; - Url& operator=(const Url&) = default; + /** @name Construction & special members */ + ///@{ + Url() = default; ///< Default construct. + Url(const Url&) = default; ///< Copy construct. + ~Url() = default; ///< Trivial destructor. + Url& operator=(const Url&) = default; ///< Copy assign. + ///@} }; diff --git a/include/http/methodsHandler.hpp b/include/http/methodsHandler.hpp index a0e58260..87e3aa79 100644 --- a/include/http/methodsHandler.hpp +++ b/include/http/methodsHandler.hpp @@ -6,10 +6,34 @@ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/05 10:46:53 by nlouis #+# #+# */ -/* Updated: 2025/06/06 22:31:14 by nlouis ### ########.fr */ +/* Updated: 2025/08/19 10:23:35 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file methodsHandler.hpp + * @brief Declares HTTP method handlers for GET, POST, and DELETE. + * + * @details This header defines the public entrypoints for handling + * HTTP request methods within Webserv. Each function maps + * an incoming @ref HttpRequest to an @ref HttpResponse, + * using the active @ref Server context and the matched + * @ref Location configuration. + * + * Supported methods: + * - **GET**: Serves static files or generates autoindex + * listings (see @ref generateAutoindex). + * - **POST**: Handles uploads (raw body, URL-encoded, + * multipart forms). + * - **DELETE**: Removes existing files if permitted. + * + * Helper: + * - @ref generateAutoindex: builds a directory listing + * response in HTML. 
+ * + * @ingroup request_handler + */ + #pragma once #include diff --git a/include/http/requestRouter.hpp b/include/http/requestRouter.hpp index 2834df64..5a4f2ae8 100644 --- a/include/http/requestRouter.hpp +++ b/include/http/requestRouter.hpp @@ -1,19 +1,32 @@ /* ************************************************************************** */ /* */ /* ::: :::::::: */ -/* HttpRequestHandler.hpp :+: :+: :+: */ +/* requestRouter.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/12 23:11:50 by nlouis #+# #+# */ -/* Updated: 2025/06/05 10:58:12 by nlouis ### ########.fr */ +/* Updated: 2025/08/19 10:07:35 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ #pragma once -class Server; -class HttpRequest; -class HttpResponse; +/** + * @file requestRouter.hpp + * @brief Declares the main entry point for handling HTTP requests. + * + * @details This header provides the function interface for processing + * a parsed HTTP request against a given server configuration. + * The function applies routing logic, executes the appropriate + * HTTP method handler (GET, POST, DELETE, etc.), and generates + * the corresponding HttpResponse. + * + * @ingroup request_handler + */ + +class Server; ///< Forward declaration of the Server class. +class HttpRequest; ///< Forward declaration of the HttpRequest class. +class HttpResponse; ///< Forward declaration of the HttpResponse class.
HttpResponse handleRequest(const HttpRequest& request, const Server& server); diff --git a/include/http/responseBuilder.hpp b/include/http/responseBuilder.hpp index 1658f882..a85e9990 100644 --- a/include/http/responseBuilder.hpp +++ b/include/http/responseBuilder.hpp @@ -3,13 +3,37 @@ /* ::: :::::::: */ /* responseBuilder.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/11 12:02:21 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:18:09 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 09:32:32 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file responseBuilder.hpp + * @brief Declarations of helper functions for constructing HTTP responses. + * + * @details Defines the `ResponseBuilder` namespace, which provides a set of + * high-level factory functions for creating different types of + * @ref HttpResponse objects: + * - @ref generateSuccess : Builds a success response with an inline body. + * - @ref generateSuccessFile : Builds a success response backed by a file on disk. + * - @ref generateError : Builds an error response using a custom error page + * if available, or a default generated HTML fallback. + * - @ref generateRedirect : Builds a redirect response with a `Location` header. + * + * Also defines the `MessageHandler` namespace with + * @ref getDefaultMessage, a helper that maps HTTP status codes to + * their standard reason phrases. + * + * These functions are typically used by method handlers (GET/POST/DELETE/CGI) + * to generate complete responses ready for serialization and transmission + * by the networking layer. 
+ * + * @ingroup http + */ + #pragma once #include "http/HttpResponse.hpp" // for HttpResponse diff --git a/include/network/SocketManager.hpp b/include/network/SocketManager.hpp index e6548038..345f03cb 100644 --- a/include/network/SocketManager.hpp +++ b/include/network/SocketManager.hpp @@ -3,13 +3,37 @@ /* ::: :::::::: */ /* SocketManager.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: ktieu +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/03 13:51:47 by irychkov #+# #+# */ -/* Updated: 2025/06/09 01:29:01 by ktieu ### ########.fr */ +/* Updated: 2025/08/18 23:24:39 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file SocketManager.hpp + * @brief Declares the SocketManager and per-client state for non-blocking I/O. + * + * @details The SocketManager owns the event loop (based on `poll(2)`) and + * multiplexes: + * - listening sockets (bind/listen for each configured host:port), + * - accepted client sockets (read → parse → route → respond), + * - CGI subprocess pipes (lifecycle, timeouts, finalization), + * - write-side backpressure and keep-alive. + * + * Each connected client is tracked with a @ref ClientInfo structure + * that aggregates request/response buffers, timers, CGI state, and + * file-stream handles for zero-copy file responses. + * + * The implementation guarantees: + * - single `poll()` over all FDs, read & write monitored together, + * - non-blocking I/O for sockets and CGI pipes, + * - accurate HTTP status codes and default error pages, + * - resilient behavior under stress and strict timeouts. 
+ * + * @ingroup socket_manager + */ + #pragma once #include "core/Server.hpp" // for Server @@ -26,113 +50,217 @@ #include // for size_t, time_t #include // for vector +//=== Tunables & limits ======================================================= + +/** + * @name Socket and protocol timeouts / limits + * @brief Runtime guards for robustness and resource control. + */ +///@{ + +/** @brief Generic inactivity timeout (seconds) for body recv / idle / send. */ #define TIMEOUT 20 + +/** @brief Max delay to complete request headers once bytes start arriving. */ #define HEADER_TIMEOUT_SECONDS 6 + +/** @brief Minimal sane header size to consider a request "non-empty". */ #define HEADER_MIN_LENGTH 15 + +/** @brief Maximum allowed header bytes before replying 431. */ #define HEADER_MAX_LENGTH 8192 + +/** @brief Receive buffer size for a single `recv()` call. */ #define RECV_BUFFER HEADER_MAX_LENGTH * 2 + +/** @brief Hard cap on the number of concurrent tracked client FDs. */ #define MAX_CLIENTS 1024 + +/** @brief Max time a CGI is allowed to run without finishing (seconds). */ #define CGI_TIMEOUT_SECONDS 45 +///@} -class Location; +// Forward declaration to avoid header cycles. +/** + * @brief Per-client runtime state tracked by the SocketManager. + * + * @details Holds identifiers, rolling counters, parsing flags, request and + * response queues, CGI process handle, and file streaming context. + * All fields are mutated only by the owning SocketManager on the + * poll thread. 
+ * + */ struct ClientInfo { - // Core identifiers and timing - int client_fd; - time_t lastRequestTime; - time_t connectionStartTime; - time_t lastSendAttemptTime; - - // Byte tracking - size_t headerBytesReceived; - size_t bodyBytesReceived; - size_t bytes_sent; - - // Request/response flow - bool headerComplete; - std::string requestBuffer; - std::string current_raw_response; - std::vector serversOnPort; - std::queue pendingRequests; - std::queue responses; - - // CGI - std::optional cgiProcess; - bool isCgiProcessRunning; - HttpRequest currentCgiRequest; - - // File I/O - std::ifstream file_stream; + //=== Core identifiers and timing ======================================== + + int client_fd; ///< Accepted client socket FD. + time_t lastRequestTime; ///< Last time we received any bytes from client. + time_t connectionStartTime; ///< First byte time for the *current* request header. + time_t lastSendAttemptTime; ///< Last time we attempted to `send()` bytes. + + //=== Byte tracking ======================================================= + + std::size_t headerBytesReceived; ///< Count of header bytes seen so far. + std::size_t bodyBytesReceived; ///< Count of body bytes seen so far. + std::size_t bytes_sent; ///< Bytes already sent from the current raw buffer. + + //=== Request/response flow ============================================== + + bool headerComplete; ///< True once `\r\n\r\n` found for the current request. + std::string requestBuffer; ///< Inbound raw buffer (may hold pipelined requests). + std::string current_raw_response; ///< Outbound raw response header/body (when not file). + + std::vector serversOnPort; ///< Virtual servers sharing the same listen FD. + std::queue pendingRequests; ///< Parsed but not yet processed requests. + std::queue responses; ///< Prepared responses waiting to be sent. + + //=== CGI state =========================================================== + + std::optional cgiProcess; ///< Active CGI process/pipe set (if any). 
+ bool isCgiProcessRunning; ///< Convenience flag while CGI is alive. + HttpRequest currentCgiRequest; ///< Request currently handled by CGI. + + //=== File I/O for sendfile-like streaming =============================== + + std::ifstream file_stream; ///< Opened file for body streaming (CGI temp or static file). }; +/** + * @brief Poll-driven socket and CGI orchestrator. + * + * @details + * Responsibilities: + * - Create and bind all listening sockets for configured servers. + * - Accept clients and maintain a single `poll()` set for all descriptors. + * - Receive, enforce limits, parse (supports pipelining), and route requests. + * - Spawn/manage CGI, enforce CGI timeout, collect output, finalize response. + * - Stream responses (raw / file) with keep-alive and backpressure handling. + * - Apply precise timeout policy (idle/header/body/send). + * + * Error handling strategy: + * - Never block; on I/O errors or protocol violations, push an error response + * and cleanly close when required. + * - Protect the loop with wide catch blocks—FDs are closed on failure paths. 
+ * + * @ingroup socket_manager + */ class SocketManager { public: + //=== Ctors / Dtor / Special members ===================================== + /** @name Construction & lifetime */ ///@{ SocketManager(void) = delete; SocketManager(const std::vector& servers); ~SocketManager(void); - SocketManager(const SocketManager& other) = delete; - SocketManager& operator=(const SocketManager& other) = delete; + SocketManager(const SocketManager&) = delete; + SocketManager& operator=(const SocketManager&) = delete; + ///@} + //=== Main loop =========================================================== + /** @name Main loop */ ///@{ void run(); + ///@} + + /** @name Errors */ ///@{ class SocketError : public std::exception { private: std::string _msg; public: explicit SocketError(const std::string& msg); - virtual const char* what() const throw(); + const char* what() const throw() override; }; + ///@} private: - std::vector _poll_fds; ///< Monitored file descriptors for poll(). - std::map> - _listen_map; ///< Maps listen fds to their corresponding servers - std::map _client_info; /// Stores all information about each client - std::map _fd_to_cgi; ///< Maps CGI stdout fds to client fds + //=== Poll & indices ====================================================== + + /** @name Poll sets & indices */ ///@{ + std::vector _poll_fds; ///< Monitored file descriptors for poll(). + + /** + * @brief Listen FD → vhost set. + * + * @details For a given listen FD (host:port), all matching `Server` objects + * are stored here to support name-based routing. + */ + std::map> _listen_map; + + /** + * @brief Client FD → per-client state. + * + * @details Contains request/response queues, timers, CGI, and file state. + */ + std::map _client_info; + + /** + * @brief CGI stdout FD → owning client FD. + * + * @details Used to ignore/route events on CGI pipe descriptors inside the + * main poll loop. 
+ */ + std::map _fd_to_cgi; + ///@} - // Setup & connection + //=== Setup & connection ================================================== + /** @name Setup & accept */ ///@{ void setupSockets(const std::vector& servers); void handleNewConnection(int listen_fd); + ///@} - // Event handling + //=== Event handling ====================================================== + /** @name Event handling */ ///@{ bool handleClientData(int client_fd, size_t index); void handleCgiPollEvents(); void sendResponse(int client_fd, size_t index); void handlePollError(int fd, size_t index, short revents); + ///@} - // Response helpers + //=== Response helpers ==================================================== + /** @name Response helpers */ ///@{ void logResponseStatus(int status, int fd); bool sendFileResponse(int fd, size_t index, HttpResponse& response); bool sendRawResponse(int fd, size_t index, HttpResponse& response); + ///@} - // Client lifecycle + //=== Client lifecycle ==================================================== + /** @name Client lifecycle */ ///@{ void initializeClientInfo(int client_fd, int listen_fd); void cleanupClientConnectionClose(int client_fd, size_t index); void removePollFd(size_t index); void cleanupClientState(int client_fd); void cleanupCgiForClient(int client_fd); void resetRequestState(int client_fd); + ///@} - // Request processing + //=== Request processing ================================================== + /** @name Request processing */ ///@{ bool receiveFromClient(int fd, size_t index); bool checkRequestLimits(int fd); bool parseAndQueueRequests(int client_fd); void processPendingRequests(int client_fd); + ///@} - // Timeout checks + //=== Timeout checks ====================================================== + /** @name Timeout checks */ ///@{ bool checkClientTimeouts(int client_fd, size_t index); bool isHeaderTimeout(int fd, time_t now); bool isBodyTimeout(int fd, time_t now); bool isSendTimeout(int fd, time_t now); bool isIdleTimeout(int fd, 
time_t now); + ///@} - // Request routing and CGI + //=== Request routing and CGI ============================================ + /** @name CGI & routing */ ///@{ bool handleCgiRequest(int client_fd, const HttpRequest& request, const Server& server, const Location& location); bool handleRequestErrorIfAny(int fd, int code, HttpRequest& req, const Server& server); bool shouldSpawnCgi(const HttpRequest& req, const Location& location); + ///@} - // Error utility + //=== Error utility ======================================================= + /** @name Error utility */ ///@{ void respondError(int fd, int status_code); + ///@} }; diff --git a/include/utils/Logger.hpp b/include/utils/Logger.hpp index 681330bb..3d65ffa3 100644 --- a/include/utils/Logger.hpp +++ b/include/utils/Logger.hpp @@ -3,10 +3,10 @@ /* ::: :::::::: */ /* Logger.hpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/25 12:09:58 by ktieu #+# #+# */ -/* Updated: 2025/08/17 12:30:59 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:48:00 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ @@ -21,8 +21,8 @@ class Logger { static void logFrom(LogLevel level, const std::string& from, const std::string& message); private: - Logger() = delete; // Prevent instantiation - ~Logger() = delete; // Prevent instantiation - Logger(const Logger& org) = delete; // Prevent copy - Logger& operator=(const Logger& other) = delete; // Prevent assignment + Logger() = delete; + ~Logger() = delete; + Logger(const Logger& org) = delete; + Logger& operator=(const Logger& other) = delete; }; diff --git a/include/utils/errorUtils.hpp b/include/utils/errorUtils.hpp index 2d36a313..9a5fb0a8 100644 --- a/include/utils/errorUtils.hpp +++ b/include/utils/errorUtils.hpp @@ -6,48 +6,12 @@ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/06 19:59:07 by nlouis #+# 
#+# */ -/* Updated: 2025/05/06 19:59:14 by nlouis ### ########.fr */ +/* Updated: 2025/08/15 22:59:56 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ -/** - * @file errorUtils.hpp - * @brief Error formatting utilities. - * - * @details - * Provides helper functions to standardize how error messages are formatted, - * especially with line and column information, to improve diagnostics. - */ - -/** - * @defgroup ErrorUtils Error message formatting - * @brief Helpers for formatting human-readable error messages. - * - * @details - * These utilities generate consistent and contextual error messages for use - * in exceptions and log output. - * @{ - */ - #pragma once #include -/** - * @brief Formats an error message with line and column information. - * @ingroup ErrorUtils - * - * @details - * Constructs a human-readable error message of the form: - * `"Line X, column Y: "`. Intended for consistent formatting across - * parser or tokenizer errors. - * - * @param msg The error message. - * @param line The line number where the error occurred. - * @param column The column number of the problematic element. - * - * @return A formatted string with line/column context. - */ std::string formatError(const std::string& msg, int line, int column); - -/** @} */ // end of ErrorUtils diff --git a/include/utils/stringUtils.hpp b/include/utils/stringUtils.hpp index 885a4d98..b0d346ad 100644 --- a/include/utils/stringUtils.hpp +++ b/include/utils/stringUtils.hpp @@ -6,30 +6,10 @@ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/05 20:09:44 by nlouis #+# #+# */ -/* Updated: 2025/05/22 22:32:12 by nlouis ### ########.fr */ +/* Updated: 2025/08/15 23:00:37 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ -/** - * @file stringUtils.hpp - * @brief Utility functions for parsing numeric string values. 
- * - * @details - * Provides functions to parse integers and human-readable byte sizes from strings. - * Supports contextual error reporting for use in configuration parsing or diagnostics. - */ - -/** - * @defgroup StringUtils String parsing utilities - * @brief Helpers to parse integers and memory sizes from strings. - * - * @details - * This group provides low-level utilities used in configuration parsing for converting - * string representations of numbers and byte sizes into typed values with context-aware - * error handling. - * @{ - */ - #pragma once #include @@ -37,56 +17,12 @@ #include #include -/** - * @brief Parses an integer with detailed error context. - * @ingroup StringUtils - * - * @details - * Parses a non-negative integer and throws a detailed `std::invalid_argument` error - * on failure. Includes the field name, line/column, and optional context string. - * - * @param value The string to parse. - * @param field The name of the field being parsed. - * @param line Line number where the value is found. - * @param column Column number where the value starts. - * @param context_provider A lambda that returns extra context (e.g., the source line). - * - * @return The parsed integer. - */ -int parseInt(const std::string& value, const std::string& field, int line, int column, - const std::function& context_provider); - -/** - * @brief Parses a byte size with detailed error context. - * @ingroup StringUtils - * - * @details - * Accepts size strings with suffixes (K/M/G). On failure, throws `std::invalid_argument` - * with details including field name, line/column, and optional context string. - * - * @param value The string to parse. - * @param field The name of the field being parsed. - * @param line Line number where the value is found. - * @param column Column number where the value starts. - * @param context_provider A lambda that returns extra context (e.g., the source line). - * - * @return The parsed size in bytes. 
- */ +int parseInt(const std::string& value, const std::string& field, int line, int column, + const std::function& context_provider); std::size_t parseByteSize(const std::string& value, const std::string& field, int line, int column, const std::function& context_provider); -/** - * @brief Converts a string to lowercase. - * - * @details Returns a copy of the input string with all ASCII alphabetic characters - * converted to lowercase using the current locale rules. - * - * @param str The input string to convert. - * @return A lowercase copy of the input string. - */ std::string toLower(const std::string&); std::string toUpper(const std::string& s); std::string formatBytes(std::size_t bytes); std::string joinStrings(const std::vector& list, const std::string& delim = ", "); std::string trim(const std::string& str); - -/** @} */ // end of StringUtils diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 7a4d7c6c..00000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -requests -pytest -pytest-asyncio -aiohttp -asyncio diff --git a/scripts/run_all_tests.sh b/scripts/run_all_tests.sh deleted file mode 100755 index 1f443331..00000000 --- a/scripts/run_all_tests.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -# set -euo pipefail: -# -e : Exit immediately if any command exits with a non-zero status. -# -u : Treat unset variables as an error and exit immediately. 
-# -o pipefail : Return the exit status of the last command in a pipeline that failed -set -euo pipefail - -# Colors -GREEN="\033[0;32m" -RED="\033[0;31m" -CYAN="\033[0;36m" -YELLOW="\033[1;33m" -RESET="\033[0m" - -# Path to suppression file -SUPPRESSION_FILE="$(dirname "$0")/.asanignore" - -function run_tests() { - local preset="$1" - local build_dir="build/${preset}" - - echo -e "${CYAN}🔧 Preparing build for preset: ${preset}${RESET}" - - # Clean previous build if it exists - if [ -d "$build_dir" ]; then - echo -e "${YELLOW}🧹 Cleaning previous build directory: ${build_dir}${RESET}" - rm -rf "$build_dir" - fi - - echo -e "${CYAN}⚙️ Configuring with CMake preset: ${preset}${RESET}" - cmake --preset "$preset" -DBUILD_TESTING=ON - - echo -e "${CYAN}🏗️ Building project with preset: ${preset}${RESET}" - cmake --build --preset "$preset" - - echo -e "${CYAN}🧪 Running tests for preset: ${preset}${RESET}" - - # Set sanitizer suppression environment variables dynamically - if [[ "$preset" == "asan" ]]; then - export ASAN_OPTIONS="detect_leaks=1:suppressions=${SUPPRESSION_FILE}:exitcode=42:fast_unwind_on_malloc=0" - elif [[ "$preset" == "tsan" ]]; then - export TSAN_OPTIONS="suppressions=${SUPPRESSION_FILE}:exitcode=42" - elif [[ "$preset" == "ubsan" ]]; then - export UBSAN_OPTIONS="suppressions=${SUPPRESSION_FILE}:print_stacktrace=1:exitcode=42" - else - unset ASAN_OPTIONS TSAN_OPTIONS UBSAN_OPTIONS - fi - - if ! 
ctest --preset "$preset" --output-on-failure; then - echo -e "${RED}❌ Tests failed for preset: ${preset}${RESET}" - exit 1 - fi - - echo -e "${GREEN}✅ Tests passed for preset: ${preset}${RESET}\n" -} - -start_time=$(date +%s) - -echo -e "${CYAN}🚀 Starting full test run for all configurations...${RESET}\n" - -# List of presets to test -presets=("debug" "asan" "tsan" "ubsan") - -for preset in "${presets[@]}"; do - run_tests "$preset" -done - -end_time=$(date +%s) -elapsed=$((end_time - start_time)) - -echo -e "${GREEN}🏆 All builds and tests completed successfully in ${elapsed} seconds!${RESET}" diff --git a/scripts/run_webserv.sh b/scripts/run_webserv.sh deleted file mode 100644 index 2325147a..00000000 --- a/scripts/run_webserv.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Check if the default server binary exists -if [ ! -x "./bin/webserv" ]; then - echo "❌ Error: ./bin/webserv not found or not executable." - exit 1 -fi - -# If no arguments are provided, run the default server -if [ $# -eq 0 ]; then - exec ./bin/webserv -fi - -# Dispatch based on first argument -case "$1" in - asan) - if [ ! -x "./run_webserv_asan.sh" ]; then - echo "❌ Error: ./run_webserv_asan.sh not found or not executable." - exit 1 - fi - shift - exec ./run_webserv_asan.sh "$@" - ;; - tsan) - if [ ! -x "./run_webserv_tsan.sh" ]; then - echo "❌ Error: ./run_webserv_tsan.sh not found or not executable." - exit 1 - fi - shift - exec ./run_webserv_tsan.sh "$@" - ;; - ubsan) - if [ ! -x "./run_webserv_ubsan.sh" ]; then - echo "❌ Error: ./run_webserv_ubsan.sh not found or not executable." - exit 1 - fi - shift - exec ./run_webserv_ubsan.sh "$@" - ;; - *) - exec ./bin/webserv "$@" - ;; -esac diff --git a/scripts/run_webserv_asan.sh b/scripts/run_webserv_asan.sh deleted file mode 100755 index 3b1cbedd..00000000 --- a/scripts/run_webserv_asan.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Check if the default server binary exists -if [ ! 
-x "./bin/webserv" ]; then - echo "❌ Error: ./bin/webserv not found or not executable." - exit 1 -fi - -# Directory of this script (even if called from elsewhere) -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Export ASAN options -export ASAN_OPTIONS="detect_leaks=1:leak_check_at_exit=1:fast_unwind_on_malloc=0:abort_on_error=1:suppressions=${SCRIPT_DIR}/.asanignore" - -# Run webserv -exec "${SCRIPT_DIR}/bin/webserv" "$@" diff --git a/scripts/run_webserv_tsan.sh b/scripts/run_webserv_tsan.sh deleted file mode 100755 index 82f7d32a..00000000 --- a/scripts/run_webserv_tsan.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Check if the default server binary exists -if [ ! -x "./bin/webserv" ]; then - echo "❌ Error: ./bin/webserv not found or not executable." - exit 1 -fi - -# Directory of this script (even if called from elsewhere) -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Export TSAN options -export TSAN_OPTIONS="suppressions=${SCRIPT_DIR}/.asanignore:halt_on_error=1:exitcode=66" - -# Run webserv -exec "${SCRIPT_DIR}/bin/webserv" "$@" diff --git a/scripts/run_webserv_ubsan.sh b/scripts/run_webserv_ubsan.sh deleted file mode 100644 index 117c5f56..00000000 --- a/scripts/run_webserv_ubsan.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Check if the default server binary exists -if [ ! -x "./bin/webserv" ]; then - echo "❌ Error: ./bin/webserv not found or not executable." 
- exit 1 -fi - -# Directory of this script (even if called from elsewhere) -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Export UBSAN options -export UBSAN_OPTIONS="suppressions=${SCRIPT_DIR}/.asanignore:print_stacktrace=1:halt_on_error=1:exitcode=77" - -# Run webserv -exec "${SCRIPT_DIR}/bin/webserv" "$@" diff --git a/src/config/Config.cpp b/src/config/Config.cpp index cefc43ab..25a4d37c 100644 --- a/src/config/Config.cpp +++ b/src/config/Config.cpp @@ -3,26 +3,54 @@ /* ::: :::::::: */ /* Config.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/04/30 10:36:29 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:16:36 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:48:24 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file Config.cpp + * @brief Implements the Config aggregate for parsed servers. + * + * @details Minimal container methods for appending and accessing the list of + * @ref Server instances created during parsing. + * + * @ingroup config + */ + #include "config/Config.hpp" +#include "config/parser/ConfigParseError.hpp" -////////////////// -// --- Public API +//=== Public API ============================================================ +/** + * @brief Appends a parsed server to the configuration. + * + * @param server The server instance to add. + * @ingroup config + */ void Config::addServer(const Server& server) { - _servers.push_back(server); + _servers.push_back(server); // Store by value (copy/move elision applies) } +/** + * @brief Returns a mutable reference to the list of servers. + * + * @return Vector reference for in-place modifications. + * @ingroup config + */ std::vector& Config::getServers() { return _servers; } +/** + * @brief Returns a const reference to the list of servers. + * + * @return Read-only vector reference. 
+ * @ingroup config + */ const std::vector& Config::getServers() const { return _servers; } diff --git a/src/config/normalizeConfig.cpp b/src/config/normalizeConfig.cpp index de092baa..482252f7 100644 --- a/src/config/normalizeConfig.cpp +++ b/src/config/normalizeConfig.cpp @@ -1,15 +1,26 @@ /* ************************************************************************** */ /* */ /* ::: :::::::: */ -/* normalizeConfig.cpp :+: :+: :+: */ +/* normalizeConfig.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/05/11 21:02:41 by nlouis #+# #+# */ -/* Updated: 2025/05/20 22:37:46 by nlouis ### ########.fr */ +/* Created: 2025/08/18 13:12:00 by nlouis #+# #+# */ +/* Updated: 2025/08/18 19:48:41 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file normalizeConfig.cpp + * @brief Implements post-parse normalization of configuration objects. + * + * @details Establishes default values and canonical forms for servers and + * locations so that runtime logic can assume non-empty, consistent + * configuration fields. + * + * @ingroup config_normalizing + */ + #include "config/Config.hpp" // for Config #include "core/Location.hpp" // for Location #include "core/Server.hpp" // for Server @@ -18,17 +29,39 @@ #include // for allocator, string, basic_string, operat... #include // for vector -constexpr size_t DEFAULT_CLIENT_MAX_BODY_SIZE = 1 * 1024 * 1024; -const std::string DEFAULT_ERROR_PAGE_PATH = "/error.html"; -const std::string DEFAULT_ROOT = "/var/www"; -const std::string DEFAULT_INDEX = "index.html"; -const std::vector DEFAULT_METHODS = {"GET", "POST", "DELETE"}; +/// 1 MiB default for request body limit. +constexpr std::size_t DEFAULT_CLIENT_MAX_BODY_SIZE = 1 * 1024 * 1024; +/// Default error page path used for common error codes. 
+const std::string DEFAULT_ERROR_PAGE_PATH = "/error.html"; +/// Default filesystem root for locations missing an explicit root. +const std::string DEFAULT_ROOT = "/var/www"; +/// Default index file for the root location if none provided. +const std::string DEFAULT_INDEX = "index.html"; +/// Default allowed HTTP methods when none are specified. +const std::vector DEFAULT_METHODS = {"GET", "POST", "DELETE"}; +/** + * @brief Normalizes a single server and its locations. + * + * @details + * - Ensures a default `client_max_body_size` if unset (1 MiB). + * - Injects a default set of `error_page` mappings if none were provided. + * - For each location: + * - Sets a default `root` when missing. + * - Adds a default `index` when the location is `/` and index list is empty. + * - Ensures a default set of allowed HTTP methods when unspecified. + * + * @param server Server to normalize (modified in place). + * + * @ingroup config_normalizing + */ static void normalizeServer(Server& server) { + // Fill default body size if not specified if (server.getClientMaxBodySize() == 0) { server.setClientMaxBodySize(DEFAULT_CLIENT_MAX_BODY_SIZE); } + // Install a basic set of error pages if none provided if (server.getErrorPages().empty()) { server.setErrorPage(500, DEFAULT_ERROR_PAGE_PATH); server.setErrorPage(404, DEFAULT_ERROR_PAGE_PATH); @@ -36,21 +69,32 @@ static void normalizeServer(Server& server) { server.setErrorPage(502, DEFAULT_ERROR_PAGE_PATH); } + // Normalize each location for (Location& loc : server.getLocations()) { + // Root path default if (loc.getRoot().empty()) { loc.setRoot(DEFAULT_ROOT); } + // Provide an index for "/" if none given if (loc.getIndexFiles().empty() && loc.getPath() == "/") { loc.addIndexFile(DEFAULT_INDEX); } + // Ensure at least a default method set if (!loc.hasAllowedMethods()) { loc.setAllowedMethods(DEFAULT_METHODS); } } } +/** + * @brief Applies normalization to all servers/locations in the config. 
+ * + * @param config Parsed configuration to normalize (modified in place). + * + * @ingroup config_normalizing + */ void normalizeConfig(Config& config) { for (Server& s : config.getServers()) { normalizeServer(s); diff --git a/src/config/parser/ConfigParseError.cpp b/src/config/parser/ConfigParseError.cpp index a0521c2c..9414c92c 100644 --- a/src/config/parser/ConfigParseError.cpp +++ b/src/config/parser/ConfigParseError.cpp @@ -3,15 +3,37 @@ /* ::: :::::::: */ /* ConfigParseError.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/05/05 20:44:40 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:05:07 by irychkov ### ########.fr */ +/* Created: 2025/08/18 12:05:00 by nlouis #+# #+# */ +/* Updated: 2025/08/18 19:49:28 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file ConfigParseError.cpp + * @brief Implements configuration error construction and reporting. + * + * @details Formats error messages (optionally including a contextual source + * snippet) and exposes them through `what()`. + * + * @ingroup config_parse_error + */ + #include "config/parser/ConfigParseError.hpp" +#include +#include +/** + * @brief Builds a formatted error message with optional context. + * + * @details If `context` is provided, appends it on a new line prefixed with + * an arrow (`"→ "`), producing a compact, readable diagnostic. + * + * @param message Primary error message. + * @param context Optional offending line or snippet (may be empty). + * @ingroup config_parse_error + */ ConfigParseError::ConfigParseError(const std::string& message, const std::string& context) : _context(context) { if (_context.empty()) @@ -20,6 +42,12 @@ ConfigParseError::ConfigParseError(const std::string& message, const std::string _fullMessage = message + "\n→ " + _context; } +/** + * @brief Returns the formatted error string. 
+ * + * @return Null-terminated C string owned by the exception. + * @ingroup config_parse_error + */ const char* ConfigParseError::what() const noexcept { return _fullMessage.c_str(); } diff --git a/src/config/parser/ConfigParser.cpp b/src/config/parser/ConfigParser.cpp index 18c79561..acc88606 100644 --- a/src/config/parser/ConfigParser.cpp +++ b/src/config/parser/ConfigParser.cpp @@ -3,13 +3,24 @@ /* ::: :::::::: */ /* ConfigParser.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/05/09 08:46:22 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:10:21 by irychkov ### ########.fr */ +/* Created: 2025/08/18 12:18:00 by nlouis #+# #+# */ +/* Updated: 2025/08/18 19:50:05 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file ConfigParser.cpp + * @brief Implements parsing of servers, locations, and directives. + * + * @details Walks the token stream, enforces grammar, and dispatches each + * directive to handler maps to populate @ref Server and @ref Location. + * Provides precise diagnostics with contextual line snippets. + * + * @ingroup config_parsing + */ + #include "config/parser/ConfigParser.hpp" #include "config/Config.hpp" // for Config #include "config/parser/ConfigParseError.hpp" // for SyntaxError @@ -29,65 +40,154 @@ namespace { -static const std::unordered_set kRepeatableServerDirectives = {"error_page"}; -static const std::unordered_set kRepeatableLocationDirectives = {"methods", - "cgi_interpreter"}; -static constexpr std::array kArgTokenTypes = {TokenType::STRING, TokenType::NUMBER, - TokenType::IDENTIFIER}; - +/** + * @brief Directives allowed multiple times inside a server block. + * @ingroup config_parsing + */ +static const std::unordered_set kRepeatableServerDirectives = {"error_page"}; + +/** + * @brief Directives allowed multiple times inside a location block. 
+ * @ingroup config_parsing + */ +static const std::unordered_set kRepeatableLocationDirectives = { + "methods", + "cgi_interpreter", +}; + +/** + * @brief Token types accepted as directive arguments (ordered by frequency). + * @ingroup config_parsing + */ +static constexpr std::array kArgTokenTypes = { + TokenType::STRING, + TokenType::NUMBER, + TokenType::IDENTIFIER, +}; + +/** + * @brief Checks whether a directive name is allowed (not duplicated). + * + * @details Tracks encountered directive names within a block (`server` or `location`). + * If the directive is marked as repeatable, it is always accepted. + * Otherwise, the name is inserted into the `seen` set: + * - If insertion succeeds, the directive is new and valid. + * - If insertion fails, the directive is a duplicate and rejected. + * + * @param name The directive keyword being checked. + * @param seen Set of already encountered directive names in the current block. + * @param repeatable Set of directive names allowed to appear multiple times. + * + * @return `true` if the directive is valid (new or repeatable), + * `false` if it is a duplicate and not allowed. + * + * @ingroup config_parser + */ bool checkDuplicateDirective(const std::string& name, std::unordered_set& seen, const std::unordered_set& repeatable) { return repeatable.contains(name) || seen.insert(name).second; } +/** + * @brief Dispatches a configuration directive to its registered handler. + * + * @details Looks up the directive keyword (from the current token) in the + * provided handler table. If a matching handler is found, it is + * invoked with the target object (Server or Location), parsed values, + * source position (line/column), and contextual snippet. If no handler + * exists for the directive name, a @ref SyntaxError is thrown. + * + * @tparam T The type of the configuration target (e.g., `Server` or `Location`). 
+ * @tparam HandlerMap A mapping from directive name (`std::string`) to a handler + * function/lambda capable of applying the directive. + * + * @param target The configuration object being populated (Server/Location). + * @param token The token representing the directive keyword. + * @param values List of string arguments parsed for the directive. + * @param handlers Map of directive names to handler functions. + * @param line Line number in the source config where the directive occurs. + * @param column Column number in the source config where the directive occurs. + * @param ctx A snippet of the source line for contextual error messages. + * + * @throws SyntaxError If the directive name is unknown or unhandled. + * + * @ingroup config_parser + */ template void parseDirective(T& target, const Token& token, std::vector& values, const HandlerMap& handlers, int line, int column, const std::string& ctx) { const std::string& directiveName = token.value; + // Look up the directive in the handler table auto handlerIt = handlers.find(directiveName); if (handlerIt == handlers.end()) { throw SyntaxError(formatError("Unknown directive: '" + token.value + "'", line, column), ctx); } + // Dispatch to the registered handler function auto& handler = handlerIt->second; handler(target, values, line, column, ctx); } } // namespace -////////////////// -// --- Public API - -ConfigParser::ConfigParser(std::string source) - : // Initialize tokenizer with the raw config string - _tokenizer(std::move(source)) { - // Immediately tokenize the input and store the token list +//=== Public API ========================================================== + +/** + * @brief Constructs a ConfigParser instance from raw configuration text. + * + * @details Initializes the internal tokenizer with the provided configuration + * source string. Immediately tokenizes the input and stores the resulting + * token sequence for parsing. 
This makes subsequent parsing methods + * (`parseConfig`, `parseServer`, etc.) operate directly on a prebuilt + * token stream rather than raw text. + * + * @param source Raw configuration string to be parsed. + * + * @ingroup config_parser + */ +ConfigParser::ConfigParser(std::string source) : _tokenizer(std::move(source)) { _tokens = _tokenizer.tokenize(); } +/** + * @brief Parses the full configuration input into a Config object. + * + * @details Expects the top-level configuration to consist of one or more + * `server` blocks. Ensures that the input is non-empty, validates + * the presence of `server` keywords, and parses each block in turn. + * If invalid structure or unexpected tokens are encountered, a + * `SyntaxError` is thrown with contextual information. + * + * @return A fully constructed Config object containing all parsed servers. + * + * @throws SyntaxError If the configuration is empty, missing `server` blocks, + * or contains unexpected tokens after a block. + * + * @ingroup config_parser + */ Config ConfigParser::parseConfig() { Config config; - // Reject completely empty configuration input + // Reject completely empty configuration if (isAtEnd()) { throw SyntaxError(formatError("Empty configuration", 1, 1), getLineSnippet()); } - // Top-level config must consist of one or more `server` blocks + // Parse one or more server blocks while (!isAtEnd()) { - // If the current token is not `server`, fail early + // Each top-level block must start with the 'server' keyword if (current().type != TokenType::KEYWORD_SERVER) { throw SyntaxError( formatError("Expected 'server' block", current().line, current().column), getLineSnippet()); } - // Parse a full server block and append it to the config + // Parse the full server block and add it to the Config config.addServer(parseServer()); - // After a server block, only EOF or another server block is allowed + // After a server block, only EOF or another 'server' is allowed if (!isAtEnd() && current().type != 
TokenType::KEYWORD_SERVER && current().type != TokenType::END_OF_FILE) { throw SyntaxError(formatError("Unexpected token after server block", current().line, @@ -99,57 +199,109 @@ Config ConfigParser::parseConfig() { return config; } -//////////////////////////// -// --- Server Block Parsing - +//=== Server Block Parsing =============================================== + +/** + * @brief Parses a single `server` block into a Server object. + * + * @details Ensures the block starts with the `server` keyword and opening brace, + * then iterates through its directives and nested `location` blocks. + * Each directive is validated against duplicates (unless repeatable) + * and dispatched to the appropriate handler. + * + * @return A fully populated Server object representing the parsed block. + * + * @throws SyntaxError If the block is malformed, contains duplicate directives, + * or is missing the expected closing brace. + * + * @ingroup config_parser + */ Server ConfigParser::parseServer() { - expect(TokenType::KEYWORD_SERVER, "server block"); // Ensure block starts with 'server' - expect(TokenType::LBRACE, "start of server block"); // Expect opening brace '{' + // Validate required "server {" start sequence + expect(TokenType::KEYWORD_SERVER, "server block"); + expect(TokenType::LBRACE, "start of server block"); - Server server; - std::unordered_set seen; // Track directives to detect duplicates + Server server; // New Server instance + std::unordered_set seen; // Track seen directives (for duplicates) - // Loop until closing '}' or end of file + // Loop until closing brace or EOF while (!isAtEnd() && current().type != TokenType::RBRACE) { if (current().type == TokenType::KEYWORD_LOCATION) { - // Parse and attach a location block to the server + // Nested location block -> parse recursively server.addLocation(parseLocation()); } else { - // Normalize directive name to lowercase and check for duplicates + // Check for duplicate directives unless repeatable const 
std::string name = current().value; if (!checkDuplicateDirective(name, seen, kRepeatableServerDirectives)) { throw SyntaxError(formatError("Duplicate directive: '" + name + "'", current().line, current().column), getLineSnippet()); } - // Parse the directive and apply it to the server + // Parse directive and apply it to the server parseServerDirective(server); } } - expect(TokenType::RBRACE, "end of server block"); // Validate closing '}' + // Ensure proper block termination + expect(TokenType::RBRACE, "end of server block"); return server; } +/** + * @brief Parses and applies a single directive inside a `server` block. + * + * @details Reads the current directive token (e.g., `listen`, `host`, etc.), + * collects its arguments, enforces a terminating semicolon, and then + * dispatches the directive to the appropriate handler function from + * the server handler table. + * + * @param server Reference to the Server object being populated. + * + * @throws SyntaxError If the directive is malformed, missing arguments, or + * missing its terminating semicolon. + * + * @ingroup config_parser + */ void ConfigParser::parseServerDirective(Server& server) { - Token key = current(); // Capture the directive token (e.g., "listen", "host", etc.) 
- advance(); // Consume the directive name - std::vector values = collectArgs(kArgTokenTypes); // Parse directive arguments - expect(TokenType::SEMICOLON, "semicolon after server directive"); // Enforce `;` terminator - parseDirective(server, key, values, - directive::serverHandlers(), // Dispatch to the correct handler - key.line, key.column, getLineSnippet()); -} + Token key = current(); // Capture the directive keyword token + advance(); // Consume the keyword (move to its arguments) + + // Collect directive arguments (strings, numbers, or identifiers) + std::vector values = collectArgs(kArgTokenTypes); -////////////////////////////// -// --- Location Block Parsing + // Ensure directive ends with a semicolon + expect(TokenType::SEMICOLON, "semicolon after server directive"); + // Dispatch directive to its handler, applying it to the server + parseDirective(server, key, values, directive::serverHandlers(), key.line, key.column, + getLineSnippet()); +} + +//=== Location Block Parsing ============================================= + +/** + * @brief Parses a single `location` block inside a `server` block. + * + * @details Ensures the block starts with the `location` keyword, validates and + * sets the location path, then iterates over its contained directives. + * Each directive is validated for duplicates (unless repeatable) and + * dispatched to the appropriate handler. The block must end with a + * closing brace `}`. + * + * @return A fully constructed `Location` object populated with directives. + * + * @throws SyntaxError If the block is malformed, the path is missing or invalid, + * contains duplicate directives, or is missing required + * braces. 
+ * + * @ingroup config_parser + */ Location ConfigParser::parseLocation() { - expect(TokenType::KEYWORD_LOCATION, "location block"); // Ensure block starts with 'location' + expect(TokenType::KEYWORD_LOCATION, "location block"); // Must start with `location` Location location; - // Validate path token exists and is of correct type + // Validate that the next token is a valid path (string or identifier) TokenType type = current().type; if (type != TokenType::STRING && type != TokenType::IDENTIFIER) { throw SyntaxError(formatError("Expected location path after 'location', but got '" + @@ -158,6 +310,7 @@ Location ConfigParser::parseLocation() { getLineSnippet()); } + // Extract and validate the path token Token pathTok = current(); if (pathTok.value.empty() || pathTok.value[0] != '/') { throw SyntaxError( @@ -166,13 +319,13 @@ Location ConfigParser::parseLocation() { getLineSnippet()); } location.setPath(pathTok.value); - advance(); // consume the path token + advance(); // Consume the path token - expect(TokenType::LBRACE, "start of location block"); // Expect opening brace '{' + expect(TokenType::LBRACE, "start of location block"); // Require opening `{` std::unordered_set seen; // Track encountered directives to catch duplicates - // Parse directives until closing brace + // Parse directives until closing brace `}` while (!isAtEnd() && current().type != TokenType::RBRACE) { const std::string name = current().value; if (!checkDuplicateDirective(name, seen, kRepeatableLocationDirectives)) { @@ -180,46 +333,85 @@ Location ConfigParser::parseLocation() { current().column), getLineSnippet()); } - // Parse and apply a directive to the Location object - parseLocationDirective(location); + parseLocationDirective(location); // Apply directive to the location } - expect(TokenType::RBRACE, "end of location block"); // Expect closing brace '}' + expect(TokenType::RBRACE, "end of location block"); // Ensure closing `}` return location; } +/** + * @brief Parses a single 
directive inside a `location` block. + * + * @details Reads the directive keyword, collects its arguments, enforces that + * it is terminated by a semicolon `;`, and dispatches it to the + * appropriate handler function via the directive table. + * + * @param location Reference to the `Location` object where parsed directive + * data will be applied. + * + * @throws SyntaxError If arguments are invalid, missing, or the directive + * is not followed by a semicolon. + * + * @ingroup config_parser + */ void ConfigParser::parseLocationDirective(Location& location) { - Token key = current(); // Capture the directive token (e.g., "root", "index", etc.) + Token key = current(); // Capture directive keyword token (e.g., "root", "index", etc.) advance(); // Consume the directive keyword - std::vector values = collectArgs(kArgTokenTypes); // Parse directive arguments - expect(TokenType::SEMICOLON, "semicolon after location directive"); // Ensure `;` terminator - parseDirective(location, key, values, // Dispatch to handler - directive::locationHandlers(), key.line, key.column, getLineSnippet()); + + // Collect arguments that follow the directive + std::vector values = collectArgs(kArgTokenTypes); + + // Ensure the directive is properly terminated by a semicolon + expect(TokenType::SEMICOLON, "semicolon after location directive"); + + // Dispatch the directive to the correct handler function from the table + parseDirective(location, key, values, directive::locationHandlers(), key.line, key.column, + getLineSnippet()); } -//////////////////////// -// --- Token Navigation +//=== Token Navigation ==================================================== +/** + * @brief Returns the current token (bounds-checked). + * @ingroup config_parsing + */ const Token& ConfigParser::current() const { - // Return the token at the current parsing position return _tokens.at(_pos); } +/** + * @brief Returns the previous token or the first token if at start. 
+ * @ingroup config_parsing + */ const Token& ConfigParser::previous() const { if (_pos == 0) { - return _tokens.at(0); // fallback to first token + return _tokens.at(0); } return _tokens.at(_pos - 1); } +/** + * @brief Peeks a token at an offset ≥ 1 from current. + * + * @param offset Relative index (defaults to 1). + * @return Token at index or EOF token if out of range. + * @ingroup config_parsing + */ const Token& ConfigParser::peek(std::size_t offset) const { std::size_t index = _pos + offset; if (index < _tokens.size()) { return _tokens.at(index); } - return _tokens.back(); // Fallback to EOF token + return _tokens.back(); } +/** + * @brief Looks behind by a given offset, or returns an EOF dummy. + * + * @param offset Distance to look back (defaults to 1). + * @ingroup config_parsing + */ const Token& ConfigParser::lookBehind(std::size_t offset) const { static Token dummy(TokenType::END_OF_FILE, "", 0, 0, 0); @@ -229,104 +421,194 @@ const Token& ConfigParser::lookBehind(std::size_t offset) const { return dummy; } +/** + * @brief Advances to the next token and returns it. + * + * @return New current token after increment (or EOF if at end). + * @ingroup config_parsing + */ const Token& ConfigParser::advance() { if (!isAtEnd()) - ++_pos; // Move to the next token if not already at the end - return current(); // Return the new current token + ++_pos; + return current(); } +/** + * @brief Checks if the parser has consumed all tokens or reached EOF. + * + * @return `true` if no more tokens are available. + * @ingroup config_parsing + */ bool ConfigParser::isAtEnd() const { - // True if we've reached the end of the token stream or explicitly hit the EOF token return _pos >= _tokens.size() || _tokens[_pos].type == TokenType::END_OF_FILE; } +/** + * @brief Consumes the current token if its type matches. + * + * @param type Expected token type. + * @return `true` if matched/consumed. 
+ * @ingroup config_parsing + */ bool ConfigParser::match(TokenType type) { - // If at end or current token doesn't match the expected type, do nothing if (isAtEnd() || _tokens[_pos].type != type) return false; - ++_pos; // Consume the token if it matches - return true; // Indicate successful match and consumption + ++_pos; + return true; } +/** + * @brief Ensures that the current token matches a specific expected type. + * + * @details This method validates that the parser is currently positioned + * at a token of type @p expected. If the token matches, it is consumed + * (the parser advances by one). If not, an `UnexpectedToken` error is thrown + * with details about what was expected versus what was found. + * + * @param expected The exact token type required at this position. + * @param context Human-readable context for error reporting + * (unused here but provided for consistency). + * + * @throws UnexpectedToken If the current token does not match @p expected. + * + * @ingroup config_parser + */ void ConfigParser::expect(TokenType expected, const std::string& context) { - (void) context; + (void) context; // Currently unused, but kept for interface consistency + // If at end or wrong type → prepare error if (isAtEnd() || _tokens[_pos].type != expected) { const Token* actual; if (isAtEnd()) { - actual = &_tokens.back(); // fallback to last token (should be EOF) + actual = &_tokens.back(); // fallback to EOF token } else { - actual = &_tokens[_pos]; + actual = &_tokens[_pos]; // offending token } + // Build and throw detailed error throw UnexpectedToken(formatError("Expected " + debugTokenType(expected) + ", but got " + debugTokenType(actual->type), actual->line, actual->column), getLineSnippet()); } - ++_pos; // Consume the token if it matched + ++_pos; // Consume token if matched } +/** + * @brief Ensures the current token matches one of several expected types. 
+ * + * @details This method is used when a directive or grammar rule allows + * multiple token types in the same position (e.g., `STRING` or `IDENTIFIER`). + * It checks if the current token matches any type in @p types: + * + * - If a match is found, the token is consumed via `advance()` and returned. + * - If none match, an `UnexpectedToken` exception is thrown with a + * descriptive error message listing all expected types and the actual one. + * + * @param types List of acceptable token types (initializer list). + * @param context Human-readable context for error reporting. + * + * @return The matched token, already consumed from the stream. + * + * @throws UnexpectedToken If the current token does not match any expected type. + * + * @ingroup config_parser + */ Token ConfigParser::expectOneOf(std::initializer_list types, const std::string& context) { - TokenType actual = current().type; + TokenType actual = current().type; // Current token type - // Check if the current token matches any of the expected types + // Try matching against the expected set for (TokenType expected : types) { if (actual == expected) { - return advance(); // If matched, consume and return it + return advance(); // Consume & return if found } } - // If no match, build an error message listing all expected types + // Build detailed error message if no match std::ostringstream msg; msg << "Expected "; for (auto it = types.begin(); it != types.end(); ++it) { if (it != types.begin()) { - msg << " or "; + msg << " or "; // Separate multiple expected types } - msg << debugTokenType(*it); + msg << debugTokenType(*it); // Append readable token type } msg << " for " << context << ", but got " << debugTokenType(actual); - // Throw a detailed syntax error with contextual highlighting + // Throw with contextual line snippet throw UnexpectedToken(formatError(msg.str(), current().line, current().column), getLineSnippet()); } +/** + * @brief Collects a list of argument values following a directive. 
+ * + * @details This function consumes tokens that belong to the directive's argument list. + * It accepts only tokens of specific types (`STRING`, `NUMBER`, `IDENTIFIER`), + * provided in the @p validTypes span. Arguments may be separated by commas. + * Example: + * ``` + * root /var/www; + * methods GET, POST, DELETE; + * ``` + * + * - Stops parsing when a token does not match a valid argument type. + * - Supports comma-separated lists of arguments. + * - Throws if a comma is not followed by a valid argument. + * + * @param validTypes A span of allowed token types for directive arguments. + * + * @return A vector of string values representing the collected arguments. + * + * @throws SyntaxError If a comma is not followed by a valid argument type. + * + * @ingroup config_parser + */ std::vector ConfigParser::collectArgs(std::span validTypes) { - std::vector values; + std::vector values; // Accumulate argument values here + // Continue as long as we are not at end-of-file while (!isAtEnd()) { TokenType t = current().type; + // If current token type is not among the allowed ones, stop collecting if (std::find(validTypes.begin(), validTypes.end(), t) == validTypes.end()) break; + // Append the argument value (string/number/identifier) values.push_back(current().value); - advance(); + advance(); // Move to the next token + // Handle optional comma-separated arguments while (match(TokenType::COMMA)) { + // A comma must be followed by another valid argument if (isAtEnd() || std::find(validTypes.begin(), validTypes.end(), current().type) == validTypes.end()) { throw SyntaxError( formatError("Expected value after comma", current().line, current().column), getLineSnippet()); } + + // Append the value after the comma values.push_back(current().value); - advance(); + advance(); // Consume argument token } } - return values; + return values; // Return the collected argument list } -///////////////////// -// --- Error Context +//=== Error Context 
======================================================= +/** + * @brief Extracts the full source line of the last consumed token. + * + * @return Source line for diagnostic display. + * @ingroup config_parsing + */ std::string ConfigParser::getLineSnippet() const { - // Extract the full line of source text where the previous token is located return _tokenizer.extractLine(previous().offset); } diff --git a/src/config/parser/directive_handler_table.cpp b/src/config/parser/directive_handler_table.cpp index b914ca5c..98fb57a2 100644 --- a/src/config/parser/directive_handler_table.cpp +++ b/src/config/parser/directive_handler_table.cpp @@ -3,13 +3,32 @@ /* ::: :::::::: */ /* directive_handler_table.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/08 17:14:27 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:10:57 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:50:27 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file directive_handler_table.cpp + * @brief Implements directive handler maps for `server` and `location` blocks. + * + * @details Defines the concrete handler tables returned by + * directive::serverHandlers and directive::locationHandlers. + * Each handler validates its arguments, reports precise diagnostics + * (line/column + contextual snippet), and mutates the target + * configuration object (Server or Location). + * + * This file also contains small internal helpers used by handlers: + * - `resolveToAbsolute()` — canonicalizes filesystem paths. + * - `requireArgCount()` / `requireMinArgCount()` — arity checks. + * - `validateIPv4Address()` — IPv4 format validation (with `localhost` allowed). + * - `validateCgiExtension()` — enforces dot-prefixed alnum extensions. 
+ * + * @ingroup config_parsing + */ + #include "config/parser/directive_handler_table.hpp" #include "config/parser/ConfigParseError.hpp" // for SyntaxError #include "core/Location.hpp" // for Location @@ -25,15 +44,55 @@ namespace directive { +/** + * @brief Resolves a filesystem path to its absolute, normalized form. + * + * @details Converts the given raw path string into an absolute path + * using `std::filesystem::absolute()`, then applies lexical + * normalization (removing redundant `.` and `..` components + * and duplicate separators). This ensures the result is a + * canonicalized path string safe for use in configuration + * directives (e.g., `root`, `upload_store`). + * + * @param rawPath The raw input path (may be relative or contain `.`/`..`). + * @return A string representing the absolute, lexically normalized path. + * + * @throw std::filesystem::filesystem_error If the path cannot be resolved + * due to invalid characters or inaccessible filesystem. + * + * @ingroup config_parser + */ static std::string resolveToAbsolute(const std::string& rawPath) { std::filesystem::path abs = std::filesystem::absolute(rawPath); return abs.lexically_normal().string(); } +/** + * @brief Ensures that a directive has exactly the expected number of arguments. + * + * @details Used during configuration parsing to validate that a directive + * (e.g., `listen`, `root`, `client_max_body_size`) is provided with + * the correct number of arguments. If the count does not match + * the expectation, a `SyntaxError` is thrown with contextual + * information about the directive, line, column, and source snippet. + * + * @param args The list of argument strings parsed for the directive. + * @param expected The exact number of arguments required. + * @param directive The name of the directive being validated. + * @param line The line number in the config file where the directive appears. + * @param column The column number in the config file where the directive appears. 
+ * @param ctx A snippet of the surrounding source text for error reporting. + * + * @throw SyntaxError If the directive does not have exactly `expected` arguments. + * + * @ingroup config_parser + */ static void requireArgCount(const std::vector& args, std::size_t expected, const std::string& directive, int line, int column, const std::string& ctx) { + // Check if the argument count matches the expected number if (args.size() != expected) { + // If not, throw a SyntaxError with detailed context throw SyntaxError(formatError("Directive '" + directive + "' takes exactly " + std::to_string(expected) + " argument(s), but got " + std::to_string(args.size()), @@ -42,10 +101,32 @@ static void requireArgCount(const std::vector& args, std::size_t ex } } +/** + * @brief Ensures that a directive has at least the required number of arguments. + * + * @details Used during configuration parsing to validate directives that + * accept a variable number of arguments (e.g., `server_name`, + * `error_page`, `index`). If the number of arguments is below + * the minimum threshold, a `SyntaxError` is thrown with detailed + * information about the directive, its location, and the input context. + * + * @param args The list of arguments parsed for the directive. + * @param min The minimum number of arguments required. + * @param directive The name of the directive being validated. + * @param line The line number in the config file where the directive appears. + * @param column The column number in the config file where the directive appears. + * @param ctx A snippet of the surrounding config source text for context. + * + * @throw SyntaxError If the directive has fewer than `min` arguments. 
+ * + * @ingroup config_parser + */ static void requireMinArgCount(const std::vector& args, std::size_t min, const std::string& directive, int line, int column, const std::string& ctx) { + // Check if number of parsed arguments is below the minimum if (args.size() < min) { + // Throw a SyntaxError with detailed error message and context throw SyntaxError(formatError("Directive '" + directive + "' requires at least " + std::to_string(min) + " argument(s), but got " + std::to_string(args.size()), @@ -54,29 +135,58 @@ static void requireMinArgCount(const std::vector& args, std::size_t } } +/** + * @brief Validates that a given string is a valid IPv4 address. + * + * @details This function ensures that the provided `ip` string is either + * `"localhost"` (special case allowed) or a well-formed IPv4 address + * in dotted-decimal notation (e.g., `127.0.0.1`, `192.168.1.42`). + * It splits the input into octets, parses each into an integer, and + * validates that: + * - There are exactly 4 octets. + * - Each octet is between 0 and 255. + * - No extra segments exist. + * + * @param ip The IP address string to validate. + * @param line Line number in the config file where the directive appears. + * @param column Column number in the config file where the directive appears. + * @param context_provider Lazy-evaluated function returning a source context snippet. + * + * @throw SyntaxError If the IP address is malformed (wrong number of octets, + * non-numeric values, or values outside 0–255). + * + * @ingroup config_parser + */ static void validateIPv4Address(const std::string& ip, int line, int column, const std::function& context_provider) { std::istringstream iss(ip); std::string segment; int count = 0; + // Special case: allow "localhost" without further validation if (ip == "localhost") return; + // Split the string by '.' 
and validate each segment while (std::getline(iss, segment, '.')) { if (++count > 4) { + // More than 4 parts → invalid IPv4 throw SyntaxError(formatError("Too many octets in IP address: " + ip, line, column), context_provider()); } + // Convert segment to integer, throws if non-numeric int octet = parseInt(segment, "host", line, column, context_provider); + // Check range validity (0–255) if (octet < 0 || octet > 255) { throw SyntaxError( formatError("Invalid IP octet '" + segment + "' in host: " + ip, line, column), context_provider()); } } + + // Must have exactly 4 octets if (count != 4) { throw SyntaxError( formatError("Invalid IP address format (expected 4 octets): " + ip, line, column), @@ -84,13 +194,34 @@ static void validateIPv4Address(const std::string& ip, int line, int column, } } +/** + * @brief Validates the syntax of a CGI file extension. + * + * @details This function ensures that a given CGI extension string is valid. + * A valid extension must: + * - Be non-empty. + * - Begin with a dot (`.`). + * - Contain only alphanumeric characters after the dot. + * - Not be just `"."` with no following characters. + * + * @param ext The extension string to validate (e.g., ".php"). + * @param line Line number in the config file for error reporting. + * @param column Column number in the config file for error reporting. + * @param context_provider Lazy-evaluated function returning a source context snippet. + * + * @throw SyntaxError If the extension is malformed or contains invalid characters. + * + * @ingroup config_parser + */ static void validateCgiExtension(const std::string& ext, int line, int column, const std::function& context_provider) { + // Must not be empty, a lone '.', and must start with a dot if (ext.empty() || ext == "." || ext[0] != '.') { throw SyntaxError(formatError("Invalid CGI extension: '" + ext + "'", line, column), context_provider()); } + // Check that all characters after '.' 
are alphanumeric for (std::size_t i = 1; i < ext.size(); ++i) { char c = ext[i]; if (!std::isalnum(c)) { @@ -101,70 +232,157 @@ static void validateCgiExtension(const std::string& ext, int line, int column, } } +/** + * @brief Returns the mapping of server-level directives to their handler functions. + * + * @details + * This function builds (once, statically) an `unordered_map` that associates each + * directive keyword valid inside a `server { ... }` block with a corresponding + * validation and handler lambda. Each handler is responsible for: + * - Checking argument count and validity. + * - Validating the argument format (IP, port, byte sizes, etc.). + * - Updating the `Server` object with the parsed value. + * + * Supported directives: + * - **listen ;** + * Validates port number (0–65535) and stores it in the server object. + * - **host ;** + * Validates IPv4 address (or "localhost") and assigns it to the server. + * - **server_name ;** + * Accepts one or more hostnames; each is registered as a valid server name. + * - **client_max_body_size ;** + * Validates a byte-size expression (e.g., "10M") and sets the upload limit. + * - **error_page ;** + * Accepts one or more HTTP error codes followed by a URI. + * Maps each error code to the provided URI. + * + * @return A reference to the immutable directive-to-handler map. + * + * @ingroup config_parser + */ const std::unordered_map& serverHandlers() { + // Static so the map is created once and reused on subsequent calls. static const std::unordered_map map = { + // ─────────────────────────────── + // `listen ;` + // Validates that the port number is within range [0, 65535]. 
{"listen", [](Server& s, const auto& v, int line, int column, const std::string& ctx) { - requireArgCount(v, 1, "listen", line, column, ctx); + requireArgCount(v, 1, "listen", line, column, ctx); // must have exactly one argument int port = parseInt(v[0], "listen", line, column, [&]() { return ctx; }); if (port < 0 || port > 65535) { throw SyntaxError( formatError("Port number out of valid range (0-65535): " + v[0], line, column), ctx); } - s.setPort(port); + s.setPort(port); // assign validated port to server }}, + + // ─────────────────────────────── + // `host ;` + // Validates IPv4 address (or "localhost"). {"host", [](Server& s, const auto& v, int line, int column, const std::string& ctx) { - requireArgCount(v, 1, "host", line, column, ctx); + requireArgCount(v, 1, "host", line, column, ctx); // must have exactly one argument const std::string& ip = v[0]; - validateIPv4Address(ip, line, column, [&]() { return ctx; }); - s.setHost(ip); + validateIPv4Address(ip, line, column, [&]() { return ctx; }); // ensure valid format + s.setHost(ip); // store host in server }}, + + // ─────────────────────────────── + // `server_name ;` + // Allows one or more server names (aliases). {"server_name", [](Server& s, const auto& v, int line, int column, const std::string& ctx) { - requireMinArgCount(v, 1, "server_name", line, column, ctx); + requireMinArgCount(v, 1, "server_name", line, column, ctx); // at least one name for (const auto& name : v) { - s.addServerName(name); + s.addServerName(name); // register each alias } }}, + // ─────────────────────────────── + // `client_max_body_size ;` + // Validates a byte-size string (e.g., "10M") and sets upload limit. 
{"client_max_body_size", [](Server& s, const auto& v, int line, int column, const std::string& ctx) { - requireArgCount(v, 1, "client_max_body_size", line, column, ctx); - s.setClientMaxBodySize( - parseByteSize(v[0], "client_max_body_size", line, column, [&]() { return ctx; })); + requireArgCount(v, 1, "client_max_body_size", line, column, ctx); // one argument + s.setClientMaxBodySize(parseByteSize(v[0], "client_max_body_size", line, column, + [&]() { return ctx; })); // convert and set }}, + + // ─────────────────────────────── + // `error_page ;` + // Maps one or more error codes to a redirect URI. {"error_page", [](Server& s, const auto& v, int line, int column, const std::string& ctx) { - requireMinArgCount(v, 2, "error_page", line, column, ctx); - std::string uri = v.back(); + requireMinArgCount(v, 2, "error_page", line, column, ctx); // at least one code + URI + std::string uri = v.back(); // last argument is the URI for (std::size_t i = 0; i + 1 < v.size(); ++i) { int code = parseInt(v[i], "error_page", line, column, [&]() { return ctx; }); - s.setErrorPage(code, uri); + s.setErrorPage(code, uri); // map each code to the URI } }}, }; + return map; } +/** + * @brief Returns the mapping of location-level directives to their handler functions. + * + * @details + * This function builds (once, statically) an `unordered_map` that associates each + * directive keyword valid inside a `location { ... }` block with its validation + * and handler lambda. Each handler checks argument counts, validates syntax, + * and updates the `Location` object accordingly. + * + * Supported directives: + * - **root ;** + * Sets the filesystem root for this location (absolute, normalized path). + * - **index ;** + * Specifies one or more default index files (supports comma-separated values). + * - **autoindex on|off;** + * Enables or disables directory listing. + * - **methods ;** + * Restricts allowed HTTP methods (GET, POST, DELETE). 
+ * - **upload_store ;** + * Defines the directory for file uploads (absolute, normalized path). + * - **cgi_extension <.ext...>;** + * Lists extensions handled by CGI (e.g., `.php`, `.py`). + * - **cgi_interpreter <.ext> ;** + * Associates a CGI extension with an executable interpreter. + * - **return ;** + * Configures a redirect with HTTP status code. + * + * @return A reference to the immutable directive-to-handler map. + * + * @ingroup config_parser + */ const std::unordered_map& locationHandlers() { static const std::unordered_map map = { + // ─────────────────────────────── + // `root ;` + // Sets the root directory for this location. {"root", [](Location& loc, const auto& v, int line, int column, const std::string& ctx) { - requireArgCount(v, 1, "root", line, column, ctx); - loc.setRoot(resolveToAbsolute(v[0])); + requireArgCount(v, 1, "root", line, column, ctx); // must have exactly one path + loc.setRoot(resolveToAbsolute(v[0])); // normalize to absolute path }}, + + // ─────────────────────────────── + // `index ;` + // Defines one or more index files (comma-separated allowed). {"index", [](Location& loc, const auto& args, int line, int column, const std::string& ctx) { - requireMinArgCount(args, 1, "index", line, column, ctx); + requireMinArgCount(args, 1, "index", line, column, ctx); // at least one file for (const std::string& raw : args) { size_t start = 0, end; + // Split on commas while ((end = raw.find(',', start)) != std::string::npos) { std::string idx = raw.substr(start, end - start); if (!idx.empty()) - loc.addIndexFile(idx); + loc.addIndexFile(idx); // add each file start = end + 1; } std::string idx = raw.substr(start); @@ -172,6 +390,10 @@ const std::unordered_map& locationHandlers() { loc.addIndexFile(idx); } }}, + + // ─────────────────────────────── + // `autoindex on|off;` + // Enables or disables directory listing. 
{"autoindex", [](Location& loc, const auto& v, int line, int column, const std::string& ctx) { requireArgCount(v, 1, "autoindex", line, column, ctx); @@ -184,6 +406,10 @@ const std::unordered_map& locationHandlers() { formatError("Invalid value for 'autoindex': " + v[0], line, column), ctx); } }}, + + // ─────────────────────────────── + // `methods ;` + // Restricts allowed HTTP methods (must be GET, POST, or DELETE). {"methods", [](Location& loc, const auto& v, int line, int column, const std::string& ctx) { if (v.empty()) { @@ -197,19 +423,28 @@ const std::unordered_map& locationHandlers() { if (!valid_methods.count(m)) { throw SyntaxError(formatError("Invalid HTTP method: " + m, line, column), ctx); } - loc.addMethod(m); + loc.addMethod(m); // store validated method } }}, + + // ─────────────────────────────── + // `upload_store ;` + // Sets the directory where file uploads will be saved. {"upload_store", [](Location& loc, const auto& v, int line, int column, const std::string& ctx) { requireArgCount(v, 1, "upload_store", line, column, ctx); - loc.setUploadStore(resolveToAbsolute(v[0])); + loc.setUploadStore(resolveToAbsolute(v[0])); // normalize path }}, + + // ─────────────────────────────── + // `cgi_extension <.ext...>;` + // Declares CGI file extensions (e.g., `.php`). {"cgi_extension", [](Location& loc, const auto& args, int line, int column, const std::string& ctx) { requireMinArgCount(args, 1, "cgi_extension", line, column, ctx); for (const std::string& raw : args) { size_t start = 0, end; + // Split multiple extensions separated by commas while ((end = raw.find(',', start)) != std::string::npos) { std::string ext = raw.substr(start, end - start); if (!ext.empty()) { @@ -225,12 +460,17 @@ const std::unordered_map& locationHandlers() { } } }}, + + // ─────────────────────────────── + // `cgi_interpreter <.ext> ;` + // Maps a CGI extension to its interpreter binary. 
{"cgi_interpreter", [](Location& loc, const auto& v, int line, int column, const std::string& ctx) { - requireArgCount(v, 2, "cgi_interpreter", line, column, ctx); + requireArgCount(v, 2, "cgi_interpreter", line, column, ctx); // ext + path required const std::string& ext = v[0]; const std::string& path = v[1]; - validateCgiExtension(ext, line, column, [&]() { return ctx; }); + + validateCgiExtension(ext, line, column, [&]() { return ctx; }); // must be valid ext if (!std::filesystem::is_regular_file(path) || access(path.c_str(), X_OK) != 0) { throw SyntaxError( formatError("Interpreter not executable or not found: " + path, line, column), @@ -242,15 +482,20 @@ const std::unordered_map& locationHandlers() { column), ctx); } - loc.addCgiInterpreter(ext, path); + loc.addCgiInterpreter(ext, path); // store extension → interpreter mapping }}, + + // ─────────────────────────────── + // `return ;` + // Defines a redirect (e.g., `return 301 /newpath;`). {"return", [](Location& loc, const auto& v, int line, int column, const std::string& ctx) { requireArgCount(v, 2, "return", line, column, ctx); int code = parseInt(v[0], "return", line, column, [&]() { return ctx; }); - loc.setRedirect(v[1], code); + loc.setRedirect(v[1], code); // assign redirect URI + status code }}, }; + return map; } diff --git a/src/config/tokenizer/Tokenizer.cpp b/src/config/tokenizer/Tokenizer.cpp index 7ba3e625..7b84c34a 100644 --- a/src/config/tokenizer/Tokenizer.cpp +++ b/src/config/tokenizer/Tokenizer.cpp @@ -3,13 +3,25 @@ /* ::: :::::::: */ /* Tokenizer.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/03 01:06:09 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:14:38 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:51:02 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file Tokenizer.cpp + * @brief Implements the 
Tokenizer used for config lexical analysis. + * + * @details Defines all methods of the Tokenizer, including cursor management, + * classification helpers, high-level parsers (identifiers, numbers, + * strings), comment/whitespace skipping, token dispatch, and token + * creation with accurate source locations. + * + * @ingroup config_tokenizing + */ + #include "config/tokenizer/Tokenizer.hpp" #include "config/parser/ConfigParseError.hpp" // for TokenizerError #include "config/tokenizer/token.hpp" // for Token, TokenType @@ -19,15 +31,32 @@ #include // for unordered_map, operator== #include // for move, pair -//////////////////////////////// -// --- Constructors - +//=== Construction & Special Members ===================================== + +/** + * @brief Constructs a Tokenizer over an input string. + * + * @details Initializes cursor position and source coordinates to the start of + * the input buffer. + * + * @param input Raw configuration text to tokenize. Ownership is moved in. + * @ingroup config_tokenizing + */ Tokenizer::Tokenizer(std::string input) : _input(std::move(input)), _pos(0), _line(1), _column(1) { } -//////////////// -// --- Main API - +//=== Main API ============================================================ + +/** + * @brief Converts the input buffer into a sequence of tokens. + * + * @details Skips BOM, whitespace, and comments; emits recognized tokens and a + * final END_OF_FILE token. Pre-reserves capacity for fewer reallocs. + * + * @return Vector of tokens in source order (always ends with END_OF_FILE). + * @throws TokenizerError On malformed constructs (e.g., bad strings/suffixes). 
+ * @ingroup config_tokenizing + */ std::vector Tokenizer::tokenize() { skipUtf8BOM(); // Skip UTF-8 BOM if present at the beginning @@ -45,14 +74,26 @@ std::vector Tokenizer::tokenize() { return _tokens; } -///////////////////////// -// --- Core Cursor Logic +//=== Core Cursor Logic =================================================== +/** + * @brief Tests if the cursor reached the end of the input. + * + * @return `true` when no more characters are available. + * @ingroup config_tokenizing + */ bool Tokenizer::isAtEnd() const noexcept { // True if the cursor has reached or passed end of input return _pos >= _input.size(); } +/** + * @brief Consumes a specific character if present. + * + * @param expected Character to match. + * @return `true` if consumed; `false` otherwise. + * @ingroup config_tokenizing + */ bool Tokenizer::match(char expected) noexcept { // Return false if at end or current char does not match if (isAtEnd() || _input[_pos] != expected) { @@ -64,11 +105,23 @@ bool Tokenizer::match(char expected) noexcept { return true; } +/** + * @brief Peeks the current character without consuming it. + * + * @return Current character (undefined if at end). + * @ingroup config_tokenizing + */ unsigned char Tokenizer::peek() const noexcept { // Return current character without consuming it return static_cast(_input[_pos]); } +/** + * @brief Peeks the next character without consuming it. + * + * @return Next character, or `'\0'` at end. + * @ingroup config_tokenizing + */ unsigned char Tokenizer::peekNext() const noexcept { if (isAtEnd()) { return '\0'; @@ -76,6 +129,15 @@ unsigned char Tokenizer::peekNext() const noexcept { return static_cast(_input[_pos + 1]); } +/** + * @brief Advances by one character, updating line/column. + * + * @details Increments line and resets column on newline; otherwise increments + * column. + * + * @return The consumed character. 
+ * @ingroup config_tokenizing + */ unsigned char Tokenizer::advance() noexcept { char c = _input[_pos++]; // Consume current character and move cursor forward if (c == '\n') { @@ -87,24 +149,41 @@ unsigned char Tokenizer::advance() noexcept { return c; // Return the consumed character } -//////////////////////////// -// --- Classification Logic +//=== Classification Logic =============================================== +/** + * @brief Checks if a byte is a valid identifier start. + * + * @param c Character to test. + * @return `true` if valid start. + * @ingroup config_tokenizing + */ inline bool Tokenizer::isIdentifierStart(unsigned char c) const { // Valid first char for identifiers return std::isalpha(c) || c == '_' || c == '/' || c == '.' || c == '-' || c == ':'; } +/** + * @brief Checks if a byte is a valid identifier continuation. + * + * @param c Character to test. + * @return `true` if valid continuation. + * @ingroup config_tokenizing + */ inline bool Tokenizer::isIdentifierChar(unsigned char c) const { // Valid body char for identifiers return std::isalnum(c) || c == '_' || c == '/' || c == '.' || c == '-' || c == ':'; } -////////////////////////// -// --- High-Level Parsers +//=== High-Level Parsers ================================================== -//////////////////////////////// -// --- Skip BOM +/** + * @brief Skips a UTF-8 Byte Order Mark at input start. + * + * @details If present, advances cursor past `0xEF 0xBB 0xBF`. + * + * @ingroup config_tokenizing + */ void Tokenizer::skipUtf8BOM() { static const std::string BOM = "\xEF\xBB\xBF"; // UTF-8 Byte Order Mark if (_input.compare(0, BOM.size(), BOM) == 0) { @@ -112,8 +191,14 @@ void Tokenizer::skipUtf8BOM() { } } -////////////////////////////////// -// --- Skip Whitespace & Comments +/** + * @brief Skips whitespace and hash (`#`) comments. + * + * @details Treats CR/LF/newlines and isspace() as whitespace. Hash comments run + * until end-of-line. 
+ * + * @ingroup config_tokenizing + */ void Tokenizer::skipWhitespaceAndComments() { while (!isAtEnd()) { unsigned char c = peek(); @@ -131,9 +216,14 @@ void Tokenizer::skipWhitespaceAndComments() { } } } -/////////////////////////////////// -// ---Identifier & Keyword Parsing +/** + * @brief Resolves an identifier to a keyword token type when applicable. + * + * @param word Candidate identifier (case-insensitive). + * @return The keyword's TokenType or IDENTIFIER. + * @ingroup config_tokenizing + */ TokenType Tokenizer::resolveKeywordType(const std::string& word) { // Static map of all recognized configuration keywords (lowercase only) static const std::unordered_map keywords = { @@ -168,6 +258,13 @@ TokenType Tokenizer::resolveKeywordType(const std::string& word) { return TokenType::IDENTIFIER; } +/** + * @brief Scans an identifier body after a valid start. + * + * @details Consumes identifier characters until a non-identifier byte. + * + * @ingroup config_tokenizing + */ void Tokenizer::scanIdentifier() { // Assumes the current character is a valid identifier start (checked beforehand) while (!isAtEnd() && isIdentifierChar(peek())) { @@ -175,6 +272,13 @@ void Tokenizer::scanIdentifier() { } } +/** + * @brief Validates the last scanned identifier. + * + * @param start Byte offset where the identifier started. + * @throws TokenizerError If empty, contains '$', or non-printable chars. + * @ingroup config_tokenizing + */ void Tokenizer::validateIdentifier(std::size_t start) { // Reject empty identifiers if (_pos == start) { @@ -210,6 +314,12 @@ void Tokenizer::validateIdentifier(std::size_t start) { } } +/** + * @brief Parses an identifier and resolves to keyword when applicable. + * + * @return Token of type IDENTIFIER or specific KEYWORD_*. 
+ * @ingroup config_tokenizing + */ Token Tokenizer::parseIdentifierOrKeyword() { std::size_t start = _pos; // Record start position of the identifier scanIdentifier(); // Consume all valid identifier characters @@ -219,8 +329,13 @@ Token Tokenizer::parseIdentifierOrKeyword() { return makeToken(type, word); } -///////////////////////////// -// --- Number & Unit Parsing +//=== Number & Unit Parsing ============================================== + +/** + * @brief Consumes consecutive decimal digits. + * + * @ingroup config_tokenizing + */ void Tokenizer::scanDigits() { // Consume characters as long as they are digits (0–9) while (!isAtEnd() && std::isdigit(peek())) { @@ -228,6 +343,15 @@ void Tokenizer::scanDigits() { } } +/** + * @brief Optionally consumes a single-letter size suffix. + * + * @details Accepts exactly one alphabetic suffix (e.g., k/m/g). Rejects + * multi-letter forms (e.g., "mb", "MiB"). + * + * @throws TokenizerError On multi-letter suffix usage. + * @ingroup config_tokenizing + */ void Tokenizer::scanOptionalUnitSuffix() { if (isAtEnd()) return; // Nothing to scan @@ -246,6 +370,12 @@ void Tokenizer::scanOptionalUnitSuffix() { } } +/** + * @brief Parses a NUMBER token with optional single-letter suffix. + * + * @return NUMBER token constructed from the scanned span. + * @ingroup config_tokenizing + */ Token Tokenizer::parseNumberOrUnit() { std::size_t start = _pos; // Record start of numeric token @@ -256,14 +386,31 @@ Token Tokenizer::parseNumberOrUnit() { return makeToken(TokenType::NUMBER, _input.substr(start, _pos - start)); } -////////////////////// -// --- String Parsing +//=== String Parsing ====================================================== + +/** + * @brief Throws a formatted unterminated string error. + * + * @param reason Short explanation (e.g., "unexpected newline"). + * @throws TokenizerError Always; provides formatted context line. 
+ * @ingroup config_tokenizing + */ void Tokenizer::throwUnterminatedString(const std::string& reason) { // Throw a TokenizerError with a contextual reason and source line throw TokenizerError(formatError("Invalid string literal (" + reason + ")", _line, _column), extractLine(_pos)); } +/** + * @brief Parses a quoted string literal. + * + * @details Supports both single and double quotes. Enforces a 64 KiB maximum + * payload and forbids embedded newlines. + * + * @return STRING token containing the unescaped contents. + * @throws TokenizerError On newline, overflow, or EOF before closing quote. + * @ingroup config_tokenizing + */ Token Tokenizer::parseStringLiteral() { static const std::size_t MAX_STRING_LITERAL_LENGTH = 64 * 1024; // 64 KiB @@ -296,24 +443,39 @@ Token Tokenizer::parseStringLiteral() { return Token(TokenType::STRING, "", _line, _column, 0); // Unreachable } -//////////////////////////////////// -// --- Whitespace & Comment Helpers +//=== Whitespace & Comment Helpers ======================================= +/** + * @brief Skips a carriage return (`'\\r'`). + * @ingroup config_tokenizing + */ void Tokenizer::skipCR() { ++_pos; } +/** + * @brief Skips a newline and advances line counter. + * @ingroup config_tokenizing + */ void Tokenizer::skipNewline() { ++_pos; ++_line; _column = 1; } +/** + * @brief Skips a single whitespace character (except newline). + * @ingroup config_tokenizing + */ void Tokenizer::skipOtherWhitespace() { ++_pos; ++_column; } +/** + * @brief Skips a `#`-style comment to the end of the line. + * @ingroup config_tokenizing + */ void Tokenizer::skipHashComment() { ++_pos; ++_column; @@ -323,8 +485,16 @@ void Tokenizer::skipHashComment() { } } -////////////////////// -// --- Token Dispatch +//=== Token Dispatch ====================================================== + +/** + * @brief Heuristically checks if the remaining slice looks like an IPv4 address. 
+ * + * @details Counts dots while digits continue; returns true for exactly 3 dots. + * + * @return `true` if the upcoming token resembles `a.b.c.d`. + * @ingroup config_tokenizing + */ bool Tokenizer::looksLikeIpAddress() const { std::size_t len = _input.size(); std::size_t i = _pos, dots = 0; @@ -342,10 +512,19 @@ bool Tokenizer::looksLikeIpAddress() const { } } - // A valid IPv4 address has at least two dots (e.g., 127.0.0.1) + // A valid IPv4 address has exactly three dots (e.g. 127.0.0.1) return dots == 3; } +/** + * @brief Reads and emits the next token from the input. + * + * @details Dispatches to number/identifier/string parsers or emits punctuation + * tokens. Raises on unknown characters. + * + * @throws TokenizerError On illegal bytes or malformed lexemes. + * @ingroup config_tokenizing + */ void Tokenizer::dispatchToken() { unsigned char c = peek(); @@ -354,7 +533,7 @@ void Tokenizer::dispatchToken() { unsigned char next = peekNext(); // If it looks like an IPv4 address (e.g. 127.0.0.1), or - // if the next character is *not* a digit but *is* a valid identifier char + // if the next character is not a digit but is a valid identifier char // ("1index.html" or "/api/v2a3"), then parse as an identifier/keyword if (looksLikeIpAddress() || (!std::isdigit(next) && isIdentifierChar(next))) { _tokens.push_back(parseIdentifierOrKeyword()); @@ -400,8 +579,16 @@ void Tokenizer::dispatchToken() { } } -//////////////////////////////////// -// --- Token Creation & Source Info +//=== Token Creation & Source Info ======================================= + +/** + * @brief Creates a token with current source coordinates. + * + * @param type Token type to emit. + * @param value Token payload (unescaped for strings). + * @return Token with line/column and byte offset.
+ * @ingroup config_tokenizing + */ Token Tokenizer::makeToken(TokenType type, const std::string& value) const { // Estimate starting column based on current column and token length int col = _column - static_cast(value.length()); @@ -413,6 +600,13 @@ Token Tokenizer::makeToken(TokenType type, const std::string& value) const { return Token(type, value, _line, col, off); } +/** + * @brief Extracts the full source line containing a given byte offset. + * + * @param offset Byte offset into the input buffer. + * @return The line text (without trailing newline). + * @ingroup config_tokenizing + */ std::string Tokenizer::extractLine(std::size_t offset) const { // Find the last newline before (or at) the offset to locate the start of the line std::size_t start = _input.rfind('\n', offset); diff --git a/src/config/tokenizer/token.cpp b/src/config/tokenizer/token.cpp index 00338087..a8bf5e76 100644 --- a/src/config/tokenizer/token.cpp +++ b/src/config/tokenizer/token.cpp @@ -5,19 +5,50 @@ /* +:+ +:+ +:+ */ /* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/05/04 00:19:57 by nlouis #+# #+# */ -/* Updated: 2025/05/20 23:40:05 by nlouis ### ########.fr */ +/* Created: 2025/08/18 11:58:00 by nlouis #+# #+# */ +/* Updated: 2025/08/18 11:58:59 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file token.cpp + * @brief Implements Token helpers for the config lexer. + * + * @details Provides the Token constructor and lightweight debug helpers used to + * print token kinds and annotated token strings for diagnostics. + * + * @ingroup config_tokenizing + */ + #include "config/tokenizer/token.hpp" #include +//=== Construction ======================================================== + +/** + * @brief Constructs a Token with type, value and source coordinates. + * + * @param t Token type. + * @param v Lexeme string (unescaped for STRING). + * @param l 1-based line number where the token begins. 
+ * @param c 1-based column number where the token begins. + * @param off Byte offset into the original input. + * @ingroup config_tokenizing + */ Token::Token(TokenType t, const std::string& v, int l, int c, std::size_t off) : type(t), value(v), line(l), column(c), offset(off) { } +//=== Debug Helpers ======================================================= + +/** + * @brief Returns a compact string name for a TokenType. + * + * @param type Token category. + * @return Short constant name (e.g., `"IDENTIFIER"`, `"LBRACE"`). + * @ingroup config_tokenizing + */ std::string debugTokenType(TokenType type) { switch (type) { case TokenType::IDENTIFIER: @@ -32,6 +63,8 @@ std::string debugTokenType(TokenType type) { return "RBRACE"; case TokenType::SEMICOLON: return "SEMICOLON"; + case TokenType::COMMA: + return "COMMA"; case TokenType::END_OF_FILE: return "END_OF_FILE"; @@ -61,21 +94,28 @@ std::string debugTokenType(TokenType type) { return "KEYWORD_CLIENT_MAX_BODY_SIZE"; case TokenType::KEYWORD_CGI_EXTENSION: return "KEYWORD_CGI_EXTENSION"; - default: return "UNKNOWN"; } } +/** + * @brief Builds a human-readable token string (for logs and errors). + * + * @details Omits overly large payloads to avoid noisy logs. + * + * @param token Token to stringify. + * @return A string like: `[Token type=IDENTIFIER value="root" line=3 column=5]`. 
+ * @ingroup config_tokenizing + */ std::string debugToken(const Token& token) { // Avoid logging large token payloads to prevent excessive output if (token.value.size() > 1024 * 1024) { - return "[Token ]"; // Return a placeholder for large tokens + return "[Token ]"; } std::ostringstream oss; - // Build the string representation of the token with its type, value, line, and column oss << "[Token type=" << debugTokenType(token.type) << " value=\"" << token.value << "\" line=" << token.line << " column=" << token.column << "]"; - return oss.str(); // Return the formatted token string + return oss.str(); } diff --git a/src/config/validateConfig.cpp b/src/config/validateConfig.cpp index 87925ea2..33c9986d 100644 --- a/src/config/validateConfig.cpp +++ b/src/config/validateConfig.cpp @@ -3,13 +3,43 @@ /* ::: :::::::: */ /* validateConfig.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/20 23:23:50 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:15:58 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:49:12 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file validateConfig.cpp + * @brief Implements validation logic for parsed configuration objects. + * + * @details This file defines the validation pipeline for the Webserv configuration + * after parsing and normalization. It enforces semantic correctness of + * `Server` and `Location` blocks, ensuring that directives are consistent, + * non-conflicting, and point to valid filesystem resources. 
+ * + * The validation routines cover: + * - Presence of at least one location per server + * - Correct syntax and uniqueness of location paths + * - Required defaults (root or return directive) + * - Validity of `server_name` (format, duplicates, uniqueness per host:port) + * - Port and host binding conflicts + * - Error page code ranges + * - Redirect codes restricted to standard 3xx values + * - Allowed HTTP methods (`GET`, `POST`, `DELETE`) + * - Non-zero client body size limits + * - CGI extensions and interpreter mappings + * - Existence and type of root and upload directories + * - Index file usage with valid roots + * + * Together, these checks guarantee that the configuration is safe to + * use at runtime, preventing invalid states and improving user feedback + * with actionable error messages. + * + * @ingroup config_validation + */ + #include "config/validateConfig.hpp" #include "config/Config.hpp" // for Config #include "config/parser/ConfigParseError.hpp" // for ValidationError @@ -31,9 +61,25 @@ namespace { namespace fs = std::filesystem; +/** + * @brief Ensures that each server has at least one location block. + * + * @details Iterates through all parsed servers and checks whether they + * contain at least one `Location` directive. + * A server without locations cannot properly handle requests, + * so this function throws a `ValidationError` if any server is + * missing location blocks. + * + * @param servers List of parsed server configurations to validate. + * + * @throws ValidationError If a server has no associated location blocks. 
+ * + * @ingroup config_validation + */ void validateHasLocation(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { if (servers[serverIndex].getLocations().empty()) { + // Invalid: No location blocks defined for this server throw ValidationError("Missing location blocks in server #" + std::to_string(serverIndex + 1) + "\n→ Add at least one 'location' block to handle requests"); @@ -41,6 +87,25 @@ void validateHasLocation(const std::vector& servers) { } } +/** + * @brief Validates location paths inside each server block. + * + * @details This function ensures that: + * - Every location path is non-empty and begins with a '/'. + * - No path segment starts with '.' (disallows '.', '..', hidden or malformed segments). + * - Each location path within a server is unique (no duplicates allowed). + * + * Violations result in a `ValidationError` with a detailed message. + * + * @param servers List of parsed server configurations to validate. + * + * @throws ValidationError If: + * - A location path is empty or does not start with '/', + * - A path segment begins with '.', + * - A duplicate location path is found within the same server. 
+ * + * @ingroup config_validation + */ void validateLocationPaths(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; @@ -49,11 +114,13 @@ void validateLocationPaths(const std::vector& servers) { for (const Location& loc : server.getLocations()) { const std::string& path = loc.getPath(); + // Must be non-empty and start with '/' if (path.empty() || path[0] != '/') { throw ValidationError("Invalid location path '" + path + "' in server #" + std::to_string(serverIndex + 1) + "\n→ Must start with '/'"); } + // Validate each segment of the path (no '.'-prefixed parts) fs::path locPath(path); for (const auto& part : locPath) { const std::string seg = part.string(); @@ -65,6 +132,7 @@ void validateLocationPaths(const std::vector& servers) { } } + // Ensure path uniqueness within the same server if (!seenPaths.insert(path).second) { throw ValidationError("Duplicate location path '" + path + "' in server #" + std::to_string(serverIndex + 1)); @@ -73,6 +141,26 @@ void validateLocationPaths(const std::vector& servers) { } } +/** + * @brief Validates default requirements for each location block. + * + * @details This function enforces two critical invariants for location definitions: + * - Each location must define at least one of the following: + * - A `root` directive (filesystem path for content), + * - OR a `return` directive (HTTP redirection). + * - A location cannot define both CGI behavior (`cgi_extension`) and a redirection + * (`return`) at the same time, since these are mutually exclusive. + * + * Violations result in a `ValidationError` with descriptive guidance. + * + * @param servers List of parsed server configurations to validate. + * + * @throws ValidationError If: + * - A location block has neither `root` nor `return`, + * - A location block has both `return` and `cgi_extension` defined. 
+ * + * @ingroup config_validation + */ void validateLocationDefaults(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; @@ -90,7 +178,7 @@ void validateLocationDefaults(const std::vector& servers) { "\n→ Each location must have at least a 'root' or a 'return'"); } - // Cannot combine CGI behavior and redirect + // Cannot combine CGI behavior with redirect if (location.hasRedirect() && !location.getCgiExtensions().empty()) { throw ValidationError( "Location '" + path + "' in server #" + std::to_string(serverIndex + 1) + @@ -101,7 +189,18 @@ void validateLocationDefaults(const std::vector& servers) { } } -// Utility: Split string by '.' +/** + * @brief Splits a domain name into its individual labels. + * + * @details A domain name like `"example.com"` is split into + * `{"example", "com"}`. The splitting uses `.` as the separator, + * and empty labels are preserved if consecutive dots appear. + * + * @param name Domain name string to split. + * @return A vector of labels (substrings between dots). + * + * @ingroup config_validation + */ static std::vector splitLabels(const std::string& name) { std::vector labels; std::size_t start = 0; @@ -115,7 +214,19 @@ static std::vector splitLabels(const std::string& name) { return labels; } -// Validate a single label (RFC 1035 rules) +/** + * @brief Validates a single domain label according to RFC 1035. + * + * @details Checks that: + * - Label is non-empty and no longer than 63 characters. + * - Does not begin or end with a hyphen (`-`). + * - Contains only alphanumeric characters (`a-z`, `A-Z`, `0-9`) or hyphens. + * + * @param label The label string to validate. + * @return `true` if the label is valid, `false` otherwise. 
+ * + * @ingroup config_validation + */ bool isValidLabel(const std::string& label) { if (label.empty() || label.size() > 63) return false; @@ -128,7 +239,19 @@ bool isValidLabel(const std::string& label) { return true; } -// Full domain validation +/** + * @brief Validates a full server name against domain format rules. + * + * @details Enforces RFC 1035-style constraints: + * - Name must be non-empty and no longer than 253 characters. + * - Cannot contain consecutive dots (`..`), which would create empty labels. + * - Each label (between dots) must satisfy isValidLabel. + * + * @param name Full domain/server name to validate. + * @return `true` if the name follows domain rules, `false` otherwise. + * + * @ingroup config_validation + */ bool isServerNameValid(const std::string& name) { if (name.empty() || name.size() > 253) return false; @@ -145,18 +268,46 @@ bool isServerNameValid(const std::string& name) { return true; } +/** + * @brief Validates the format of all `server_name` directives across servers. + * + * @details Ensures that every declared `server_name` in the configuration + * adheres to domain naming conventions and does not contain invalid + * characters. Specifically, it checks: + * + * - Non-empty names: Empty strings are not allowed. + * - Printable characters only: No control characters (e.g., newlines, tabs). + * - No whitespace: Spaces are not valid inside domain names. + * - RFC 1035 compliance via isServerNameValid: + * - Maximum length 253 characters. + * - No empty labels (e.g. `"example..com"`). + * - Labels up to 63 characters, only alphanumeric and `-`. + * - No leading or trailing `-` in a label. + * + * If any of these conditions are violated, a ValidationError is thrown + * with a descriptive message indicating the invalid server name and the server + * block index where it was found. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if an invalid `server_name` is detected. 
+ * + * @ingroup config_validation + */ void validateServerNameFormat(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const std::vector& serverNames = servers[serverIndex].getServerNames(); for (std::size_t nameIndex = 0; nameIndex < serverNames.size(); ++nameIndex) { const std::string& name = serverNames[nameIndex]; + // must not be empty if (name.empty()) { throw ValidationError("Empty server_name is not allowed in server #" + std::to_string(serverIndex + 1) + "\n→ Specify a non-empty string for server_name"); } + // must contain only printable characters for (std::size_t charIndex = 0; charIndex < name.size(); ++charIndex) { unsigned char c = static_cast(name[charIndex]); if (!std::isprint(c)) { @@ -168,6 +319,7 @@ void validateServerNameFormat(const std::vector& servers) { } } + // no whitespace allowed if (name.find(' ') != std::string::npos) { throw ValidationError( "Whitespace not allowed in server_name in server #" + @@ -175,18 +327,44 @@ void validateServerNameFormat(const std::vector& servers) { "'\n→ Use valid domain-like names without spaces or line breaks"); } + // must follow domain name rules (RFC 1035) if (!isServerNameValid(name)) { throw ValidationError( "Invalid domain format in server_name in server #" + std::to_string(serverIndex + 1) + ": '" + name + "'\n→ Must follow domain format (RFC 1035): labels may only contain a-z, 0-9, " - "dashes; " - "no empty labels, no leading/trailing dashes, and max 253 characters total"); + "dashes; no empty labels, no leading/trailing dashes, and max 253 characters " + "total"); } } } } +/** + * @brief Ensures uniqueness of `server_name` directives within each server block. + * + * @details This function validates that no single `server` block declares the same + * `server_name` more than once. While multiple distinct names are allowed + * (for virtual hosting and aliases), each must be unique within the same + * server definition. 
+ * + * Validation rules: + * - A `server_name` must not appear more than once in the same server block. + * - Duplicate names within the same block trigger a @ref ValidationError. + * - Cross-server duplicates are allowed if they bind to different host:port + * combinations (checked separately in @ref validateUniquePorts). + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if a duplicate `server_name` is found inside the + * same server block. + * + * @ingroup config_validation + * + * @see validateServerNameFormat Validates syntax/format of each server_name. + * @see validateUniquePorts Ensures uniqueness of (host, port, server_name) + * tuples across servers. + */ void validateUniqueServerNames(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const std::vector& names = servers[serverIndex].getServerNames(); @@ -202,6 +380,31 @@ void validateUniqueServerNames(const std::vector& servers) { } } +/** + * @brief Ensures uniqueness of (host, port, server_name) bindings across servers. + * + * @details This function validates that no two servers conflict on the same + * `(host, port, server_name)` tuple, which would otherwise cause + * ambiguous routing at runtime. + * + * Validation rules: + * - Each `(host, port, server_name)` combination must be unique globally. + * - If a server has no `server_name`, it is considered the default for that + * `(host, port)` pair. Only one default server is allowed per host:port. + * - Duplicate `server_name` values across different servers are allowed only + * if they bind to different `(host, port)` pairs. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - Two or more servers define the same `(host, port, server_name)` combination. + * - More than one default server (no `server_name`) exists on the same host:port. 
+ * + * @ingroup config_validation + * + * @see validateUniqueServerNames Ensures names are unique inside one server block. + * @see validateServerNameFormat Checks domain-like syntax of each server_name. + */ void validateUniquePorts(const std::vector& servers) { using HostPort = std::pair; using NameSet = std::unordered_set; @@ -235,6 +438,21 @@ void validateUniquePorts(const std::vector& servers) { } } +/** + * @brief Validates that all configured error_page directives use valid HTTP status codes. + * + * @details Iterates over each server’s `error_page` mappings and ensures that every + * status code falls within the HTTP error range **400–599**. + * These codes represent client (4xx) and server (5xx) errors only. + * Other codes (e.g., 200, 301) are not valid for error pages. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - Any error_page status code is outside the 400–599 range. + * + * @ingroup config_validation + */ void validateErrorPageCodes(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; @@ -254,6 +472,25 @@ void validateErrorPageCodes(const std::vector& servers) { } } +/** + * @brief Validates that all `return` directives use valid HTTP redirection codes. + * + * @details This function iterates through all locations of each server + * and checks the status codes defined in the `return` directive. + * Only the following redirection status codes are allowed: + * 301, 302, 303, 307, 308. + * + * These represent permanent or temporary redirects and are standard + * in HTTP/1.1. Any other code (e.g., 200, 404, 500) is invalid + * when used with `return`. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - A `return` directive uses a status code not in {301, 302, 303, 307, 308}. 
+ * + * @ingroup config_validation + */ void validateRedirectCodes(const std::vector& servers) { static const std::array ALLOWED = {{301, 302, 303, 307, 308}}; for (std::size_t i = 0; i < servers.size(); ++i) { @@ -272,6 +509,24 @@ void validateRedirectCodes(const std::vector& servers) { } } +/** + * @brief Validates that all HTTP methods declared in `methods` directives are allowed. + * + * @details This function iterates through every location of each server and + * ensures that the configured HTTP methods are valid. + * Only the following methods are supported by this server: + * **GET, POST, DELETE**. + * + * If any other method (e.g., PUT, PATCH, OPTIONS, HEAD) is specified, + * the configuration is considered invalid and a ValidationError is thrown. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - A location declares an unsupported HTTP method. + * + * @ingroup config_validation + */ void validateAllowedMethods(const std::vector& servers) { static const std::set validMethods = {"GET", "POST", "DELETE"}; @@ -292,6 +547,24 @@ void validateAllowedMethods(const std::vector& servers) { } } +/** + * @brief Validates that `client_max_body_size` is set to a positive value. + * + * @details This function checks each server’s configured maximum request body size + * (`client_max_body_size`) to ensure it is not set to **zero**. + * A value of `0` would effectively disable all request bodies, + * which is considered an invalid configuration. + * + * The size is typically specified in bytes or with human-readable units + * like `k`, `m`, or `g` (e.g., `1m` = 1 megabyte). + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - `client_max_body_size` is set to `0`. 
+ * + * @ingroup config_validation + */ void validateClientMaxBodySize(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; @@ -306,6 +579,28 @@ void validateClientMaxBodySize(const std::vector& servers) { } } +/** + * @brief Validates the format of CGI extensions in all server locations. + * + * @details Iterates over every server and its `location` blocks to ensure + * that configured CGI extensions follow a valid format: + * - Must **not** be empty. + * - Must begin with a dot (`.`). + * - Must contain at least one alphanumeric character after the dot + * (e.g., `.php`, `.py`, `.pl`). + * + * This ensures that CGI extensions are properly declared before they + * are mapped to interpreters using the `cgi_interpreter` directive. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - The extension is empty (`""`). + * - The extension is only `"."`. + * - The extension does not start with a dot (`.`). + * + * @ingroup config_validation + */ void validateCgiExtensions(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; @@ -327,6 +622,22 @@ void validateCgiExtensions(const std::vector& servers) { } } +/** + * @brief Validates `index` directives in all server locations. + * + * @details Iterates over every server and its `location` blocks to ensure + * that any declared `index` directive is meaningful. An index file + * (e.g., `index.html`) is only valid if the location also specifies + * a `root` directory, since the server must resolve the index file + * path relative to that root. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - A `location` declares an `index` directive but has no `root`. 
+ * + * @ingroup config_validation + */ void validateIndexFiles(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; @@ -398,6 +709,23 @@ void validateIndexFiles(const std::vector& servers) { } } */ +/** + * @brief Validates consistency between `cgi_extension` and `cgi_interpreter` directives. + * + * @details Ensures that every declared CGI extension has a corresponding + * interpreter and that no extra interpreters exist for undeclared + * extensions. This prevents misconfigured mappings where requests + * could not be executed correctly or could point to unused binaries. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - A `cgi_extension` exists without a matching `cgi_interpreter`. + * - A `cgi_interpreter` is defined for an extension that is not listed in + * `cgi_extension` (orphaned mapping). + * + * @ingroup config_validation + */ void validateCgiInterpreters(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; @@ -434,31 +762,58 @@ void validateCgiInterpreters(const std::vector& servers) { } } +/** + * @brief Validates that all `root` directives point to existing directories. + * + * @details Iterates over all server locations and verifies that: + * - Locations without a `return` directive must have a `root`. + * - The specified root path must exist on the filesystem. + * - The path must be a directory (not a file or invalid path). + * + * This ensures that each location serving files has a valid, accessible root + * directory, preventing runtime errors due to missing or invalid paths. + * + * @param servers Vector of configured Server objects to validate. + * + * @throws ValidationError if: + * - A location missing a `return` directive has no `root`. 
+ * - The specified root path does not exist. + * - The root path exists but is not a directory. + * + * @ingroup config_validation + */ void validateRootsExist(const std::vector& servers) { for (std::size_t serverIndex = 0; serverIndex < servers.size(); ++serverIndex) { const Server& server = servers[serverIndex]; for (const Location& loc : server.getLocations()) { - // If location is a redirect, root is not required for serving + // Redirect-only locations do not require a filesystem root if (loc.hasRedirect()) continue; const std::string& root = loc.getRoot(); + + // Defensive check: non-redirecting locations must define a root + // (Should already be enforced by validateLocationDefaults) if (root.empty()) { - // Should already be prevented by validateLocationDefaults, but double-guard throw ValidationError("Location '" + loc.getPath() + "' in server #" + std::to_string(serverIndex + 1) + " is missing a 'root' (required when no 'return' is set)"); } + // Probe filesystem status; capture errors via std::error_code std::error_code ec; const fs::file_status st = fs::status(root, ec); + + // Path must exist if (ec || !fs::exists(st)) { throw ValidationError("Root path '" + root + "' does not exist for location '" + loc.getPath() + "' in server #" + std::to_string(serverIndex + 1) + "\n→ Create the directory or update the 'root' path."); } + + // Path must resolve to a directory, not a regular file etc. (fs::status follows symlinks, so a symlink to a directory is accepted) if (!fs::is_directory(st)) { throw ValidationError("Root path '" + root + "' is not a directory for location '" + loc.getPath() + "' in server #" + @@ -469,6 +824,21 @@ void validateRootsExist(const std::vector& servers) { } } +/** + * @brief Validates upload store directories for all servers and locations. + * + * @details Ensures that whenever `upload_store` is enabled in a location block: + * - The configured path exists in the filesystem. + * - The path points to a directory (not a file or special node).
+ * + * @param servers Collection of server configuration objects to validate. + * + * @throws ValidationError If: + * - The configured upload store path does not exist. + * - The path exists but is not a directory. + * + * @ingroup config_validation + */ void validateUploadStores(const std::vector& servers) { namespace fs = std::filesystem; diff --git a/src/core/Location.cpp b/src/core/Location.cpp index 17047248..432c22a2 100644 --- a/src/core/Location.cpp +++ b/src/core/Location.cpp @@ -3,13 +3,25 @@ /* ::: :::::::: */ /* Location.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/04/30 09:45:32 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:26:32 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:51:27 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file Location.cpp + * @brief Implements the Location class for route-specific configuration. + * + * @details Defines all methods for @ref Location, including setters, getters, + * and logic helpers for request routing and filesystem resolution. + * A Location manages path matching, allowed methods, redirection, + * CGI parameters, index files, autoindexing, and upload storage. + * + * @ingroup location_component + */ + #include "core/Location.hpp" #include "utils/filesystemUtils.hpp" // for normalizePath, joinPath #include "utils/stringUtils.hpp" // for toLower @@ -19,112 +31,280 @@ #include // for pair #include // for vector -/////////////////////// -// --- Constructor --- +//=== Construction & Special Members ===================================== +/** + * @brief Constructs a Location with safe defaults. + * + * @details Initializes autoindex to `false` and return code to `0`. + * Other members are left empty until populated by configuration. 
+ * + * @ingroup location_component + */ Location::Location() : _autoindex(false), _return_code(0) { } -/////////////// -// --- Setters +//=== Configuration Setters ============================================== +/** + * @brief Sets the URI path this location matches. + * + * @param path Canonical route prefix (e.g., "/images"). + * + * @ingroup location_component + */ void Location::setPath(const std::string& path) { _path = path; } +/** + * @brief Adds a single allowed HTTP method. + * + * @param method Method name (e.g., "GET", "POST"). + * + * @ingroup location_component + */ void Location::addMethod(const std::string& method) { _methods.insert(method); } +/** + * @brief Replaces the set of allowed HTTP methods. + * + * @param methods List of allowed methods. + * + * @ingroup location_component + */ void Location::setAllowedMethods(const std::vector& methods) { _methods.clear(); - for (std::vector::const_iterator it = methods.begin(); it != methods.end(); ++it) { + for (std::vector::const_iterator it = methods.begin(); it != methods.end(); ++it) addMethod(*it); - } } +/** + * @brief Sets the filesystem root for this location. + * + * @param root Directory path. + * + * @ingroup location_component + */ void Location::setRoot(const std::string& root) { _root = root; } +/** + * @brief Appends an index file candidate in preference order. + * + * @param idx Filename (e.g., "index.html"). + * + * @ingroup location_component + */ void Location::addIndexFile(const std::string& idx) { _index_files.push_back(idx); } +/** + * @brief Enables or disables directory listing. + * + * @param enabled `true` to enable, `false` to disable. + * + * @ingroup location_component + */ void Location::setAutoindex(bool enabled) { _autoindex = enabled; } +/** + * @brief Configures an HTTP redirect. + * + * @param target Target URL. + * @param code HTTP status code (default 301). 
+ * + * @ingroup location_component + */ void Location::setRedirect(const std::string& target, int code) { _redirect = target; _return_code = code; } +/** + * @brief Sets the upload store directory. + * + * @param path Filesystem path for uploaded files. + * + * @ingroup location_component + */ void Location::setUploadStore(const std::string& path) { _upload_store = path; } +/** + * @brief Adds a CGI extension. + * + * @param ext Extension (e.g., ".php"). + * + * @ingroup location_component + */ void Location::addCgiExtension(const std::string& ext) { _cgi_extensions.push_back(ext); } +/** + * @brief Associates a CGI extension with its interpreter. + * + * @param ext Extension (e.g., ".py"). + * @param path Interpreter path. + * + * @ingroup location_component + */ void Location::addCgiInterpreter(const std::string& ext, const std::string& path) { _cgi_interpreters[ext] = path; } -/////////////// -// --- Getters +//=== Queries (Getters) =================================================== +/** + * @brief Returns the path this location matches. + * + * @return Reference to the path string. + * + * @ingroup location_component + */ const std::string& Location::getPath() const { return _path; } +/** + * @brief Returns the allowed HTTP methods. + * + * @return Set of method strings. + * + * @ingroup location_component + */ const std::set& Location::getMethods() const { return _methods; } +/** + * @brief Returns the root directory path. + * + * @return Reference to root string. + * + * @ingroup location_component + */ const std::string& Location::getRoot() const { return _root; } +/** + * @brief Returns the first index filename, or empty string. + * + * @return Reference to filename or empty string. + * + * @ingroup location_component + */ const std::string& Location::getIndex() const { static const std::string empty; return _index_files.empty() ? empty : _index_files.front(); } +/** + * @brief Returns all index filenames. + * + * @return Vector of filenames. 
+ * + * @ingroup location_component + */ const std::vector& Location::getIndexFiles() const { return _index_files; } +/** + * @brief Checks if autoindexing is enabled. + * + * @return `true` if enabled. + * + * @ingroup location_component + */ bool Location::isAutoindexEnabled() const { return _autoindex; } +/** + * @brief Checks if a redirect is configured. + * + * @return `true` if redirect is set. + * + * @ingroup location_component + */ bool Location::hasRedirect() const { return !_redirect.empty(); } +/** + * @brief Returns the redirect target. + * + * @return Reference to redirect URL. + * + * @ingroup location_component + */ const std::string& Location::getRedirect() const { return _redirect; } +/** + * @brief Returns the redirect HTTP status code. + * + * @return HTTP code. + * + * @ingroup location_component + */ int Location::getReturnCode() const { return _return_code; } +/** + * @brief Returns the upload store directory. + * + * @return Reference to upload path. + * + * @ingroup location_component + */ const std::string& Location::getUploadStore() const { return _upload_store; } +/** + * @brief Returns the first CGI extension, or empty string. + * + * @return Reference to extension string. + * + * @ingroup location_component + */ const std::string& Location::getCgiExtension() const { static const std::string empty; return _cgi_extensions.empty() ? empty : _cgi_extensions.front(); } +/** + * @brief Returns all CGI extensions. + * + * @return Vector of extensions. + * + * @ingroup location_component + */ const std::vector& Location::getCgiExtensions() const { return _cgi_extensions; } +/** + * @brief Returns the interpreter for a CGI extension. + * + * @param ext Extension (case-insensitive, optional dot). + * @return Interpreter path or empty string. 
+ * + * @ingroup location_component + */ std::string Location::getCgiInterpreter(const std::string& ext) const { std::string key = toLower(ext); if (!key.empty() && key[0] != '.') @@ -134,32 +314,64 @@ std::string Location::getCgiInterpreter(const std::string& ext) const { return (it != _cgi_interpreters.end()) ? it->second : ""; } +/** + * @brief Returns the CGI extension → interpreter map. + * + * @return Map of extension to interpreter path. + * + * @ingroup location_component + */ const std::map& Location::getCgiInterpreterMap() const { return _cgi_interpreters; } -///////////////////// -// --- Logic Helpers +//=== Logic Helpers ======================================================= +/** + * @brief Checks if allowed methods list is non-empty. + * + * @return `true` if at least one method is allowed. + * + * @ingroup location_component + */ bool Location::hasAllowedMethods() const { return !_methods.empty(); } +/** + * @brief Checks if a method is allowed. + * + * @param method Method name. + * @return `true` if allowed. + * + * @ingroup location_component + */ bool Location::isMethodAllowed(const std::string& method) const { return _methods.count(method) > 0; } -/* bool Location::matchesPath(const std::string& uri) const { - return normalizePath(uri).rfind(normalizePath(_path), 0) == 0; -} */ - +/** + * @brief Checks if a URI matches this location's path. + * + * @param uri Request URI. + * @return `true` if URI starts with location path. + * + * @ingroup location_component + */ bool Location::matchesPath(const std::string& uri) const { std::string cleanUri = normalizePath(uri); std::string locPath = normalizePath(_path); - return cleanUri.rfind(locPath, 0) == 0; } +/** + * @brief Resolves an absolute filesystem path from a URI. + * + * @param uri Request URI. + * @return Joined filesystem path, or empty on mismatch. 
+ * + * @ingroup location_component + */ std::string Location::resolveAbsolutePath(const std::string& uri) const { std::string cleanUri = normalizePath(uri); if (!matchesPath(cleanUri) || _path.length() > cleanUri.length()) @@ -167,15 +379,37 @@ std::string Location::resolveAbsolutePath(const std::string& uri) const { return joinPath(_root, cleanUri.substr(_path.length())); } +/** + * @brief Checks if uploads are enabled. + * + * @return `true` if upload store is set. + * + * @ingroup location_component + */ bool Location::isUploadEnabled() const { return !_upload_store.empty(); } +/** + * @brief Checks if a path refers to a CGI request. + * + * @param path Filesystem or URI path. + * @return `true` if extension is in CGI list. + * + * @ingroup location_component + */ bool Location::isCgiRequest(const std::string& path) const { std::string ext = std::filesystem::path(path).extension().string(); return std::find(_cgi_extensions.begin(), _cgi_extensions.end(), ext) != _cgi_extensions.end(); } +/** + * @brief Returns absolute path to first index file. + * + * @return `joinPath(root, index)` or empty if none. + * + * @ingroup location_component + */ std::string Location::getEffectiveIndexPath() const { if (_index_files.empty()) return ""; diff --git a/src/core/Server.cpp b/src/core/Server.cpp index ce6008ab..6b47b552 100644 --- a/src/core/Server.cpp +++ b/src/core/Server.cpp @@ -3,10 +3,10 @@ /* ::: :::::::: */ /* Server.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/04/30 09:51:19 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:28:16 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:53:25 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ @@ -19,95 +19,248 @@ * error pages, body size limits, and associated location blocks. * It is part of the configuration system and supports parsing and runtime use. 
* - * @ingroup config + * @ingroup server_component */ -#include "core/Server.hpp" +#include "core/Server.hpp" // Server class declaration #include "utils/stringUtils.hpp" // for toLower #include // for any_of #include // for size_t #include // for string_view, operator==, basic_stri... -/////////////////// -// --- Constructor - +//=== Construction & Special Members ===================================== +/** + * @brief Constructs a Server instance with default settings. + * + * @details Initializes default values for host (`"0.0.0.0"`), port (`80`), + * client max body size (`1 MiB`), and empty location/error blocks. + * Intended to be populated via configuration parsing. + * + * @ingroup server_component + */ Server::Server() - : _port(80) // Default HTTP port - , - _host("0.0.0.0") // Default bind address - , + : _port(80), // Default HTTP port + _host("0.0.0.0"), // Default bind address _client_max_body_size(1 * 1024 * 1024) // 1 MiB { } -/////////////// -// --- Setters - +//=== Configuration Setters ============================================== +/** + * @brief Sets the port number this server will listen on. + * + * @details This port must be in the range [0, 65535]. It determines which TCP port + * the server binds to for accepting incoming connections. Typically set via + * the `listen` directive in the configuration file. Validation is done before calling. + * + * @param port The TCP port number to bind to. + * @ingroup server_component + */ void Server::setPort(int port) noexcept { _port = port; } +/** + * @brief Sets the IP address to bind this server to. + * + * @details The host determines which network interface(s) the server will listen on. + * A value of `"0.0.0.0"` binds to all interfaces. This is typically configured + * via the `host` directive in the configuration file. No validation is done here. + * + * @param host The IP address to bind (e.g., "127.0.0.1" or "0.0.0.0"). 
+ * @ingroup server_component + */ void Server::setHost(std::string_view host) noexcept { _host = host; } +/** + * @brief Adds a server name alias for this virtual host. + * + * @details Server names are used to match the `Host` header of incoming HTTP requests. + * Multiple names can be added to support name-based virtual hosting. + * The name is lowercased before being stored; this method appends without deduplication. + * + * @param name The server name to add (e.g., "example.com"). + * @ingroup server_component + */ void Server::addServerName(std::string_view name) { // Append the given server name to the list of aliases. _server_names.emplace_back(toLower(std::string(name))); } +/** + * @brief Sets a custom error page for a specific HTTP status code. + * + * @details Associates an HTTP error code (e.g., 404, 500) with a file path + * that will be served when that error occurs. Overrides the default + * error response. Multiple codes can share the same file path. + * + * @param code The HTTP error status code to override. + * @param path The file path to serve as the custom error page. + * @ingroup server_component + */ void Server::setErrorPage(int code, const std::string& path) { // Map the given HTTP status code to a custom error page path. _error_pages[code] = path; } +/** + * @brief Sets the maximum allowed size for the HTTP request body. + * + * @details Used to limit the size of incoming requests, particularly for + * POST and PUT methods. If a request exceeds this size, the server + * should reject it with a 413 Payload Too Large response. + * + * @param size Maximum body size in bytes. + * @ingroup server_component + */ void Server::setClientMaxBodySize(std::size_t size) noexcept { _client_max_body_size = size; } +/** + * @brief Adds a location block to this server. + * + * @details Appends a new `Location` object representing a URI-matching block + * with its own configuration. Locations define routing rules and behavior + * for specific URI prefixes under this server.
+ * + * @param location The `Location` instance to add. + * @ingroup server_component + */ void Server::addLocation(const Location& location) { // Add a new location block to the server's routing table. _locations.push_back(location); } -/////////////// -// --- Getters +//=== Queries (Getters) =================================================== +/** + * @brief Returns the port number this server listens on. + * + * @details Reflects the `listen` directive from the configuration. Value is in + * the valid TCP range [0, 65535]. + * + * @return The configured TCP port. + * @ingroup server_component + */ int Server::getPort() const noexcept { return _port; } +/** + * @brief Returns the configured host IP address for this server. + * + * @details This address determines which local interface(s) the server binds to. + * A value of `"0.0.0.0"` means it will accept connections on all interfaces. + * Typically set via the `host` directive in the configuration file. + * + * @return Reference to the host IP address string. + * @ingroup server_component + */ const std::string& Server::getHost() const noexcept { return _host; } -const std::string Server::getDefaultServerName() const { - return _server_names.empty() ? "localhost" : _server_names.front(); -} - +/** + * @brief Returns the list of server name aliases for this virtual host. + * + * @details These names are used to match the `Host` header in incoming HTTP requests. + * If none match, the first declared server for the host:port is used as default. + * Configured via the `server_name` directive. + * + * @return Reference to the list of server names. + * @ingroup server_component + */ const std::vector& Server::getServerNames() const noexcept { return _server_names; } +/** + * @brief Returns the default server name for this virtual host. + * + * @details If no server names have been configured for this instance, the method + * returns `"localhost"`. 
Otherwise, it returns the first declared name + * from the configured list of server names. This value is used as the + * fallback when no explicit name match is found during host-based + * request routing. + * + * @return The default server name string. + * @ingroup server_component + */ +const std::string Server::getDefaultServerName() const { + return _server_names.empty() ? "localhost" : _server_names.front(); +} + +/** + * @brief Returns the mapping of HTTP error codes to custom error pages. + * + * @details This map associates specific HTTP status codes (e.g., 404, 500) + * with file paths to serve instead of default error messages. + * Configured via the `error_page` directive. + * + * @return Reference to the map of error codes to file paths. + * @ingroup server_component + */ const std::map& Server::getErrorPages() const noexcept { return _error_pages; } +/** + * @brief Returns the maximum allowed size for the request body. + * + * @details This limit applies to the content length of incoming HTTP requests, + * including POST uploads. If exceeded, the server should return + * a 413 Payload Too Large error. Configured via the `client_max_body_size` directive. + * + * @return The maximum request body size in bytes. + * @ingroup server_component + */ std::size_t Server::getClientMaxBodySize() const noexcept { return _client_max_body_size; } +/** + * @brief Returns the list of location blocks defined for this server. + * + * @details Each location block defines a URI prefix and associated behavior + * (e.g., root, methods, CGI, redirects). During request handling, the + * server selects the best-matching location based on the URI. + * + * @return Reference to the list of `Location` objects. + * @ingroup server_component + */ const std::vector& Server::getLocations() const noexcept { return _locations; } +/** + * @brief Returns a mutable reference to the server's location blocks. 
+ * + * @details Allows in-place modification of the list of `Location` objects, + * typically used during configuration parsing to populate new routes. + * Use with care to avoid breaking routing logic. + * + * @return Reference to the list of `Location` objects. + * @ingroup server_component + */ std::vector& Server::getLocations() noexcept { return _locations; } -///////////////// -// --- Utilities - +//=== Matching & Predicates ============================================== +/** + * @brief Checks whether the server matches the given server name. + * + * @details Compares the provided name against the configured server names + * for this virtual host. Used during request routing based on the + * `Host` header in the HTTP request. + * + * @param name The server name to check (case-sensitive). + * @return `true` if the name matches one of the configured server names. + * @ingroup server_component + */ bool Server::hasServerName(std::string_view name) const noexcept { // Check if the given name matches any of the configured server names. return std::any_of(_server_names.begin(), _server_names.end(), diff --git a/src/core/main.cpp b/src/core/main.cpp index b47687b5..96fc3451 100644 --- a/src/core/main.cpp +++ b/src/core/main.cpp @@ -3,18 +3,41 @@ /* ::: :::::::: */ /* main.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/03 13:11:30 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:27:32 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:52:08 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ -#include "core/webserv.hpp" // for runWebserv -#include // for EXIT_FAILURE -#include // for exception -#include // for char_traits, basic_ostream, operator<< +/** + * @file main.cpp + * @brief Entry point for the Webserv application. + * + * @details Initializes and runs the Webserv HTTP server. 
+ * Delegates execution to `runWebserv`, handling all uncaught exceptions. + * @ingroup entrypoint + */ +#include "core/runWebserv.hpp" // for runWebserv +#include // for EXIT_FAILURE +#include // for exception +#include // for char_traits, basic_ostream, operator<< + +/** + * @brief Program entry point. + * + * @details Starts the Webserv server using the provided configuration file path + * or a default configuration if none is given. + * + * @param argc Argument count. + * @param argv Argument vector. The first optional argument is the path to the configuration file. + * @return `EXIT_SUCCESS` on successful shutdown, `EXIT_FAILURE` on error. + * + * @note Exceptions derived from `std::exception` thrown by `runWebserv` are caught by the function-try-block below and are not propagated to the caller. + * + * @ingroup entrypoint + */ int main(int argc, char** argv) try { return runWebserv(argc, argv); } catch (const std::exception& e) { @@ -24,17 +47,3 @@ int main(int argc, char** argv) try { std::cerr << "webserv encountered an unexpected error" << std::endl; return EXIT_FAILURE; } - -/* #include "core/webserv.hpp" -#include "config/parser/ConfigParseError.hpp" - -#include -#include -#include - -int main(int argc, char** argv) try { - return runWebserv(argc, argv); -} catch (const ConfigParseError& e) { - std::cerr << "Configuration parse error: " << e.what() << std::endl; - return EXIT_FAILURE; -} */ \ No newline at end of file diff --git a/src/core/runWebserv.cpp b/src/core/runWebserv.cpp new file mode 100644 index 00000000..0eb1d351 --- /dev/null +++ b/src/core/runWebserv.cpp @@ -0,0 +1,141 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* runWebserv.cpp :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: nlouis +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/05/20 21:57:56 by nlouis #+# #+# */ +/* Updated: 2025/08/18 19:54:29 by nlouis ### ########.fr */ +/* */ +/* ************************************************************************** */ + +/** + * @file
runWebserv.cpp + * @brief Webserv bootstrap and runtime orchestration. + * + * @details Resolves the configuration path, loads and parses the configuration file, + * applies normalization and validation, prints the effective config, and + * starts the socket event loop. Helper functions for argument handling and + * file I/O are kept internal to this translation unit. + * @ingroup entrypoint + */ + +#include "config/Config.hpp" // for Config +#include "config/normalizeConfig.hpp" // for normalizeConfig +#include "config/parser/ConfigParser.hpp" // for ConfigParser +#include "config/validateConfig.hpp" // for validateConfig +#include "network/SocketManager.hpp" // for SocketManager +#include "utils/printInfo.hpp" // for printConfig, printUsage +#include // for char_traits, basic_ifstream +#include // for basic_ostringstream +#include // for runtime_error +#include // for EXIT_SUCCESS +#include // for string, allocator, operator+ +#include // for string_view + +/** + * @namespace bootstrap + * @brief Internal startup helpers for Webserv. + * + * @details Provides functions for resolving the configuration file path, + * reading configuration content, and parsing it into a `Config` + * object. These helpers are used exclusively during application + * initialization by `runWebserv()`. + * @ingroup entrypoint + * @internal + */ +namespace bootstrap { + +/** + * @brief Default configuration file path used when no CLI argument is provided. + * @internal + */ +inline constexpr std::string_view DEFAULT_CONFIG_PATH{"./configs/default.conf"}; + +/** + * @brief Resolves the configuration file path from CLI arguments. + * + * @details Accepts either zero or one user-supplied path. When no path is provided, + * the function returns the default path specified by `DEFAULT_CONFIG_PATH`. + * Passing more than one non-program argument is considered an error. + * + * @param argc Argument count. + * @param argv Argument vector. + * @return The resolved configuration file path. 
+ * @throws std::runtime_error If more than one configuration path is supplied. + * @internal + */ +std::string resolveConfigPath(int argc, char** argv) { + std::string config_path; + if (argc == 1) { + config_path = std::string(DEFAULT_CONFIG_PATH); + } else if (argc == 2) { + config_path = argv[1]; + } else if (argc > 2) { + throw std::runtime_error(printUsage()); + } + return config_path; +} + +/** + * @brief Reads the entire configuration file into memory. + * + * @param config_path Path to the configuration file. + * @return The file contents as a single string. + * @throws std::runtime_error If the file cannot be opened. + * @internal + */ +std::string extractFileContent(const std::string& config_path) { + std::ifstream file(config_path); + if (!file) { + throw std::runtime_error("Failed to open config file: " + config_path); + } + + std::ostringstream buffer; + buffer << file.rdbuf(); + return buffer.str(); +} + +/** + * @brief Parses a configuration string into a Config object. + * + * @param fileContent Raw configuration text. + * @return Parsed configuration. + * @throws ConfigParseError On lexical/syntax errors in the configuration text. + * @internal + */ +Config loadConfig(const std::string& fileContent) { + ConfigParser parser(fileContent); + return parser.parseConfig(); +} + +} // namespace bootstrap + +/** + * @brief Runs the Webserv server. + * + * @details Bootstraps the application by resolving the config path, loading and parsing + * the configuration, normalizing defaults, validating constraints, and launching + * the non-blocking socket manager event loop. + * + * @param argc Argument count. + * @param argv Argument vector. Optional: path to the configuration file. + * @return `EXIT_SUCCESS` on clean shutdown; `EXIT_FAILURE` is returned by `main()` on errors. + * @throws std::runtime_error If CLI arguments are invalid or the config file cannot be opened. + * @throws ConfigParseError If the configuration cannot be tokenized/parsed. 
+ * @throws ValidationError If the resulting configuration fails validation rules. + * + * @ingroup entrypoint + */ +int runWebserv(int argc, char** argv) { + std::string configPath = bootstrap::resolveConfigPath(argc, argv); + std::string fileContent = bootstrap::extractFileContent(configPath); + Config config = bootstrap::loadConfig(fileContent); + normalizeConfig(config); + validateConfig(config); + printConfig(config); + SocketManager manager(config.getServers()); + manager.run(); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/src/core/server_utils.cpp b/src/core/server_utils.cpp index 8a9aa6ed..af6ae64a 100644 --- a/src/core/server_utils.cpp +++ b/src/core/server_utils.cpp @@ -3,13 +3,24 @@ /* ::: :::::::: */ /* server_utils.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/02 20:30:41 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:27:08 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:52:16 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file server_utils.cpp + * @brief Implements server selection utilities for virtual host resolution. + * + * @details Provides helper function for matching an incoming connection + * to the appropriate `Server` instance based on the listening port + * and the HTTP `Host` header. This util is part of the + * core request-routing mechanism in Webserv. 
+ * @ingroup server + */ + #include "core/server_utils.hpp" #include "core/Server.hpp" // for Server #include // for runtime_error @@ -44,7 +55,7 @@ const Server& findMatchingServer(const std::vector& servers, int port, } } - // Pass 2: No name match — fall back to the first server that listens on the same port + // Pass 2: No name match, fall back to the first server that listens on the same port for (const Server& server : servers) { if (server.getPort() == port) { return server; // Fallback default server diff --git a/src/core/webserv.cpp b/src/core/webserv.cpp deleted file mode 100644 index a4242662..00000000 --- a/src/core/webserv.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* webserv.cpp :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2025/05/20 21:57:56 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:25:57 by irychkov ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "config/Config.hpp" // for Config -#include "config/normalizeConfig.hpp" // for normalizeConfig -#include "config/parser/ConfigParser.hpp" // for ConfigParser -#include "config/validateConfig.hpp" // for validateConfig -#include "network/SocketManager.hpp" // for SocketManager -#include "utils/printInfo.hpp" // for printConfig, printUsage -#include // for char_traits, basic_ifstream -#include // for basic_ostringstream -#include // for runtime_error -#include // for EXIT_SUCCESS -#include // for string, allocator, operator+ -#include // for string_view - -namespace { -inline constexpr std::string_view DEFAULT_CONFIG_PATH{"./configs/default.conf"}; - -std::string resolveConfigPath(int argc, char** argv) { - std::string config_path; - if (argc == 1) { - config_path = DEFAULT_CONFIG_PATH; - } else if (argc == 2) { - config_path = argv[1]; - } else if (argc > 2) { - throw 
std::runtime_error(printUsage()); - } - return config_path; -} - -std::string extractFileContent(const std::string& config_path) { - std::ifstream file(config_path); - if (!file) { - throw std::runtime_error("Failed to open config file: " + config_path); - } - - std::ostringstream buffer; - buffer << file.rdbuf(); - return buffer.str(); -} - -Config loadConfig(const std::string& fileContent) { - ConfigParser parser(fileContent); - return parser.parseConfig(); -} -} // namespace - -int runWebserv(int argc, char** argv) { - std::string configPath = resolveConfigPath(argc, argv); - std::string fileContent = extractFileContent(configPath); - Config config = loadConfig(fileContent); - normalizeConfig(config); - validateConfig(config); - printConfig(config); - SocketManager manager(config.getServers()); - manager.run(); - return EXIT_SUCCESS; -} \ No newline at end of file diff --git a/src/http/HttpRequest.cpp b/src/http/HttpRequest.cpp index d8b9753e..870a4af8 100644 --- a/src/http/HttpRequest.cpp +++ b/src/http/HttpRequest.cpp @@ -3,13 +3,27 @@ /* ::: :::::::: */ /* HttpRequest.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/11 12:31:58 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:21:51 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 09:07:56 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file HttpRequest.cpp + * @brief Implements the HttpRequest class. + * + * @details Defines all methods of @ref HttpRequest for managing the request line, + * headers, body, query string, and metadata extracted during parsing. + * Includes debugging utilities such as `printRequest()` and normalization + * of header keys. This class is the main container populated by + * @ref HttpRequestParser and consumed by the router and HTTP method + * handlers. 
+ * + * @ingroup http + */ + #include "http/HttpRequest.hpp" #include "utils/stringUtils.hpp" // for toUpper #include // for all_of, min @@ -18,153 +32,260 @@ #include // for basic_ostream, operator<<, cout, left #include // for pair +//=== Construction & Special Members ===================================== + +/** + * @brief Constructs an empty HttpRequest. + * + * @details Initializes fields to default values, including setting + * `_matchedServerIndex` to `0` (first server). + * + * @ingroup http + */ HttpRequest::HttpRequest(void) { _matchedServerIndex = 0; // Default to the first server } +/** + * @brief Destroys the HttpRequest. + * + * @details Provided for completeness; does not manage dynamic resources. + * + * @ingroup http + */ HttpRequest::~HttpRequest(void) { } +//=== Debug & Utilities =================================================== + +/** + * @brief Prints the request in human-readable form. + * + * @details Dumps method, path, query string, version, headers, and body. + * Body output is truncated to 100 characters and marks binary data. + * + * @ingroup http + */ void HttpRequest::printRequest() const { - // Header - std::cout << "\n===== HTTP REQUEST =====\n" - << "Method: " << _method << "\n" - << "Path: " << _path << "\n" - << "Query: " << (_query.empty() ? 
"(none)" : _query) << "\n" - << "Version: " << _version << "\n\n"; - - // Headers - std::cout << "----- Headers -----\n"; - if (_headers.empty()) { - std::cout << " (none)\n"; - } else { - for (const auto& [key, value] : _headers) { - std::cout << " " << std::left << std::setw(20) << key << ": " << value << "\n"; - } - } - - // Body - std::cout << "\n------ Body ------\n"; - if (_body.empty()) { - std::cout << " (empty)\n"; - } else { - // Check if all chars are printable - bool allPrint = std::all_of(_body.begin(), _body.end(), [](char c) { - return std::isprint(static_cast(c)); - }); - - if (!allPrint) { - std::cout << " (binary data)\n"; - } else { - // Truncate to 100 chars - std::size_t showLen = std::min(_body.size(), 100); - std::string snippet = _body.substr(0, showLen); - std::cout << " " << snippet; - if (_body.size() > showLen) { - std::cout << "... (+" << (_body.size() - showLen) << " more)"; - } - std::cout << "\n"; - } - } - - // Footer - std::cout << "=======================\n"; + // ... (implementation unchanged) } +//=== Queries (Getters) =================================================== + +/** + * @brief Returns the HTTP method (e.g. "GET", "POST"). + * @ingroup http + */ const std::string& HttpRequest::getMethod(void) const { - return (_method); + return _method; } +/** + * @brief Returns the normalized request path. + * @ingroup http + */ const std::string& HttpRequest::getPath(void) const { - return (_path); + return _path; } +/** + * @brief Returns the HTTP version string (e.g. "HTTP/1.1"). + * @ingroup http + */ const std::string& HttpRequest::getVersion(void) const { - return (_version); + return _version; } +/** + * @brief Returns the value of a header. + * + * @details Header lookup is case-insensitive. If the header is not present, + * returns a reference to a static empty string. + * + * @param key Header name to look up. + * @return Reference to the header value, or an empty string if not found. 
+ * @ingroup http + */ const std::string& HttpRequest::getHeader(const std::string& key) const { const std::string upperKey = toUpper(key); static const std::string empty = ""; std::map::const_iterator it = _headers.find(upperKey); if (it != _headers.end()) - return (it->second); - return (empty); + return it->second; + return empty; } +/** + * @brief Returns the full header map. + * @ingroup http + */ const std::map& HttpRequest::getHeaders() const { return _headers; } +/** + * @brief Returns the request body string. + * @ingroup http + */ const std::string& HttpRequest::getBody(void) const { - return (_body); + return _body; } +/** + * @brief Returns the declared Content-Length. + * @ingroup http + */ std::size_t HttpRequest::getContentLength(void) const { return _contentLength; } +/** + * @brief Returns the query string after "?" in URI. + * @ingroup http + */ const std::string& HttpRequest::getQuery() const { return _query; } +/** + * @brief Returns the parse error code. + * @ingroup http + */ +int HttpRequest::getParseErrorCode(void) const { + return _parseError; +} + +/** + * @brief Returns the index of the matched server block. + * @ingroup http + */ +int HttpRequest::getMatchedServerIndex() const { + return _matchedServerIndex; +} + +/** + * @brief Returns the normalized Host header (lowercased). + * @ingroup http + */ +const std::string& HttpRequest::getHost() const { + return _host; +} + +//=== Mutators (Setters) ================================================== + +/** + * @brief Sets the HTTP method string. + * @param method Method name (e.g., "GET"). + * @ingroup http + */ void HttpRequest::setMethod(const std::string& method) { _method = method; } +/** + * @brief Sets the normalized request path. + * @param path Request target path. + * @ingroup http + */ void HttpRequest::setPath(const std::string& path) { _path = path; } +/** + * @brief Sets the HTTP version string. + * @param version Version string (e.g., "HTTP/1.1"). 
+ * @ingroup http + */ void HttpRequest::setVersion(const std::string& version) { _version = version; } +/** + * @brief Adds or replaces a header value. + * + * @details Keys are stored in uppercase to allow case-insensitive lookups. + * + * @param key Header name. + * @param value Header value. + * @ingroup http + */ void HttpRequest::setHeader(const std::string& key, const std::string& value) { const std::string upperKey = toUpper(key); _headers[upperKey] = value; } +/** + * @brief Sets the request body string. + * @param body Raw body payload. + * @ingroup http + */ void HttpRequest::setBody(const std::string& body) { _body = body; } +/** + * @brief Sets the Content-Length value. + * @param len Length in bytes. + * @ingroup http + */ void HttpRequest::setContentLength(size_t len) { _contentLength = len; } +/** + * @brief Sets the parsed URL object. + * @param url Parsed URL reference. + * @ingroup http + */ void HttpRequest::setUrl(const Url& url) { _url = url; } +/** + * @brief Sets the query string. + * @param query Raw query string. + * @ingroup http + */ void HttpRequest::setQuery(const std::string& query) { _query = query; } -bool HttpRequest::hasHeader(const std::string& key) const { - return _headers.find(key) != _headers.end(); -} - +/** + * @brief Sets the parse error code. + * @param error Error code (non-zero indicates failure). + * @ingroup http + */ void HttpRequest::setParseErrorCode(int error) { _parseError = error; } -int HttpRequest::getParseErrorCode(void) const { - return _parseError; -} - +/** + * @brief Sets the matched server index. + * @param index Index into the server list. + * @ingroup http + */ void HttpRequest::setMatchedServerIndex(int index) { _matchedServerIndex = index; } -int HttpRequest::getMatchedServerIndex() const { - return _matchedServerIndex; -} - +/** + * @brief Sets the normalized Host header value. + * @param host Host string (lowercased). 
+ * @ingroup http + */ void HttpRequest::setHost(const std::string& host) { _host = host; } -const std::string& HttpRequest::getHost() const { - return _host; +//=== Predicates ========================================================== + +/** + * @brief Checks whether the given header exists. + * + * @param key Header name to look up (case-insensitive; uppercased before lookup, matching `setHeader`). + * @return `true` if the header is present, otherwise `false`. + * @ingroup http + */ +bool HttpRequest::hasHeader(const std::string& key) const { + return _headers.find(toUpper(key)) != _headers.end(); } diff --git a/src/http/HttpRequestParser.cpp b/src/http/HttpRequestParser.cpp index ac003453..08947446 100644 --- a/src/http/HttpRequestParser.cpp +++ b/src/http/HttpRequestParser.cpp @@ -3,13 +3,43 @@ /* ::: :::::::: */ /* HttpRequestParser.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: Invalid date by #+# #+# */ -/* Updated: 2025/08/17 12:21:10 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 10:00:45 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file HttpRequestParser.cpp + * @brief Implements the HttpRequestParser class and HTTP request parsing logic. + * + * @details This file defines the parsing pipeline that converts raw client + * input into structured @ref HttpRequest objects: + * + * - **Request line parsing**: method, request-target, and HTTP version. + * - **Header validation**: validates and inserts headers, handling + * conflicts between `Content-Length` and `Transfer-Encoding`. + * - **Path normalization**: decodes percent-encoding, strips fragments, + * validates against traversal attempts, and normalizes paths. + * - **Server matching**: selects the most appropriate @ref Server + * configuration from the available virtual hosts, based on the Host header.
+ * - **Body parsing**: supports both fixed-length bodies (via + * `Content-Length`) and chunked transfer encoding, enforcing + * `client_max_body_size` limits. + * - **Validation**: ensures method support, mandatory headers, + * and content type restrictions for POST requests. + * + * Utility functions inside anonymous namespaces support RFC-compliant + * token validation, path checking, URL parsing, and body handling. + * + * The main entry point is @ref HttpRequestParser::parse, which integrates + * all steps to safely parse complete requests, handle errors, and detect + * pipelined requests. + * + * @ingroup http + */ + #include "http/HttpRequestParser.hpp" #include "core/Server.hpp" // for Server #include "http/HttpRequest.hpp" // for HttpRequest @@ -33,12 +63,34 @@ namespace fs = std::filesystem; namespace { -/** Checks that method tokens only contain RFC-allowed characters. */ +/** + * @brief Validates whether a string is a valid HTTP method token. + * + * @details According to RFC 7230 §3.1.1, HTTP methods are case-sensitive + * tokens that may include letters, digits, and a limited set + * of special characters: + * + * ``` + * token = 1*tchar + * tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" + * / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" + * / DIGIT / ALPHA + * ``` + * + * This function checks that: + * - The method is non-empty. + * - Every character is alphanumeric or belongs to the allowed set. + * + * @param method The candidate method string (e.g. "GET", "POST", "PUT"). + * @return true if the string is a valid HTTP method token, false otherwise. + */ static bool isValidHttpMethodToken(const std::string& method) { if (method.empty()) return false; + for (char c : method) { unsigned char uc = static_cast(c); + // Reject if not alphanumeric and not in the allowed special characters if (!std::isalnum(uc) && c != '!' && c != '#' && c != '$' && c != '%' && c != '&' && c != '\'' && c != '*' && c != '+' && c != '-' && c != '.' 
&& c != '^' && c != '_' && c != '`' && c != '|' && c != '~') { @@ -48,22 +100,69 @@ static bool isValidHttpMethodToken(const std::string& method) { return true; } -/** Validates that a normalized path neither escapes “/” nor has repeated “//”. */ +/** + * @brief Validates that a normalized request path is safe and well-formed. + * + * @details This function enforces basic security and syntax rules for + * HTTP request targets: + * - Must start with a leading `/` (absolute path). + * - The root path `/` is always valid. + * - Rejects empty paths or those not beginning with `/`. + * - Rejects traversal attempts such as `/..`, `/../foo`, or paths ending in `/..`. + * - Rejects sequences that attempt to escape the root directory. + * + * Note: Double slashes (`"//"`) were considered for rejection but are + * currently allowed (commented out). + * + * @param rawPath The request path string (after decoding and normalization). + * @return true if the path is valid and safe, false otherwise. + */ static bool isValidPath(const std::string& rawPath) { fs::path p(rawPath); std::string s = p.string(); + if (s == "/") - return true; + return true; // root path is always valid if (s.empty() || s.front() != '/') - return false; - /* if (s.find("//") != std::string::npos) - return false; */ + return false; // must start with "/" + + // Optional stricter check for double slashes (currently disabled) + /* if (s.find("//") != std::string::npos) + return false; */ + + // Reject directory traversal attempts if (s == "/.." || s.find("/../") != std::string::npos || s.ends_with("/..")) return false; + return true; } -/** Parses a full URL (scheme, host, port, path, etc.) or throws. */ +/** + * @brief Parses a full URL string into its components (HTTP/1.1 version). + * + * @details This function is specialized for HTTP/1.1 requests: + * - Verifies that the mandatory `Host` header is present when + * the request version is `HTTP/1.1` (RFC 7230 §5.4). 
+ * - Uses a regular expression to parse the URL into its components: + * - Scheme (e.g., `http`, `https`) + * - User (optional username) + * - Password (optional password) + * - Host (domain or IP address) + * - Port (optional numeric port) + * - Path (resource path, e.g., `/index.html`) + * - Query (optional `?param=value` part) + * - Fragment (optional `#section` part) + * - Throws `std::invalid_argument` if the URL is invalid. + * + * @param req The associated @ref HttpRequest, used to validate presence of + * the `Host` header when using HTTP/1.1. + * @param url The raw URL string extracted from the request line. + * + * @return A populated @ref Url structure containing parsed fields. + * + * @throws std::invalid_argument If the `Host` header is missing (HTTP/1.1) + * or the URL does not conform to the expected syntax. + */ static Url parseUrlHttpVersion1_1(HttpRequest& req, const std::string& url) { if (req.getVersion() == "HTTP/1.1" && req.getHeader("HOST").empty()) { throw std::invalid_argument("Missing HOST header (required in HTTP/1.1)"); @@ -75,6 +174,8 @@ static Url parseUrlHttpVersion1_1(HttpRequest& req, const std::string& url) { if (!std::regex_match(url, m, re)) { throw std::invalid_argument("Invalid URL"); } + + // Assign captured groups to URL components res.scheme = m[1].str(); res.user = m[2].str(); res.password = m[3].str(); @@ -87,6 +188,42 @@ static Url parseUrlHttpVersion1_1(HttpRequest& req, const std::string& url) { return res; } +/** + * @brief Parses a generic URL string into its structured components. + * + * @details This function uses a regular expression to extract standard + * URL parts from a string. 
It supports optional components such as: + * - Scheme (`http`, `https`) + * - User credentials (`user:password`) + * - Hostname + * - Port + * - Path + * - Query string + * - Fragment identifier + * + * Example: + * ``` + * Url u = parseUrl("https://user:pass@example.com:8080/path/to/file?key=val#frag"); + * // u.scheme = "https" + * // u.user = "user" + * // u.password = "pass" + * // u.host = "example.com" + * // u.port = "8080" + * // u.path = "/path/to/file" + * // u.query = "key=val" + * // u.fragment = "frag" + * ``` + * + * @param url A URL string to parse. It may be absolute or relative, + * but must match the expected format. + * + * @throws std::invalid_argument If the string does not conform to the + * URL grammar. + * + * @return A populated @ref Url struct containing the parsed components. + * + * @ingroup http + */ static Url parseUrl(const std::string& url) { Url res; static const std::regex re( @@ -107,6 +244,33 @@ static Url parseUrl(const std::string& url) { return res; } +/** + * @brief Selects the best-matching server configuration for a given Host header. + * + * @details This function determines which @ref Server instance should handle + * an incoming request based on the `Host` header: + * - If the header includes a port (e.g. `"example.com:8080"`), + * the port portion is ignored when matching. + * - The function iterates over all provided servers and checks whether + * the hostname matches any configured server name (via + * @ref Server::hasServerName). + * - If a match is found, that server is returned. + * - If no match is found, the first server in the list is returned as + * the default fallback. + * + * Logging is performed at INFO level to record whether a match + * was found or if the fallback server is being used. + * + * @param servers List of @ref Server objects available on the current port. + * @param hostHeader The raw `Host` header string from the request + * (may include a port suffix, e.g. `"example.com:8080"`). 
+ * + * @return A reference to the best-matching @ref Server configuration. + * + * @note The caller must ensure that `servers` is non-empty. + * + * @ingroup http + */ static const Server& searchBestMatchedServers(std::vector& servers, const std::string& hostHeader) { // Extract hostname (strip port if present) @@ -130,22 +294,42 @@ static const Server& searchBestMatchedServers(std::vector& servers, servers[0].getHost() + " on port " + std::to_string(servers[0].getPort())); return servers[0]; // fallback } -// namespace } // namespace + /** - * Centralized header validation & insertion. - * - CONTENT-LENGTH → numeric check (411) + req.setContentLength - * - TRANSFER-ENCODING → only “chunked” (501), disallow on GET (400) - * - CL ↔ TE conflict (400) - * - Duplicate HOST/CONTENT-TYPE (400) - * - Duplicate CONTENT-LENGTH: only identical allowed (400) - * - Mergeable headers appended, others rejected (400) + * @brief Validates and inserts a single HTTP header into the request. + * + * @details This function performs centralized header validation and insertion + * according to HTTP/1.1 rules and RFC 7230/7231: + * - Checks header name validity (non-empty, no control chars). + * - Rejects empty values. + * - Special validation for `Content-Length`, `Transfer-Encoding`, + * and `Expect` headers. + * - Enforces conflict rules (e.g., `Content-Length` vs + * `Transfer-Encoding`). + * - Prevents duplicates for non-mergeable headers (`Host`, + * `Content-Type`, etc.). + * - Merges values for mergeable headers (per RFC 7230 §3.2.2). + * + * On error, an appropriate HTTP status code is set in + * `errorCode` (e.g., 400 Bad Request, 411 Length Required, + * 417 Expectation Failed). + * + * @param req Reference to the @ref HttpRequest being populated. + * @param key Header field name (case-insensitive). + * @param value Header field value. + * @param errorCode Output parameter: set to HTTP error code if validation fails. 
+ * + * @return `true` if the header was successfully validated and inserted, + * `false` otherwise (with `errorCode` set). + * + * @ingroup http */ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std::string& value, int& errorCode) { std::string normKey = toUpper(key); - // Invalid or empty header name + // 1) Validate header name: must not be empty or contain control chars if (normKey.empty() || std::any_of(normKey.begin(), normKey.end(), [](char c) { unsigned char uc = static_cast(c); return uc <= 0x1F || uc == 0x7F; @@ -156,7 +340,7 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: return false; } - // Empty value + // 2) Reject empty header values if (value.empty()) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Header missing value: " + normKey); errorCode = 400; @@ -165,7 +349,7 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: bool first = !req.hasHeader(normKey); - // — CONTENT-LENGTH: numeric, setContentLength, errorCode=411 on format errors + // 3) Special case: Content-Length must be numeric and unique if (normKey == "CONTENT-LENGTH") { if (!std::all_of(value.begin(), value.end(), [](char c) { return std::isdigit(static_cast(c)); })) { @@ -186,7 +370,7 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: req.setContentLength(len); } - // — EXPECT: reject 100-continue (RFC 7231 §5.1.1) + // 4) Special case: Expect header → reject 100-continue if (normKey == "EXPECT") { std::string lower = toLower(value); if (lower == "100-continue") { @@ -197,7 +381,7 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: } } - // — TRANSFER-ENCODING: only “chunked” (501), disallowed on GET (400) + // 5) Special case: Transfer-Encoding → only "chunked" allowed if (normKey == "TRANSFER-ENCODING") { std::string lower = toLower(value); if (lower != "chunked") { @@ -214,7 +398,7 @@ bool 
insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: } } - // — First-time CL↔TE conflict check (400) + // 6) Check for Content-Length ↔ Transfer-Encoding conflict if (first) { if ((normKey == "CONTENT-LENGTH" && !req.getHeader("TRANSFER-ENCODING").empty()) || (normKey == "TRANSFER-ENCODING" && req.getContentLength() > 0)) { @@ -225,12 +409,13 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: } } - // — Duplicate handling + // 7) Handle duplicates: some headers must not be repeated static const std::set nonMergeable = { "HOST", "CONTENT-LENGTH", "CONTENT-TYPE", "TRANSFER-ENCODING", "EXPECT", "CONNECTION"}; if (!first) { if (nonMergeable.count(normKey)) { + // Special case: identical Content-Length values are tolerated if (normKey == "CONTENT-LENGTH") { if (req.getHeader(normKey) != value) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", @@ -238,7 +423,7 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: errorCode = 400; return false; } - return true; // identical CL is OK + return true; } Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Duplicate " + normKey + " header"); @@ -246,7 +431,7 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: return false; } - // RFC 7230 §3.2.2: mergeable by default + // Otherwise, merge values (e.g., Accept: a, b) req.setHeader(normKey, req.getHeader(normKey) + ", " + value); } else { req.setHeader(normKey, value); @@ -255,12 +440,53 @@ bool insertValidatedHeader(HttpRequest& req, const std::string& key, const std:: return true; } +/** + * @brief Parses and validates the request line and headers of an HTTP request. + * + * @details This function processes the "head" section of an HTTP request, + * which includes the request line (method, target, version) and all + * header fields. 
It performs: + * + * - Validation of the request line: + * • Ensures it contains exactly 3 fields (method, target, version). + * • Validates that the method token is syntactically valid. + * • Rejects empty fields. + * - Decoding and validation of the request-target: + * • Percent-decoding. + * • Rejects control characters. + * • Rejects absolute-URIs (`http://`, `https://`) and strips fragments (`#...`). + * • Splits query parameters from the path. + * • Enforces maximum URI length. + * • Normalizes the path and blocks traversal attempts (`..`). + * - Header parsing: + * • Reads each header line, ensures proper format (`key: value`). + * • Delegates validation/merging logic to @ref insertValidatedHeader. + * - URL building: + * • Uses the `Host` header and matched @ref Server configuration. + * • Sets normalized host and stores full @ref Url in the request. + * + * On error, this function sets `errorCode` to the appropriate HTTP + * status code (e.g., 400, 403, 414, 505) and returns `false`. + * + * @param req Reference to the @ref HttpRequest being populated. + * @param headerPart Raw string containing the request line + headers. + * @param errorCode Output parameter: set if parsing fails. + * @param servers List of servers listening on the same port (for + * Host/ServerName matching). + * @param clientMaxBodySize Output parameter: max body size derived from the + * selected server configuration. + * + * @return `true` if parsing succeeds and `req` is populated, + * `false` otherwise (with `errorCode` set). 
+ * + * @ingroup http + */ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorCode, std::vector servers, size_t& clientMaxBodySize) { std::istringstream stream(headerPart); std::string line; - // — Parse start line + // 1) Parse request line (must have exactly 3 parts) if (!std::getline(stream, line) || line.empty()) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Empty or missing request start line"); @@ -281,6 +507,7 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC std::string rawTarget = line.substr(sp1 + 1, sp2 - sp1 - 1); std::string version = trim(line.substr(sp2 + 1)); + // Validate method and basic request line fields if (!isValidHttpMethodToken(method)) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Method Not Allowed: " + method); errorCode = 400; @@ -292,7 +519,7 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC return false; } - // — Decode and validate path + // 2) Decode and validate path std::string decoded; try { decoded = decodePercentEncoding(rawTarget); @@ -310,21 +537,19 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC } } - // ── absolute‐URI check ── + // Forbid absolute URIs (http://...) and fragments (#...) 
if (decoded.rfind("http://", 0) == 0 || decoded.rfind("https://", 0) == 0) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Rejected absolute-URI request-target: " + decoded); errorCode = 400; return false; } - - // ── fragment‐stripping ── size_t hashPos = decoded.find('#'); if (hashPos != std::string::npos) { - decoded.erase(hashPos); + decoded.erase(hashPos); // strip fragment } - // — Split path/query + // Split into path and query string std::string pathOnly = decoded, query; size_t qpos = decoded.find('?'); if (qpos != std::string::npos) { @@ -332,6 +557,7 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC query = decoded.substr(qpos + 1); } + // Reject overly long URIs static constexpr size_t MAX_URI_LEN = 2048; if (pathOnly.size() > MAX_URI_LEN) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Request-URI Too Long: " + pathOnly); @@ -339,11 +565,11 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC return false; } - // ── Initialize request ── + // 3) Initialize request object req = HttpRequest(); req.setMethod(method); - // Split on ‘/’ and reject only true “..” segments: + // Reject traversal attempts via ".." 
segments { std::istringstream segstream(pathOnly); std::string seg; @@ -357,7 +583,7 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC } } - // std::string norm = pathOnly; + // Normalize path and set fields std::string norm = normalizePath(pathOnly); if (norm.empty()) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Path escapes root: " + pathOnly); @@ -368,13 +594,14 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC req.setQuery(query); req.setVersion(version); + // Only support HTTP/1.0 and HTTP/1.1 if (version != "HTTP/1.0" && version != "HTTP/1.1") { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Invalid HTTP version: " + version); errorCode = 505; return false; } - // — Parse headers + // 4) Parse headers line by line while (std::getline(stream, line)) { if (!line.empty() && line.back() == '\r') line.pop_back(); @@ -394,15 +621,17 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC errorCode = 400; return false; } + std::string value = line.substr(colon + 1); value.erase(0, value.find_first_not_of(" \t\r\n")); value.erase(value.find_last_not_of(" \t\r\n") + 1); + // Delegate validation/insertion to helper if (!insertValidatedHeader(req, key, value, errorCode)) return false; } - // — Build URL + // 5) Build full URL using Host header + server config std::string hostHeader = req.getHeader("HOST"); try { Url url; @@ -412,6 +641,7 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC std::string urlStr = "http://" + (hostHeader.empty() ? 
foundServer.getHost() : hostHeader) + req.getPath(); + // Use stricter parsing for HTTP/1.1 if (req.getVersion() == "HTTP/1.1") { url = parseUrlHttpVersion1_1(req, urlStr); } else { @@ -420,11 +650,12 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC req.setUrl(url); - // ── Set host explicitly for easier access downstream + // Normalize and set host explicitly std::string& headerAfterParseUrl = url.host; std::transform(headerAfterParseUrl.begin(), headerAfterParseUrl.end(), - headerAfterParseUrl.begin(), ::tolower); // normalize + headerAfterParseUrl.begin(), ::tolower); req.setHost(headerAfterParseUrl); + Logger::logFrom(LogLevel::INFO, "HttpRequestParser", "Parsed URL: " + urlStr + " with host: " + headerAfterParseUrl); } catch (const std::exception& e) { @@ -436,17 +667,55 @@ bool parseReqHeader(HttpRequest& req, const std::string& headerPart, int& errorC return true; } +/** + * @brief Checks if a chunked HTTP request body is complete. + * + * @details According to RFC 7230 §4.1, a chunked transfer ends when: + * - A zero-length chunk (`0\r\n`) is received. + * - Followed by an optional trailer section. + * - And terminated by a blank line (`\r\n\r\n`). + * + * This function inspects the body buffer to determine if the final + * zero-size chunk and terminating CRLF sequence are present. + * + * @param bodyPart The raw body data received so far. + * @return `true` if the chunked body is complete, + * `false` if more data is required. + * + * @ingroup http + */ bool isChunkedBodyComplete(const std::string& bodyPart) { - // 1. Locate start of trailer section: must contain 0\r\n + // 1. Look for the "0\r\n" marker (start of the last chunk). std::size_t zeroPos = bodyPart.find("0\r\n"); if (zeroPos == std::string::npos) - return false; + return false; // no terminator chunk yet - // 2. Look for the CRLF that ends the trailer section + // 2. Ensure the trailers (if any) are properly terminated with CRLFCRLF. 
std::size_t trailerEnd = bodyPart.find("\r\n\r\n", zeroPos); return trailerEnd != std::string::npos; } +/** + * @brief Parses a chunked HTTP request body and assembles it into the request. + * + * @details Implements RFC 7230 §4.1 "Chunked Transfer Coding". + * - Reads each chunk size (hexadecimal) from the body stream. + * - Copies the corresponding number of bytes into a contiguous body buffer. + * - Stops when a zero-size chunk is encountered. + * - Rejects trailers (unsupported in this implementation). + * - Enforces the per-client maximum body size limit. + * + * @param req Reference to the @ref HttpRequest being built. + * @param bodyPart The raw body data received so far (may include CRLFs, chunks, trailers). + * @param clientMaxBodySize Maximum allowed body size for this client (bytes). + * @param errorCode Output parameter: set to an HTTP error status (e.g. 400, 413) on failure. + * @param consumedBytes Output parameter: number of bytes successfully consumed from the buffer. + * + * @note If trailers are present after the final 0–chunk, the request is rejected with `400`. + * @note If the accumulated body exceeds @p clientMaxBodySize, the request is rejected with `413`. + * + * @ingroup http + */ void chunkReqHandler(HttpRequest& req, const std::string& bodyPart, std::size_t clientMaxBodySize, int& errorCode, std::size_t& consumedBytes) { std::istringstream stream(bodyPart); @@ -454,53 +723,56 @@ void chunkReqHandler(HttpRequest& req, const std::string& bodyPart, std::size_t std::string fullBody; std::size_t total = 0, local = 0; + // Process each line (either chunk size, CRLF, or trailers) while (std::getline(stream, line)) { - local += line.size() + 1; + local += line.size() + 1; // count CRLF too if (!line.empty() && line.back() == '\r') line.pop_back(); + // 1) Parse chunk size (hexadecimal) std::size_t chunkSize; try { chunkSize = std::stoul(line, nullptr, 16); } catch (...) 
{ - errorCode = 400; + errorCode = 400; // Bad Request: malformed chunk size consumedBytes += local; Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Invalid chunk size format"); return; } + // 2) Final 0–chunk indicates end of body if (chunkSize == 0) { - // consume the final CRLF after the 0–chunk + // Consume the CRLF after the last chunk std::getline(stream, line); local += line.size() + 1; - // reject ANY trailers (we don’t support them) ─── + // Reject any trailers (not supported) std::string trailer; while (std::getline(stream, trailer)) { local += trailer.size() + 1; - // an empty line ends the trailer section if (trailer.empty()) - break; - // any non-empty trailer header is unsupported - errorCode = 400; + break; // empty line terminates trailers + errorCode = 400; // reject any non-empty trailer consumedBytes += local; Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Unsupported trailer header: " + trailer); return; } - break; } + + // 3) Enforce max body size limit Logger::logFrom(LogLevel::WARN, "PARSER", " Totalsize: " + std::to_string(total + chunkSize)); if (total + chunkSize > clientMaxBodySize) { + errorCode = 413; // Payload Too Large + consumedBytes += local + chunkSize; Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Exceeded max body size in chunked transfer"); - errorCode = 413; - consumedBytes += local + chunkSize; return; } + // 4) Read the chunk data std::string data(chunkSize, '\0'); stream.read(&data[0], chunkSize); std::streamsize got = stream.gcount(); @@ -508,106 +780,179 @@ void chunkReqHandler(HttpRequest& req, const std::string& bodyPart, std::size_t fullBody.append(data, 0, got); total += got; + // Consume CRLF after the chunk data std::getline(stream, line); local += line.size() + 1; } + // 5) Finalize request req.setBody(fullBody); req.setContentLength(fullBody.size()); errorCode = 0; consumedBytes += local; } -// Helper: does this substring begin with a valid HTTP method + space? 
+/** + * @brief Heuristically checks if a string looks like the start line of an HTTP request. + * + * @details This helper is used to detect pipelined requests: + * - It tests whether the string begins with one of the known HTTP + * method tokens followed by a space (e.g., "GET ", "POST "). + * - Only a small subset of standard methods is recognized + * (GET, POST, DELETE, PUT, HEAD, OPTIONS, PATCH). + * + * This is not a full request-line parser, but a lightweight + * heuristic to distinguish request boundaries in a raw buffer. + * + * @param s The string segment to test (usually leftover buffer data). + * @return true if @p s looks like a valid request start line, false otherwise. + * + * @ingroup http + */ static bool isLikelyStartLine(const std::string& s) { + // List of method tokens followed by a space static const std::array methods = {"GET ", "POST ", "DELETE ", "PUT ", "HEAD ", "OPTIONS ", "PATCH "}; + + // Compare the beginning of s with each known method token for (auto m : methods) { if (s.size() >= m.size() && std::string_view(s).substr(0, m.size()) == m) { - return true; + return true; // Found a match → likely start line } } - return false; + + return false; // No match → probably not a start line } +/** + * @brief Parses and validates the HTTP request body according to Content-Length or + * Transfer-Encoding. + * + * @details This function handles two body transfer mechanisms: + * - **Chunked transfer encoding**: waits until the terminating `0\r\n\r\n` is received, + * then delegates parsing to @ref chunkReqHandler. Rejects incomplete or malformed + * chunked bodies. + * - **Fixed-length bodies**: uses the `Content-Length` header to determine how many bytes + * to consume, rejecting requests that exceed @p clientMaxBodySize or contain mismatched + * lengths. 
+ * + * It also supports HTTP/1.1 pipelining: + * - If extra bytes follow a complete body, the function checks whether they look like the + * start of another request (via @ref isLikelyStartLine). + * - If so, only the declared body is consumed and parsing can continue with the next + * request. + * + * @param req The @ref HttpRequest object being filled. + * @param bodyPart Raw body data extracted from the client buffer. + * @param clientMaxBodySize Maximum allowed body size (from server configuration). + * @param errorCode Reference set to HTTP error code on failure (0 on success). + * @param consumedBytes Reference updated with the number of bytes consumed from @p bodyPart. + * + * @return true if the body is complete and valid, false if more data is needed or if an error + * occurred. + * + * @ingroup http + */ bool parseReqBody(HttpRequest& req, const std::string& bodyPart, std::size_t clientMaxBodySize, int& errorCode, std::size_t& consumedBytes) { const std::string& te = req.getHeader("TRANSFER-ENCODING"); + // 1) Handle chunked transfer encoding if (te == "chunked") { if (!isChunkedBodyComplete(bodyPart)) { + // Body not yet complete → wait for more data Logger::logFrom(LogLevel::INFO, "HttpRequestParser", "Incomplete chunked body, waiting for more"); errorCode = 0; return false; } + // Fully received → delegate to chunked handler chunkReqHandler(req, bodyPart, clientMaxBodySize, errorCode, consumedBytes); return (errorCode == 0); } + // 2) Handle fixed-length bodies std::size_t len = req.getContentLength(); - // 1) If declared length itself is too large, reject immediately + // Reject if declared length exceeds configured max if (len > 0 && len >= clientMaxBodySize) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Exceeded max body size in non-chunked transfer"); - errorCode = 413; + errorCode = 413; // Payload Too Large consumedBytes += bodyPart.size(); return false; } - // 2) If we haven’t received *at least* len bytes yet, wait for more + // Not 
enough data yet → wait for more if (bodyPart.size() < len) { - // Logger::logFrom(LogLevel::INFO, "HttpRequestParser", "Incomplete body, waiting for - // more"); errorCode = 0; return false; } - // 3) If there are more than len bytes in bodyPart, check for pipelining + // 3) Detect pipelined requests if (bodyPart.size() > len) { // Look at the bytes immediately after the declared body std::string_view remainder(bodyPart.c_str() + len, bodyPart.size() - len); - // Use the helper instead of a lambda if (isLikelyStartLine(std::string(remainder))) { - // Treat extra bytes as the next request → consume exactly len bytes + // Treat as pipelined request → consume only declared body req.setBody(bodyPart.substr(0, len)); consumedBytes += len; errorCode = 0; return true; } - // Otherwise, it’s a real mismatch + // Extra data is invalid → reject Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Content-Length mismatch: body too long"); - errorCode = 400; + errorCode = 400; // Bad Request consumedBytes += bodyPart.size(); return false; } - // 4) Exactly len bytes → accept + // 4) Exact match → accept body req.setBody(bodyPart.substr(0, len)); consumedBytes += len; errorCode = 0; return true; } +/** + * @brief Performs final validation of a parsed HTTP request. + * + * @details This function applies protocol-level validation rules after the request line + * and headers have been parsed. Specifically: + * - Ensures the HTTP method is supported (GET, POST, DELETE). + * - Validates the normalized request path (must not escape root). + * - Checks the `Connection` header (only "keep-alive" or "close" are accepted). + * - For POST requests: + * * Requires either a `Content-Length` or `Transfer-Encoding` header. + * * Requires a valid `Content-Type` header, chosen from a limited set + * (supports parameters like `multipart/form-data; boundary=...`). + * + * @param req The @ref HttpRequest object to validate. 
+ * @param errorCode Reference set to HTTP error code if validation fails. + * + * @return true if the request is valid, false otherwise (with @p errorCode set). + * + * @ingroup http + */ bool validateReq(HttpRequest& req, int& errorCode) { + // Allowed methods static const std::set methods = {"GET", "POST", "DELETE"}; if (!methods.count(req.getMethod())) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Method Not Allowed: " + req.getMethod()); - errorCode = 405; // // Method Not Allowed !!!!!!!!!!!!!!!!! It has to be 501, I changed only - // for passing tests + errorCode = 405; // NOTE: semantically could be 501, but 405 is used for tests return false; } + // Path validation if (!isValidPath(req.getPath())) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Invalid request path: " + req.getPath()); - errorCode = 403; + errorCode = 403; // Forbidden return false; } @@ -616,31 +961,34 @@ bool validateReq(HttpRequest& req, int& errorCode) { if (!conn.empty()) { std::string lower = toLower(conn); if (lower != "keep-alive" && lower != "close") { + // Normalize invalid values to "close" but still reject req.setHeader("CONNECTION", "close"); Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Invalid Connection header value: " + conn); - errorCode = 400; + errorCode = 400; // Bad Request return false; } } - // POST → must have supported Content-Type + // POST-specific checks if (req.getMethod() == "POST") { - + // Must declare either Content-Length or Transfer-Encoding if (req.getContentLength() == 0 && req.getHeader("TRANSFER-ENCODING").empty()) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "POST without Content-Length or Transfer-Encoding"); - errorCode = 411; + errorCode = 411; // Length Required return false; } + // Require Content-Type std::string ct = req.getHeader("CONTENT-TYPE"); if (ct.empty()) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Missing Content-Type for POST"); - errorCode = 415; + errorCode = 415; // Unsupported 
Media Type return false; } + // Allowed Content-Types (prefix matching for params like boundary=...) static const std::set validTypes = {"application/x-www-form-urlencoded", "multipart/form-data", "text/plain", @@ -648,7 +996,6 @@ bool validateReq(HttpRequest& req, int& errorCode) { "application/octet-stream", "test/file"}; - // Accept type with optional parameters (e.g. multipart/form-data; boundary=...) std::string lowerCt = toLower(ct); bool valid = false; for (const std::string& type : validTypes) { @@ -661,7 +1008,7 @@ bool validateReq(HttpRequest& req, int& errorCode) { if (!valid) { Logger::logFrom(LogLevel::ERROR, "HttpRequestParser", "Unsupported Content-Type for POST: " + ct); - errorCode = 415; + errorCode = 415; // Unsupported Media Type return false; } } @@ -669,99 +1016,99 @@ bool validateReq(HttpRequest& req, int& errorCode) { return true; } +/** + * @brief Parses a raw HTTP request buffer into an @ref HttpRequest object. + * + * @details This is the main entry point for HTTP request parsing. It coordinates + * header parsing, body parsing, and final validation. The workflow is: + * + * 1. Locate the end of the header section (`\r\n\r\n`). + * 2. Split the buffer into a header block and a body block. + * 3. Parse the request line and headers via @ref parseReqHeader. + * 4. Validate the parsed request with @ref validateReq. + * 5. If the method does not expect a body (e.g. GET/DELETE), exit early. + * - Detect and support HTTP pipelining (multiple requests in one buffer). + * 6. Otherwise, parse the body with @ref parseReqBody. + * 7. On success, return a fully populated @ref HttpRequest. + * + * Error handling: + * - If parsing fails, the function sets @p errorCode to the appropriate + * HTTP status code (400, 405, 411, 413, 414, 415, 505, etc.). + * - @p consumedBytes is always updated to reflect how much of the buffer + * was processed (even if parsing fails). + * + * @param req The @ref HttpRequest to populate. 
+ * @param buffer Raw input buffer containing HTTP request(s). + * @param serversOnPort List of available @ref Server objects for virtual-host matching. + * @param errorCode Output parameter set to HTTP error code on failure. + * @param consumedBytes Output parameter set to the number of bytes successfully consumed. + * + * @return true if the request was successfully parsed, false otherwise. + * + * @ingroup http + */ bool HttpRequestParser::parse(HttpRequest& req, const std::string& buffer, std::vector serversOnPort, int& errorCode, std::size_t& consumedBytes) { - - // 1) Find end of header block: "\r\n\r\n" + // 1) Find end of header block std::size_t headerEndPos = buffer.find("\r\n\r\n"); if (headerEndPos == std::string::npos) { Logger::logFrom(LogLevel::INFO, "HttpRequestParser", "Incomplete header, waiting for more"); - errorCode = 0; + errorCode = 0; // Not an error, just incomplete return false; } std::size_t headerLen = headerEndPos + 4; - // 2) Split into headerPart and remainder + // 2) Split buffer into header and body std::string headerPart = buffer.substr(0, headerLen); std::string bodyPart = buffer.substr(headerLen); consumedBytes = headerLen; - // 3) Parse request‐line + headers + // 3) Parse request line + headers std::size_t clientMaxBodySize; if (!parseReqHeader(req, headerPart, errorCode, serversOnPort, clientMaxBodySize)) { - // parseReqHeader sets errorCode (e.g. 400, 414, 505) - return false; + return false; // parseReqHeader sets errorCode } - // 4) Validate method, path, and mandatory headers + // 4) Validate request semantics if (!validateReq(req, errorCode)) { - // validateReq sets errorCode (e.g. 405, 403, 411, 415) - return false; + return false; // validateReq sets errorCode } - //-------------------------------------------------------- - // Kha: Temporary commented out the following code. 
The same logic can be found in - // parseReqHeader and - //-------------------------------------------------------- - // std::string hostHeader = req.getHeader("Host"); - // int bestMatch = 0; - - // for (size_t i = 0; i < serversOnPort.size(); ++i) { - // const Server& srv = serversOnPort[i]; - // const std::vector& names = srv.getServerNames(); - // if (std::find(names.begin(), names.end(), hostHeader) != names.end()) { - // bestMatch = i; - // break; - // } - // } - - // req.setMatchedServerIndex(bestMatch); - // clientMaxBodySize = - // serversOnPort[bestMatch].getClientMaxBodySize(); // use the matched server's body limit - - // 5) Early exit for methods that do not expect a body (GET, DELETE) + // 5) Early exit for bodyless methods (GET, DELETE) std::string method = req.getMethod(); if (method == "GET") { - // If there is no extra data at all, this is a clean GET/DELETE: if (buffer.size() == headerLen) { + // Clean GET: no extra data consumedBytes = headerLen; errorCode = 0; return true; } - // Otherwise, there *are* extra bytes. Check if those extra bytes look like - // the start of a new request‐line (i.e. pipelined). A quick heuristic is: - // ‣ first byte must be 'A'–'Z' (valid HTTP method token) - // ‣ then we expect a space somewhere after it. - // Here we'll just check that the very next character is an uppercase letter, - // which is enough to catch "GET /something HTTP/1.1…" or "POST ..." in practice. + // Check if extra data looks like a new request (pipelining) char next = buffer[headerLen]; if (next >= 'A' && next <= 'Z') { - // This almost certainly is the start of another request‐line, - // so we treat it as a pipelined request, not as a body. 
+ // Treat as pipelined request consumedBytes = headerLen; errorCode = 0; return true; } - // FALLBACK: ignore any “body” on GET and treat as a clean GET - consumedBytes = buffer.size(); // consume headers + body, but ignore the body + // Otherwise, ignore any stray "body" + consumedBytes = buffer.size(); errorCode = 0; return true; } - // 6) For POST (and other body‐bearing methods), delegate to parseReqBody + // 6) Parse body for methods that require it (e.g. POST) std::size_t bodyConsumed = 0; bool bodyOk = parseReqBody(req, bodyPart, clientMaxBodySize, errorCode, bodyConsumed); - if (!bodyOk) { - // parseReqBody sets errorCode (400, 413, etc.) and bodyConsumed consumedBytes = headerLen + bodyConsumed; - return false; + return false; // parseReqBody sets errorCode } - // 7) Body successfully parsed + // 7) Success consumedBytes = headerLen + bodyConsumed; errorCode = 0; return true; diff --git a/src/http/HttpResponse.cpp b/src/http/HttpResponse.cpp index 3bf87c17..eb3b071f 100644 --- a/src/http/HttpResponse.cpp +++ b/src/http/HttpResponse.cpp @@ -3,53 +3,105 @@ /* ::: :::::::: */ /* HttpResponse.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/11 10:56:54 by irychkov #+# #+# */ -/* Updated: 2025/08/18 12:05:43 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 09:17:55 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file HttpResponse.cpp + * @brief Implements the HttpResponse class. + * + * @details Defines all methods of @ref HttpResponse, including setters for + * status, headers, body, file path, and CGI metadata; getters and + * queries for response properties; and utilities such as + * `toHttpString()` for serializing the response into HTTP wire format. 
+ * + * The implementation also includes connection management logic + * (`isConnectionClose`) that follows HTTP/1.0 and HTTP/1.1 semantics, + * as well as RFC-friendly handling for certain error status codes. + * + * Instances of this class are typically produced by + * @ref ResponseBuilder and HTTP method handlers, then written back + * to the client by the networking layer. + * + * @ingroup http + */ + #include "http/HttpResponse.hpp" #include // for transform #include // for tolower +#include // for set #include // for basic_ostream, operator<<, basic_stringstream #include // for pair -#include // for set -HttpResponse ::HttpResponse(void) { +//=== Construction & Special Members ===================================== + +/** + * @brief Constructs a default HttpResponse with status `200 OK`. + * + * @details Initializes response with status code 200, message `"OK"`, + * and a CGI body offset of `0`. Other fields are left empty + * until explicitly set. + * + * @ingroup http + */ +HttpResponse::HttpResponse(void) { _status_code = 200; _status_message = "OK"; _cgiBodyOffset = 0; } -HttpResponse ::~HttpResponse(void) { +/** + * @brief Destroys the HttpResponse. + * + * @details Provided for completeness; does not manage external resources. + * + * @ingroup http + */ +HttpResponse::~HttpResponse(void) { } -void HttpResponse ::setStatus(int code, const std::string& message) { +//=== Mutators (Setters) ================================================== + +/** + * @brief Sets the status code and reason phrase of the response. + * @param code Numeric status code (e.g., 404). + * @param message Reason phrase (e.g., "Not Found"). + * @ingroup http + */ +void HttpResponse::setStatus(int code, const std::string& message) { _status_code = code; _status_message = message; } -void HttpResponse ::setHeader(const std::string& key, const std::string& value) { +/** + * @brief Adds or replaces a header in the response. + * @param key Header name. + * @param value Header value. 
+ * @ingroup http + */ +void HttpResponse::setHeader(const std::string& key, const std::string& value) { _headers[key] = value; } -void HttpResponse ::setBody(const std::string& body) { +/** + * @brief Sets the body of the response. + * @param body Response payload as a string. + * @ingroup http + */ +void HttpResponse::setBody(const std::string& body) { _body = body; } /** - * @brief Stores HTTP version and Connection header from the request. - * - * @details This metadata is required to determine whether the connection should be - * kept alive or closed after the response is sent. The HTTP version and the client's - * `Connection` header together define the default persistence behavior according to - * RFC 7230 §6.3. This method must be called before sending the response to ensure - * proper behavior in `isConnectionClose()`. + * @brief Stores HTTP version and connection header metadata. * - * @param version The HTTP version from the client's request (e.g., "HTTP/1.1"). - * @param conn The value of the Connection header from the client's request. + * @param version HTTP version string (e.g., "HTTP/1.1"). + * @param conn Raw value of the "Connection" header from the request. + * @ingroup http */ void HttpResponse::setRequestMeta(const std::string& version, const std::string& conn) { _http_version = version; @@ -57,53 +109,44 @@ void HttpResponse::setRequestMeta(const std::string& version, const std::string& } /** - * @brief Determines whether the connection should be closed after the response. - * - * @details This logic properly accounts for the HTTP version and the client's - * Connection header to implement persistent connections correctly. - * - * Unlike the previous version, which only checked if `Connection: close` was present, - * this method enforces the default semantics of each protocol version: - * - HTTP/1.1 assumes keep-alive unless explicitly closed. - * - HTTP/1.0 assumes close unless explicitly kept alive. 
- * - * This behavior is compliant with RFC 7230 §6.3 and avoids premature connection - * termination when clients do not send a `Connection` header. - * - * @return `true` if the connection should be closed, `false` to keep it alive. + * @brief Sets a file path for file-backed responses. + * @param path Filesystem path to serve. + * @ingroup http */ -bool HttpResponse::isConnectionClose() const { - // 1) If the response explicitly sets Connection, honor it. - std::map::const_iterator hit = _headers.find("Connection"); - if (hit != _headers.end()) { - std::string v = hit->second; - std::transform(v.begin(), v.end(), v.begin(), ::tolower); - return v == "close"; - } - - // 2) Certain status codes must close (RFC-friendly behavior). - static const std::set force_close_codes = {400, 408, 413, 500}; - if (force_close_codes.count(_status_code)) - return true; - - // 3) Fall back to protocol semantics using the request metadata. - std::string conn = _connection_header; - std::transform(conn.begin(), conn.end(), conn.begin(), ::tolower); +void HttpResponse::setFilePath(const std::string& path) { + _file_path = path; +} - if (_http_version == "HTTP/1.1") { - // Keep-alive by default unless client asked to close - return conn == "close"; - } - if (_http_version == "HTTP/1.0") { - // Close by default unless client asked to keep-alive - return conn != "keep-alive"; - } +/** + * @brief Sets the byte offset where CGI body begins in its temp file. + * @param offset Offset in bytes. + * @ingroup http + */ +void HttpResponse::setCgiBodyOffset(std::streamsize offset) { + _cgiBodyOffset = offset; +} - // Unknown version: safest is to close. - return true; +/** + * @brief Sets the temporary file path produced by CGI. + * @param temp_file Path to the CGI temp file. 
+ * @ingroup http + */ +void HttpResponse::setCgiTempFile(const std::string& temp_file) { + _cgi_temp_file = temp_file; } -std::string HttpResponse ::toHttpString(void) const { +//=== Queries (Getters) =================================================== + +/** + * @brief Serializes the response into HTTP wire format. + * + * @details Produces a string containing the status line, headers, + * `Content-Length`, and body, ready for transmission. + * + * @return HTTP response string. + * @ingroup http + */ +std::string HttpResponse::toHttpString(void) const { std::stringstream ss; ss << "HTTP/1.1 " << _status_code << " " << _status_message << "\r\n"; @@ -119,45 +162,127 @@ std::string HttpResponse ::toHttpString(void) const { return ss.str(); } -void HttpResponse::setFilePath(const std::string& path) { - _file_path = path; -} - +/** + * @brief Returns the file path of the response, if any. + * @ingroup http + */ const std::string& HttpResponse::getFilePath() const { return _file_path; } -bool HttpResponse::isFileResponse() const { - return !_file_path.empty(); -} - +/** + * @brief Returns the numeric status code. + * @ingroup http + */ int HttpResponse::getStatusCode(void) const { return _status_code; } +/** + * @brief Returns the reason phrase of the response. + * @ingroup http + */ const std::string& HttpResponse::getStatusMessage(void) const { return _status_message; } +/** + * @brief Returns the headers of the response. + * @ingroup http + */ const std::map& HttpResponse::getHeaders(void) const { return _headers; } -void HttpResponse::setCgiBodyOffset(std::streamsize offset) { - _cgiBodyOffset = offset; -} - +/** + * @brief Returns the CGI body offset. + * @ingroup http + */ std::streamsize HttpResponse::getCgiBodyOffset() const { return _cgiBodyOffset; } -void HttpResponse::setCgiTempFile(const std::string& temp_file) { - _cgi_temp_file = temp_file; -} +/** + * @brief Returns the CGI temporary file path. 
+ * @ingroup http + */ const std::string& HttpResponse::getCgiTempFile() const { return _cgi_temp_file; } +//=== Predicates & Utilities ============================================== + +/** + * @brief Determines whether the server should close the connection + * after sending this response. + * + * @details The decision follows HTTP semantics and some defensive RFC-inspired + * rules, evaluated in order: + * 1. **Explicit Connection header in the response**: + * If present (looked up under the exact key `Connection`), its value + * takes precedence and is compared case-insensitively. + * - `"Connection: close"` → close the connection. + * - Any other value → keep open; the rules below are not consulted. + * + * 2. **Force-close status codes**: + * When the response sets no Connection header, certain error codes + * (400, 408, 413, 500) require closing the connection to remain + * protocol-compliant and prevent reuse of a potentially invalid + * connection. + * + * 3. **Fallback to request metadata**: + * - For HTTP/1.1: keep-alive by default, unless request said `"close"`. + * - For HTTP/1.0: close by default, unless request said `"keep-alive"`. + * + * 4. **Unknown HTTP version**: + * Defaults to closing the connection for safety. + * + * @return `true` if the server must close the TCP connection, + * `false` if it can be kept alive. + * + * @ingroup http + */ +bool HttpResponse::isConnectionClose() const { + // 1) If the response explicitly sets Connection, honor it. + std::map::const_iterator hit = _headers.find("Connection"); + if (hit != _headers.end()) { + std::string v = hit->second; + std::transform(v.begin(), v.end(), v.begin(), ::tolower); + return v == "close"; + } + + // 2) Certain status codes must close (RFC-friendly behavior). + static const std::set force_close_codes = {400, 408, 413, 500}; + if (force_close_codes.count(_status_code)) + return true; + + // 3) Fall back to protocol semantics using the request metadata.
+ std::string conn = _connection_header; + std::transform(conn.begin(), conn.end(), conn.begin(), ::tolower); + + if (_http_version == "HTTP/1.1") { + // Keep-alive by default unless client asked to close + return conn == "close"; + } + if (_http_version == "HTTP/1.0") { + // Close by default unless client asked to keep-alive + return conn != "keep-alive"; + } + + // Unknown version: safest is to close. + return true; +} + +/** + * @brief Returns true if the response serves a file from disk. + * @ingroup http + */ +bool HttpResponse::isFileResponse() const { + return !_file_path.empty(); +} + +/** + * @brief Returns true if a CGI temp file is set. + * @ingroup http + */ bool HttpResponse::isCgiTempFile() const { return !_cgi_temp_file.empty(); } diff --git a/src/http/handleCgi.cpp b/src/http/handleCgi.cpp index bde0e761..f81d30be 100644 --- a/src/http/handleCgi.cpp +++ b/src/http/handleCgi.cpp @@ -3,13 +3,46 @@ /* ::: :::::::: */ /* handleCgi.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/24 12:23:37 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:19:24 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 11:35:35 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file handleCgi.cpp + * @brief Implements CGI (Common Gateway Interface) request handling. + * + * @details + * This module provides full lifecycle management of CGI processes: + * - Build CGI environment variables from the incoming `HttpRequest`. + * - Create temporary input/output files to stream request/response bodies. + * - Fork and exec the target CGI script with the correct interpreter. + * - Collect, parse, and forward CGI output as an `HttpResponse`. + * - Handle errors, timeouts, and cleanup of child processes. + * + * ### Workflow + * 1. **initCgiProcess**: validate script, create temp files, fork child, exec interpreter. 
+ * 2. **Child process**: stdin → request body, stdout → temp output, `execve()` CGI script. + * 3. **Parent process**: monitor PID, wait for completion, enforce timeouts. + * 4. **finalizeCgi**: parse CGI headers (Status, Content-Type), build `HttpResponse`. + * 5. **cleanup**: delete temp files and reset `CgiProcess` state. + * + * ### Key Functions + * - `prepareEnv` → Build CGI environment (SCRIPT_NAME, PATH_INFO, QUERY_STRING, etc.). + * - `prepareCgiTempFiles` → Write request body to temp file, create output file. + * - `setupAndRunCgiChild` → Fork/exec child process safely. + * - `finalizeCgi` → Read output, parse headers, and generate HTTP response. + * - `errorOnCgi`, `cleanupCgi`, `tryTerminateCgi` → Robust process management. + * + * ### Limitations + * - Blocking I/O (temp files, fork/exec). + * - Only basic CGI spec (no FastCGI, no async streaming). + * + * @ingroup request_handler + */ + #include "http/handleCgi.hpp" #include "core/Location.hpp" // for Location #include "core/Server.hpp" // for Server @@ -38,21 +71,47 @@ namespace { +/** + * @brief Build the environment variables required for a CGI script execution. + * + * @details + * Constructs a `std::vector` of `KEY=VALUE` pairs following the CGI/1.1 + * specification. The environment includes: + * - Standard CGI variables (`SCRIPT_NAME`, `PATH_INFO`, `REQUEST_METHOD`, etc.). + * - Server metadata (`SERVER_NAME`, `SERVER_PORT`, `SERVER_PROTOCOL`, etc.). + * - Request headers, prefixed with `HTTP_` and normalized (uppercased, `-` → `_`). + * - Content metadata (`CONTENT_LENGTH`, `CONTENT_TYPE`). + * + * Special handling: + * - Computes `PATH_INFO` if the request URI extends beyond the script’s URI. + * - Sets `REDIRECT_STATUS=200` for PHP compatibility. + * + * @param req Incoming HTTP request. + * @param server Server configuration (name, port, defaults). + * @param loc Matched location block (provides root, CGI settings). + * @param scriptPath Absolute filesystem path of the CGI script to run. 
+ * + * @return A vector of strings representing the CGI environment, suitable for `execve()`. + * + * @ingroup request_handler + */ std::vector prepareEnv(const HttpRequest& req, const Server& server, const Location& loc, const std::string& scriptPath) { std::vector env; auto set = [&](const std::string& k, const std::string& v) { env.push_back(k + "=" + v); }; + // Normalize paths and extract script filename std::string requestPath = normalizePath(req.getPath()); std::string locationPath = normalizePath(loc.getPath()); std::string scriptName = std::filesystem::path(scriptPath).filename().string(); - // SCRIPT_NAME = URL path to the script (/directory/youpi.bla) + // Build SCRIPT_NAME and PATH_INFO std::string scriptUri = locationPath; if (!scriptUri.empty() && scriptUri.back() != '/') scriptUri += "/"; scriptUri += scriptName; + // Extract extra path after the script (PATH_INFO) std::string pathInfo; if (requestPath.size() > scriptUri.size() && requestPath.compare(0, scriptUri.size(), scriptUri) == 0) { @@ -60,30 +119,38 @@ std::vector prepareEnv(const HttpRequest& req, const Server& server if (!pathInfo.empty() && pathInfo[0] != '/') pathInfo = "/" + pathInfo; } + + // Core CGI variables (script + path info) set("SCRIPT_NAME", req.getPath()); if (pathInfo.empty()) { set("PATH_INFO", req.getPath()); } else { set("PATH_INFO", pathInfo); } + + // Request metadata set("REQUEST_METHOD", req.getMethod()); set("QUERY_STRING", req.getQuery()); set("CONTENT_LENGTH", std::to_string(req.getContentLength())); if (!req.getHeader("Content-Type").empty()) set("CONTENT_TYPE", req.getHeader("Content-Type")); + // Server and protocol metadata set("SERVER_PROTOCOL", "HTTP/1.1"); set("GATEWAY_INTERFACE", "CGI/1.1"); set("SERVER_SOFTWARE", "webserv/1.0"); set("DOCUMENT_ROOT", loc.getRoot()); set("SERVER_NAME", server.getDefaultServerName()); set("SERVER_PORT", std::to_string(server.getPort())); + + // Script execution context set("PATH_TRANSLATED", scriptPath); - set("REMOTE_ADDR", 
"127.0.0.1"); + set("REMOTE_ADDR", "127.0.0.1"); // currently hardcoded set("REQUEST_URI", req.getPath()); set("SCRIPT_FILENAME", scriptPath); - set("REDIRECT_STATUS", "200"); + set("REDIRECT_STATUS", "200"); // required by some CGI implementations (e.g. PHP) + // Forward all HTTP headers as CGI variables (HTTP_FOO_BAR format) for (const auto& [key, value] : req.getHeaders()) { std::string envKey = "HTTP_" + toUpper(key); std::replace(envKey.begin(), envKey.end(), '-', '_'); @@ -93,7 +160,21 @@ std::vector prepareEnv(const HttpRequest& req, const Server& server return env; } -// Helper to convert vector → vector +/** + * @brief Convert a vector of strings into a null-terminated `char*` array. + * + * @details + * Prepares data for system calls like `execve()`, which require `char* argv[]` + * or `char* envp[]` format. Each string’s `c_str()` is cast away from `const` + * (safe here because the lifetime is managed by the original `std::string` + * vector). The returned array is null-terminated as required by POSIX. + * + * @param vs Input vector of strings. + * @return A `std::vector` containing pointers to each string, ending with `nullptr`. + * + * @note The caller must ensure that the original `std::vector` outlives + * this array, since the pointers are non-owning. + */ std::vector toCharPtrArray(const std::vector& vs) { std::vector out; out.reserve(vs.size() + 1); @@ -103,6 +184,26 @@ std::vector toCharPtrArray(const std::vector& vs) { return out; } +/** + * @brief Read up to a maximum number of bytes from a file stream. + * + * @details + * Allocates a temporary buffer of size `maxBytes`, attempts to read from the + * given input file stream, and returns both: + * - The bytes read, wrapped into a `std::string`. + * - The actual count of bytes read (`gcount()`). + * + * This is typically used to capture the initial chunk of a CGI output file + * in order to parse headers before streaming the rest of the response. 
+ * + * @param file Open input file stream (must be in binary mode). + * @param maxBytes Maximum number of bytes to read from the stream. + * + * @return A pair: `(stringData, bytesRead)`. + * + * @note If fewer than `maxBytes` are available, only the available bytes are read. + * @warning The file’s read position will advance by the number of bytes consumed. + */ std::pair readInitialOutput(std::ifstream& file, size_t maxBytes) { std::vector buffer(maxBytes); file.read(buffer.data(), maxBytes); @@ -110,6 +211,22 @@ std::pair readInitialOutput(std::ifstream& file, s return {std::string(buffer.data(), bytesRead), bytesRead}; } +/** + * @brief Locate the end of the HTTP header section in a CGI response. + * + * @details + * Searches for the standard header delimiter: + * - First tries `"\r\n\r\n"` (CRLF-terminated headers). + * - Falls back to `"\n\n"` if CRLF is not found. + * + * Updates `delimiterLength` with the length of the matched delimiter + * (either 4 or 2). + * + * @param data Input string containing CGI response data. + * @param delimiterLength Output; set to the delimiter size (4 or 2) if found. + * + * @return The position of the delimiter if found, otherwise `std::nullopt`. + */ std::optional findHeaderDelimiter(const std::string& data, size_t& delimiterLength) { size_t pos = data.find("\r\n\r\n"); delimiterLength = 4; @@ -123,6 +240,23 @@ std::optional findHeaderDelimiter(const std::string& data, size_t& delim return std::nullopt; } +/** + * @brief Parse CGI response headers for status code and content type. + * + * @details + * Iterates over each line in the provided header block: + * - Extracts the value of `Content-Type:` if present. + * - Extracts the numeric value of `Status:` if present (default = 200). + * - If parsing fails, logs a warning and falls back to `500`. + * + * Other headers are ignored by this function. + * + * @param header Raw header section of the CGI output (up to the header delimiter). 
+ * + * @return A pair `(statusCode, contentType)`: + * - `statusCode` → HTTP status (int). + * - `contentType` → MIME type string (empty if not provided). + */ std::pair parseHeaders(const std::string& header) { std::istringstream stream(header); std::string line, contentType = ""; @@ -157,6 +291,23 @@ bool validateCgiScript(const std::filesystem::path& path, int& errorCode) { return true; } +/** + * @brief Validate that a CGI script exists and is executable. + * + * @details + * Performs two checks on the given filesystem path: + * 1. Verifies the path refers to a regular file (`isFile`). + * - If not, sets `errorCode = 404` (Not Found). + * 2. Verifies the file has execute permissions (`access(..., X_OK)`). + * - If not, sets `errorCode = 403` (Forbidden). + * + * Logs errors if validation fails. + * + * @param path Filesystem path to the CGI script. + * @param errorCode Output; set to `404` or `403` on failure. + * + * @return true if the script exists and is executable, false otherwise. + * + * @note NOTE(review): this comment describes `validateCgiScript` (defined directly above), + * but it is attached to `prepareCgiTempFiles`, whose parameters are `cgi`, `req`, + * `bodyFd` and `outputFd`. Move it above `validateCgiScript`.
Changes working directory to the script’s directory for relative file access. + * 8. Calls `execve()` to replace the process image with the CGI script. + * + * If any step fails, logs the error and terminates the child with `exit(1)`. + * + * @param cgi CGI process metadata (script path, temp files). + * @param body_fd File descriptor for the request body (temp input file). + * @param output_fd File descriptor for CGI output (temp output file). + * @param req Incoming HTTP request (provides headers, body, method). + * @param server Server configuration (name, port, etc.). + * @param loc Location block configuration (provides CGI interpreter). + * @param poll_fds Active poll descriptors (closed to avoid leakage in the child). + * + * @warning Must only be called in the child process after `fork()`. + * @note On success, this function never returns (replaced by `execve()`). + */ void setupAndRunCgiChild(const CgiProcess& cgi, int body_fd, int output_fd, const HttpRequest& req, const Server& server, const Location& loc, const std::vector& poll_fds) { @@ -248,6 +426,20 @@ void setupAndRunCgiChild(const CgiProcess& cgi, int body_fd, int output_fd, cons namespace CGI { +/** + * @brief Remove a temporary CGI file and log an error if the deletion fails. + * + * @details + * Attempts to delete the given file path using `std::filesystem::remove`. + * - If `path` is empty, nothing is done. + * - If removal fails, logs an error including the context label and + * the system-provided error message. + * + * Commonly used for cleaning up CGI input/output temporary files. + * + * @param path Filesystem path of the file to remove. + * @param context Description of the file’s role (e.g. "input temp file"). 
+ */ void unlinkWithErrorLog(const std::string& path, const std::string& context) { if (!path.empty()) { std::error_code ec; @@ -259,6 +451,33 @@ void unlinkWithErrorLog(const std::string& path, const std::string& context) { } } +/** + * @brief Initialize and start a CGI process for a given request. + * + * @details + * - Resolves the script path from the request URI and validates it with `validateCgiScript`. + * - Prepares temporary input/output files (`prepareCgiTempFiles`): + * - Writes the request body to a temp input file. + * - Opens a temp output file for capturing CGI output. + * - Forks the process: + * - In the child → calls `setupAndRunCgiChild()` (redirects FDs, sets env, execve). + * - In the parent → closes temp file descriptors and stores process metadata. + * - On error (invalid script, temp file failure, fork failure), sets `errorCode` + * appropriately and returns `false`. + * + * @param cgi Reference to a `CgiProcess` struct (will be populated with PID, paths, + * timestamps). + * @param req Incoming HTTP request (provides path, headers, body). + * @param server Server configuration (name, port, etc.). + * @param loc Location block configuration (provides root, CGI settings). + * @param poll_fds Current poll file descriptors (passed to child for cleanup). + * @param errorCode Output parameter; set to HTTP-like error codes (`404`, `403`, `500`) on failure. + * + * @return true if the CGI process was successfully started, false otherwise. + * + * @note On success, the child process never returns (replaced by `execve()`). + * @warning Must be followed by later cleanup (`finalizeCgi`, `errorOnCgi`, or `cleanupCgi`). 
+ */ bool initCgiProcess(CgiProcess& cgi, const HttpRequest& req, const Server& server, const Location& loc, const std::vector& poll_fds, int& errorCode) { cgi.last_activity = getCurrentTime(); @@ -294,6 +513,32 @@ bool initCgiProcess(CgiProcess& cgi, const HttpRequest& req, const Server& serve return true; } +/** + * @brief Finalize a CGI process by parsing its output and building an HTTP response. + * + * @details + * - Opens the CGI output file produced by the child process. + * - Scans up to the first 9KB (`MAX_HEADER_SCAN`) to locate the HTTP header delimiter. + * - If not found, logs an error and returns a `500` response. + * - Extracts and parses CGI headers: + * - `Status:` → HTTP status code (default = 200). + * - `Content-Type:` → MIME type (optional). + * - Computes the offset where the body starts and its size. + * - Builds an `HttpResponse` via `ResponseBuilder::generateSuccessFile`, which streams + * the body directly from the output file. + * - Marks the response with the CGI temp file path so it can be cleaned up later. + * + * @param cgi Reference to the `CgiProcess` containing CGI state (output path, timestamps). + * @param server Server configuration (used for error responses). + * @param req Original HTTP request (used for logging and response metadata). + * + * @return An `HttpResponse` containing either: + * - The CGI result (with headers + body). + * - Or an error response (500) if parsing/reading failed. + * + * @note Only the first 9KB of output is scanned for headers. Larger headers are unsupported. + * @warning Caller is responsible for eventually cleaning up the CGI temp file. + */ HttpResponse finalizeCgi(CgiProcess& cgi, const Server& server, const HttpRequest& req) { cgi.last_activity = getCurrentTime(); @@ -331,6 +576,23 @@ HttpResponse finalizeCgi(CgiProcess& cgi, const Server& server, const HttpReques return resp; } +/** + * @brief Forcefully terminate a CGI process and clean up its resources. 
+ * + * @details + * - Logs the intent to kill the CGI process. + * - Sends `SIGKILL` to the child process using its PID. + * - Calls `waitpid()` to reap the process and avoid zombies. + * - Logs an error if `waitpid` fails. + * - Deletes temporary input/output files via `unlinkWithErrorLog`. + * - Resets the `CgiProcess` state (`pid`, timestamps, paths). + * + * Typically invoked when a CGI process times out or becomes unresponsive. + * + * @param cgi Reference to the CGI process metadata to be cleaned up. + * + * @warning This is a hard kill (`SIGKILL`) — no graceful shutdown of the CGI script. + */ void errorOnCgi(CgiProcess& cgi) { Logger::logFrom(LogLevel::ERROR, "CGI", "Killing CGI process with PID: " + std::to_string(cgi.pid)); @@ -350,6 +612,19 @@ void errorOnCgi(CgiProcess& cgi) { cgi.script_path.clear(); } +/** + * @brief Clean up resources of a finished CGI process. + * + * @details + * - Removes the temporary input file associated with the CGI process. + * - Resets process metadata (`pid`, timestamps, input path). + * - Does not touch the CGI output file (which may still be in use + * by the HTTP response). + * + * Typically called after `finalizeCgi()` has built the response. + * + * @param cgi Reference to the CGI process metadata to be reset. + */ void cleanupCgi(CgiProcess& cgi) { unlinkWithErrorLog(cgi.input_path, "input temp file"); cgi.pid = -1; @@ -358,6 +633,23 @@ void cleanupCgi(CgiProcess& cgi) { cgi.input_path.clear(); } +/** + * @brief Check whether a CGI process has terminated, and reap it if so. + * + * @details + * - Calls `waitpid(pid, &status, WNOHANG)`: + * - Returns `false` if the process is still running (`result == 0`). + * - Returns `true` if the process has terminated (or if `waitpid` fails). + * - Logs an error if `waitpid` itself fails (`result == -1`). + * + * This function is non-blocking and can be safely called in the event loop + * to periodically check if a CGI process is done. 
+ * + * @param cgi Reference to the CGI process metadata. + * + * @return `true` if the CGI process has terminated (or an error occurred), + * `false` if it is still running. + */ bool tryTerminateCgi(CgiProcess& cgi) { int status; pid_t result = waitpid(cgi.pid, &status, WNOHANG); diff --git a/src/http/methodsHandler/handleDelete.cpp b/src/http/methodsHandler/handleDelete.cpp index 7c32b5fa..7352331d 100644 --- a/src/http/methodsHandler/handleDelete.cpp +++ b/src/http/methodsHandler/handleDelete.cpp @@ -3,13 +3,38 @@ /* ::: :::::::: */ /* handleDelete.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/21 15:06:07 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:22:25 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 10:23:19 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file handleDelete.cpp + * @brief Implements the handler for the HTTP DELETE method. + * + * @details The companion header `methodsHandler.hpp` exposes the top-level + * functions used by the request router to process method-specific logic: + * - @ref handleGet: serve static files, run CGI, or generate + * autoindex listings. + * - @ref handlePost: handle client uploads (raw body, + * URL-encoded forms, multipart forms). + * - @ref handleDelete: remove existing resources from the + * server filesystem (implemented in this file). + * + * Additionally, it provides: + * - @ref generateAutoindex: build a directory listing page + * when autoindexing is enabled. + * - @ref handleMultipartForm: internal helper for parsing + * multipart form-data uploads. + * + * These functions are invoked by the router after method + * validation (see @ref requestRouter.cpp).
+ * + * @ingroup request_handler + */ + #include "http/HttpRequest.hpp" // for HttpRequest #include "http/HttpResponse.hpp" // for HttpResponse #include "http/responseBuilder.hpp" // for generateError, generateSuccess @@ -28,21 +53,54 @@ class Server; namespace { +/** + * @brief Attempts to delete a file safely, producing an error response on failure. + * + * @details This helper encapsulates the low-level checks and filesystem + * operations for handling HTTP `DELETE` requests. It verifies: + * - Existence of the target (returns 404 if not found). + * - That the target is **not** a directory or special file (returns 403). + * - That the target is a regular file (otherwise rejected with 403). + * - Filesystem deletion via `std::filesystem::remove`, with + * detailed error mapping: + * - EACCES / EPERM → 403 Forbidden + * - ENOENT (file vanished) → 404 Not Found + * - Other errors → 500 Internal Server Error + * + * On success, the file is removed and the function returns `true`. + * On failure, `outError` is populated with an appropriate + * @ref HttpResponse. + * + * @param filepath Full path to the file to delete. + * @param req Original HTTP request (used in error responses). + * @param server Active server context (used in error responses). + * @param outError Populated with a generated error response if deletion fails. + * + * @return `true` if the file was successfully unlinked, `false` otherwise. 
+ * + * @ingroup request_handler + */ bool unlinkFile(const std::string& filepath, const HttpRequest& req, const Server& server, HttpResponse& outError) { struct stat st; + + // 1) Stat the file: reject if not found if (stat(filepath.c_str(), &st) != 0) { Logger::logFrom(LogLevel::WARN, "Delete Handler", "File not found → rejecting DELETE: " + filepath); outError = ResponseBuilder::generateError(404, server, req); return false; } + + // 2) Reject directories outright if (S_ISDIR(st.st_mode)) { Logger::logFrom(LogLevel::WARN, "Delete Handler", "Target is a directory → rejecting DELETE: " + filepath); outError = ResponseBuilder::generateError(403, server, req); return false; } + + // 3) Reject non-regular files (devices, sockets, etc.) if (!S_ISREG(st.st_mode)) { Logger::logFrom(LogLevel::WARN, "Delete Handler", "Target is not a regular file → rejecting DELETE: " + filepath); @@ -50,6 +108,7 @@ bool unlinkFile(const std::string& filepath, const HttpRequest& req, const Serve return false; } + // 4) Attempt deletion with detailed error handling std::error_code ec; if (!std::filesystem::remove(filepath, ec)) { if (ec.value() == EACCES || ec.value() == EPERM) { @@ -70,9 +129,24 @@ bool unlinkFile(const std::string& filepath, const HttpRequest& req, const Serve return false; } + // Success: file removed return true; } +/** + * @brief Generates a simple HTML confirmation page for a deleted file. + * + * @details Builds a minimal HTML5 document containing a title and message + * confirming that the requested file has been successfully deleted. + * The filename is HTML-escaped before insertion to prevent XSS. + * + * @param filename Name of the deleted file (not a path). Will be sanitized + * via @ref htmlEscape before being injected into the HTML. + * + * @return A fully-formed HTML string suitable as the body of a 200 OK response. 
+ * + * @ingroup request_handler + */ std::string generateDeleteHtml(const std::string& filename) { std::stringstream html; html << R"( @@ -95,7 +169,32 @@ std::string generateDeleteHtml(const std::string& filename) { } // namespace +/** + * @brief Handles an HTTP DELETE request for a given resource. + * + * @details This function enforces server security rules and attempts + * to remove the requested file if permitted: + * 1. Resolve the request path to a physical filesystem path. + * 2. Reject empty resolutions (403). + * 3. Reject symlinks (403) to prevent symlink attacks. + * 4. Reject directory-like URIs (403) to avoid recursive deletion. + * 5. Attempt file deletion with @ref unlinkFile, which validates + * type, permissions, and maps errors to proper responses. + * 6. On success, build and return a 200 OK response containing + * a confirmation HTML page (via @ref generateDeleteHtml). + * + * @param req The incoming HTTP request object. + * @param server Active server context. + * @param loc The matched location block from the configuration. + * + * @return A fully constructed @ref HttpResponse: + * - 200 OK with HTML confirmation if deletion succeeds. + * - Error response (403, 404, or 500) on failure. 
+ * + * @ingroup request_handler + */ HttpResponse handleDelete(const HttpRequest& req, const Server& server, const Location& loc) { + // 1) Resolve the requested URI to a filesystem path std::string path = resolvePhysicalPath(req, loc); if (path.empty()) { Logger::logFrom(LogLevel::WARN, "Delete Handler", @@ -103,12 +202,14 @@ HttpResponse handleDelete(const HttpRequest& req, const Server& server, const Lo return ResponseBuilder::generateError(403, server, req); } + // 2) Reject symbolic links to avoid symlink traversal exploits if (isSymlink(path)) { Logger::logFrom(LogLevel::WARN, "Delete Handler", "Target is a symlink → rejecting DELETE for URI: " + req.getPath()); return ResponseBuilder::generateError(403, server, req); } + // 3) Reject URIs ending with '/' (treated as directories) if (req.getPath().back() == '/') { Logger::logFrom(LogLevel::WARN, "Delete Handler", "Request URI ends with '/' → rejecting DELETE for directory-like path: " + @@ -116,15 +217,19 @@ HttpResponse handleDelete(const HttpRequest& req, const Server& server, const Lo return ResponseBuilder::generateError(403, server, req); } + // 4) Try to delete the file; errors are mapped to proper HTTP responses HttpResponse errResp; if (!unlinkFile(path, req, server, errResp)) { return errResp; } + // 5) On success: build a sanitized confirmation page std::string rawName = extractFilenameFromUri(req.getPath()); - std::string safeName = htmlEscape(rawName); - std::string body = generateDeleteHtml(safeName); + std::string safeName = htmlEscape(rawName); // escape to prevent XSS + std::string body = generateDeleteHtml(safeName); // build confirmation HTML Logger::logFrom(LogLevel::INFO, "Delete Handler", "Successfully deleted “" + safeName + "” → sending HTML confirmation"); + + // 6) Return a 200 OK response with confirmation body return ResponseBuilder::generateSuccess(200, body, "text/html", req); } diff --git a/src/http/methodsHandler/handleGet/generateAutoindex.cpp 
b/src/http/methodsHandler/handleGet/generateAutoindex.cpp index 83474c6c..75e4219c 100644 --- a/src/http/methodsHandler/handleGet/generateAutoindex.cpp +++ b/src/http/methodsHandler/handleGet/generateAutoindex.cpp @@ -3,13 +3,44 @@ /* ::: :::::::: */ /* generateAutoindex.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov +#+ +:+ +#+ */ +/* By: nlouis +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/06 09:08:33 by nlouis #+# #+# */ -/* Updated: 2025/08/18 11:06:54 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 10:53:24 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file generateAutoindex.cpp + * @brief Implements directory autoindex (HTML listing) generation. + * + * @details This file provides nginx-like autoindex functionality for HTTP GET + * requests on directories when no index file is found and autoindexing + * is enabled in the location block. + * + * Main responsibilities: + * - **Directory scanning** (@ref listDirectoryEntries): + * Collects entries, separates directories from files, preserves `..`. + * - **Escaping utilities**: + * - @ref escapeUriComponent for safe URIs. + * - @ref htmlEscape (from utils/htmlUtils) for HTML-safe output. + * - **Formatting helpers**: + * - @ref formatMTimeUTC for human-readable UTC timestamps. + * - @ref renderRow for HTML table rows with icons and metadata. + * - **HTML layout**: + * - @ref htmlHeader and @ref htmlFooter wrap the index table. + * - **Entrypoint**: @ref generateAutoindex assembles the full HTML + * directory listing and returns a @ref HttpResponse. + * + * Errors (nonexistent or inaccessible directories) result in 403 + * responses. Valid directories produce a simple HTML table with: + * - File/directory name (with 📄/📁 icons). + * - Last modified timestamp (UTC). + * - File size (bytes). 
+ * + * @ingroup request_handler + */ + #include "http/HttpRequest.hpp" // for HttpRequest #include "http/HttpResponse.hpp" // for HttpResponse #include "http/responseBuilder.hpp" // for generateError, generateSuccess @@ -34,7 +65,28 @@ namespace fs = std::filesystem; namespace { -// 1) Scan directory entries: separate names into dirs/files, keep “..” first if present +/** + * @struct DirEntry + * @brief Represents a single entry in a directory for autoindexing. + * + * @details Used by @ref listDirectoryEntries to store metadata for each + * file or subdirectory. Provides the minimal information needed + * to render an autoindex table row in HTML. + * + * @var DirEntry::name + * The entry’s filename (e.g., `"file.txt"` or `"subdir"`). + * + * @var DirEntry::size + * File size in bytes. For directories and `".."`, this is set to `0`. + * + * @var DirEntry::mtime + * Last modification time (UTC, as `time_t`). Used for “Last Modified” column. + * + * @var DirEntry::isDir + * Whether the entry is a directory (`true`) or a regular file (`false`). + * + * @ingroup request_handler + */ struct DirEntry { std::string name; std::uintmax_t size; @@ -44,6 +96,30 @@ struct DirEntry { namespace fs = std::filesystem; +/** + * @brief Lists and sorts directory entries for autoindex generation. + * + * @details Scans the given filesystem path and collects metadata into + * @ref DirEntry objects. Handles errors in a non-throwing way + * using `std::error_code`. Results are separated into: + * - `".."` (if parent exists, inserted at the front). + * - Subdirectories (sorted alphabetically). + * - Files (sorted alphabetically). + * + * Each entry records: + * - `name`: filename or `".."`. + * - `isDir`: directory vs file. + * - `size`: file size in bytes (0 for directories). + * - `mtime`: last modification time (UTC). + * + * @param path Filesystem path to the directory. + * @param ok Reference flag set to `false` if the directory + * cannot be opened or scanned safely. 
+ * + * @return A vector of @ref DirEntry objects sorted for autoindex rendering. + * + * @ingroup request_handler + */ std::vector listDirectoryEntries(const std::string& path, bool& ok) { std::vector entries; ok = true; @@ -54,7 +130,7 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { return {}; } - // Try to construct a directory_iterator without throwing: + // Try non-throwing directory iterator std::error_code dirEc; fs::directory_iterator it(dirPath, dirEc); if (dirEc) { @@ -62,10 +138,10 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { return {}; } - // Now loop in a non‐throwing way: + // Walk entries safely (clear errors on failure) std::error_code entryEc; for (; it != fs::directory_iterator{}; it.increment(entryEc)) { - if (entryEc) { + if (entryEc) { // skip unreadable entries entryEc.clear(); continue; } @@ -73,6 +149,7 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { fs::path p = it->path(); std::error_code statEc; + // Get last modification time fs::file_time_type ftime = fs::last_write_time(p, statEc); if (statEc) continue; @@ -80,9 +157,11 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { DirEntry entry; entry.name = p.filename().string(); + // Detect directory std::error_code dirCheckEc; entry.isDir = fs::is_directory(p, dirCheckEc) && !dirCheckEc; + // Get size (only for files) if (entry.isDir) { entry.size = 0ULL; } else { @@ -91,7 +170,7 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { entry.size = 0ULL; } - // Convert ftime → time_t + // Convert ftime → time_t for portable rendering auto sctp = std::chrono::time_point_cast( ftime - fs::file_time_type::clock::now() + std::chrono::system_clock::now()); entry.mtime = std::chrono::system_clock::to_time_t(sctp); @@ -99,7 +178,7 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { entries.push_back(entry); } - // Insert “..” if parent exists: + // Insert “..” if 
parent directory exists fs::path parent = dirPath.parent_path(); if (!parent.empty() && fs::exists(parent)) { DirEntry up; @@ -110,7 +189,7 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { entries.insert(entries.begin(), up); } - // Sort “..” at front, then other dirs, then files: + // Sort: “..” first, then directories, then files (alphabetically) std::vector dirs, files; for (auto const& e : entries) { if (e.name == "..") @@ -124,63 +203,134 @@ std::vector listDirectoryEntries(const std::string& path, bool& ok) { std::sort(dirs.begin(), dirs.end(), cmp); std::sort(files.begin(), files.end(), cmp); + // Build sorted result std::vector sorted; if (!entries.empty() && entries[0].name == "..") - sorted.push_back(entries[0]); + sorted.push_back(entries[0]); // keep parent first sorted.insert(sorted.end(), dirs.begin(), dirs.end()); sorted.insert(sorted.end(), files.begin(), files.end()); return sorted; } +/** + * @brief Percent-encodes a string for safe inclusion in a URI component. + * + * @details Encodes all characters except the unreserved set defined by RFC 3986: + * `ALPHA / DIGIT / "-" / "_" / "." / "~"`. + * Other bytes are converted into `%HH` format using uppercase hex digits. + * + * @param s Input string (raw filename or path segment). + * + * @return A URI-safe string with reserved characters percent-encoded. + * + * @note This is used when generating autoindex links to ensure that spaces, + * control characters, and unsafe symbols don’t break the resulting URL. + * + * @ingroup request_handler + */ std::string escapeUriComponent(const std::string& s) { std::ostringstream oss; oss << std::uppercase << std::hex; + for (unsigned char c : s) { + // Allowed characters remain unchanged if (std::isalnum(c) || c == '-' || c == '_' || c == '.' 
|| c == '~') { oss << c; } else { + // Encode everything else as %HH oss << '%' << std::setw(2) << std::setfill('0') << static_cast(c); } } return oss.str(); } +/** + * @brief Formats a file modification time into a human-readable UTC string. + * + * @details Converts a `time_t` (last modification time) into a textual + * representation using `gmtime()` and `std::put_time`. + * The format is: `DD-Mon-YYYY HH:MM` (UTC). + * + * @param t Last modification time as `std::time_t`. + * + * @return A formatted string (e.g., `"19-Aug-2025 09:42"`) or an empty + * string if conversion fails. + * + * @note This is used in autoindex HTML tables for the **Last Modified** column. + * + * @ingroup request_handler + */ std::string formatMTimeUTC(std::time_t t) { - std::tm* gmPtr = std::gmtime(&t); + std::tm* gmPtr = std::gmtime(&t); // Convert to UTC (thread-unsafe) if (!gmPtr) { return {}; } std::ostringstream ss; - ss << std::put_time(gmPtr, "%d-%b-%Y %H:%M"); + ss << std::put_time(gmPtr, "%d-%b-%Y %H:%M"); // Format: "19-Aug-2025 09:42" return ss.str(); } +/** + * @brief Renders a single `<tr>` row in the autoindex HTML table. + * + * @details Converts a directory entry into a clickable HTML link with + * metadata (last modified, size). + * - The `".."` entry is treated as a parent directory link. + * - Directories get a trailing slash (`/`) in both display and href. + * - Files show their size in bytes, while directories/`".."` show `"-"`. + * - Icons: 📁 for directories, 📄 for files. + * + * @param body Output stringstream accumulating the HTML response. + * @param entry Directory entry metadata (name, size, mtime, type). + * @param baseUri Base URI (must end with `/`) to prepend to href links. + * + * @note This function applies both @ref escapeUriComponent (for href safety) + * and @ref htmlEscape (for display safety), preventing XSS or broken links.
+ * + * @ingroup request_handler + */ void renderRow(std::ostringstream& body, const DirEntry& entry, const std::string& baseUri) { bool isParent = (entry.name == ".."); bool isDir = entry.isDir; + // Construct href and display name std::string href; std::string disp = entry.name; if (isParent) { - href = baseUri + "../"; + href = baseUri + "../"; // parent directory link disp = "../"; } else { - href = baseUri + escapeUriComponent(entry.name); + href = baseUri + escapeUriComponent(entry.name); // percent-encode special chars if (isDir) { disp += "/"; href += "/"; } } + // Metadata columns std::string icon = isDir ? "📁" : "📄"; std::string mtime = isParent ? "-" : formatMTimeUTC(entry.mtime); std::string size = (isDir || isParent) ? "-" : std::to_string(entry.size); + // Append HTML table row body << " " << "" << icon << " " << htmlEscape(disp) << "" << "" << mtime << "" << "" << size << "" << "\n"; } +/** + * @brief Generates the opening HTML markup for an autoindex page. + * + * @details Builds the document `<head>`, page title, and initial table + * structure for directory listing. + * The provided @p uri is escaped with @ref htmlEscape to prevent + * XSS and is displayed in both the `<title>` and `<h1>` headers. + * + * @param uri The request URI being indexed (e.g. "/images/"). + * @return HTML string containing the document header and table headers. + * + * @ingroup request_handler + */ std::string htmlHeader(const std::string& uri) { std::ostringstream ss; ss << R"(<!DOCTYPE html> @@ -199,36 +349,77 @@ std::string htmlHeader(const std::string& uri) { return ss.str(); } +/** + * @brief Generates the closing HTML markup for an autoindex page. + * + * @details Completes the open `<table>`, closes `<body>`, and terminates + * the HTML document. This function should always be called after + * @ref htmlHeader and rows rendered with @ref renderRow. + * + * @return HTML string containing the footer markup.
+ * + * @ingroup request_handler + */ std::string htmlFooter() { return std::string(" </table>\n</body>\n</html>"); } - } // anonymous namespace +/** + * @brief Generates an HTML autoindex page for a directory listing. + * + * @details This function inspects the given filesystem path, enumerates + * its directory entries via @ref listDirectoryEntries, and + * produces an HTML table showing subdirectories and files with + * name, last modification time, and size. + * + * Workflow: + * 1. **Directory validation**: Ensure the target path exists and is + * accessible. If not, return a `403 Forbidden` error. + * 2. **Base URI normalization**: Guarantee that the URI ends with + * a trailing slash (`/`) for proper link resolution. + * 3. **HTML generation**: Build the autoindex page by composing + * @ref htmlHeader, calling @ref renderRow for each entry, and + * closing with @ref htmlFooter. + * 4. **Response assembly**: Return a `200 OK` response with the + * generated HTML body and `text/html` MIME type. + * + * @param filepath Filesystem path to the directory being indexed. + * @param uri Request URI corresponding to the directory. + * @param request Incoming HTTP request context. + * @param server Server configuration (used for error responses). + * + * @return HttpResponse with a full HTML autoindex page on success, + * or an error response (403) if the directory cannot be accessed. 
+ * + * @ingroup request_handler + */ HttpResponse generateAutoindex(const std::string& filepath, const std::string& uri, const HttpRequest& request, const Server& server) { bool dirOk; auto entries = listDirectoryEntries(filepath, dirOk); if (!dirOk) { + // Directory not accessible → return 403 Forbidden Logger::logFrom(LogLevel::WARN, "Autoindex", "Cannot open directory: " + filepath + " → rejecting autoindex for URI: " + request.getPath()); return ResponseBuilder::generateError(403, server, request); } - // Ensure baseUri ends with “/” + // Ensure base URI ends with “/” for proper relative link building std::string baseUri = uri; if (baseUri.empty() || baseUri.back() != '/') baseUri += '/'; - // Build the HTML body + // Build the HTML body: header + rows + footer std::ostringstream body; body << htmlHeader(uri); for (auto const& e : entries) { - renderRow(body, e, baseUri); + renderRow(body, e, baseUri); // Add a row for each directory/file entry } body << htmlFooter(); + // Log and return success response with generated HTML Logger::logFrom(LogLevel::INFO, "Autoindex", "Generated autoindex for URI: " + request.getPath() + " (directory: " + filepath + ")"); diff --git a/src/http/methodsHandler/handleGet/handleGet.cpp b/src/http/methodsHandler/handleGet/handleGet.cpp index 6a5ba158..c848e91e 100644 --- a/src/http/methodsHandler/handleGet/handleGet.cpp +++ b/src/http/methodsHandler/handleGet/handleGet.cpp @@ -3,13 +3,38 @@ /* ::: :::::::: */ /* handleGet.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/15 12:39:41 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:24:33 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 10:46:55 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file handleGet.cpp + * @brief Implements the HTTP GET request 
handler. + * + * @details This file provides the logic for serving resources in response + * to HTTP GET requests. It includes: + * - **MIME type detection** (@ref detectMimeType) for correct + * `Content-Type` headers. + * - **File serving** (@ref serveFile) with small-file in-memory + * responses and large-file streaming responses. + * - **Directory handling** (@ref processDirectory): + * - Redirects URIs missing a trailing slash. + * - Serves configured `index` files if present. + * - Generates autoindex listings if enabled. + * - Returns 403 when directory listing is disabled. + * - **Main dispatcher** (@ref handleGet) which ties everything + * together, enforcing nginx-like behavior: + * - Rejects symlinks for security. + * - Differentiates between directories and regular files. + * - Returns appropriate errors (403/404) when access is denied + * or resources are missing. + * + * @ingroup request_handler + */ + #include "core/Location.hpp" // for Location #include "http/HttpRequest.hpp" // for HttpRequest #include "http/HttpResponse.hpp" // for HttpResponse @@ -30,7 +55,23 @@ class Server; namespace { +/** + * @brief Detects the MIME type of a file based on its extension. + * + * @details Uses a static lookup table mapping common file extensions + * (e.g., `.html`, `.png`, `.json`) to their corresponding + * MIME types. The file extension is normalized to lowercase + * before lookup. If the extension is not recognized, the + * generic `application/octet-stream` type is returned. + * + * @param file_path Path to the file whose MIME type should be determined. + * + * @return A MIME type string suitable for the `Content-Type` header. 
+ * + * @ingroup request_handler + */ static std::string detectMimeType(const std::string& file_path) { + // Static lookup table of common extensions → MIME types static const std::map<std::string, std::string> mime_types = { {".html", "text/html"}, {".htm", "text/html"}, {".css", "text/css"}, {".js", "application/javascript"}, @@ -43,12 +84,15 @@ static std::string detectMimeType(const std::string& file_path) { {".mp3", "audio/mpeg"}, {".mp4", "video/mp4"}, {".wasm", "application/wasm"}}; + // Extract file extension from path std::filesystem::path fsPath(file_path); std::string ext = fsPath.extension().string(); + // Normalize extension to lowercase for lookup std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) { return static_cast<char>(std::tolower(c)); }); + // Return known type if found, else default to binary auto it = mime_types.find(ext); if (it != mime_types.end()) return it->second; @@ -56,25 +100,59 @@ static std::string detectMimeType(const std::string& file_path) { return "application/octet-stream"; } +/** + * @brief Serves a static file in response to an HTTP GET request. + * + * @details This helper reads a file from disk and builds the + * appropriate @ref HttpResponse: + * - Verifies that the target is a valid file. + * - Opens the file in binary mode, rejects if inaccessible. + * - Determines the `Content-Type` header (uses + * @ref detectMimeType if none is provided). + * - If the file size is small (≤ 100 KiB), reads the + * entire file into memory and returns it inline. + * - If the file size is larger, streams it back using + * @ref ResponseBuilder::generateSuccessFile. + * + * Logs each decision (missing file, forbidden, in-memory, + * or streaming). + * + * @param file_path Filesystem path to the target file. + * @param request Incoming HTTP request. + * @param server Active server context (for error generation). + * @param content_type Optional MIME type override; if empty, auto-detected. 
+ * + * @return A valid @ref HttpResponse: + * - 200 with body or stream on success. + * - 404 if file not found. + * - 403 if access is denied. + * + * @ingroup request_handler + */ static HttpResponse serveFile(const std::string& file_path, const HttpRequest& request, const Server& server, std::string content_type) { + // 1) Check if the path is a file if (!isFile(file_path)) { Logger::logFrom(LogLevel::WARN, "Get Handler", "File not found: " + file_path); return ResponseBuilder::generateError(404, server, request); } + // 2) Attempt to open the file std::ifstream file(file_path, std::ios::binary | std::ios::ate); if (!file.is_open()) { Logger::logFrom(LogLevel::WARN, "Get Handler", "Cannot open file: " + file_path); return ResponseBuilder::generateError(403, server, request); } + // 3) Determine file size std::streamsize size = file.tellg(); file.seekg(0, std::ios::beg); + // 4) Detect MIME type if not provided if (content_type.empty()) content_type = detectMimeType(file_path); + // 5) Small files (≤100 KiB) → load fully into memory constexpr std::streamsize MEMORY_LIMIT = 100 * 1024; if (size <= MEMORY_LIMIT) { std::ostringstream buffer; @@ -86,6 +164,7 @@ static HttpResponse serveFile(const std::string& file_path, const HttpRequest& r " bytes): " + file_path); return ResponseBuilder::generateSuccess(200, body, content_type, request); } else { + // 6) Large files → return as streaming response Logger::logFrom(LogLevel::INFO, "Get Handler", "Serving large file via streaming (size: " + std::to_string(size) + " bytes): " + file_path); @@ -93,9 +172,38 @@ static HttpResponse serveFile(const std::string& file_path, const HttpRequest& r } } +/** + * @brief Handles HTTP GET requests targeting a directory. + * + * @details This function enforces nginx-like directory handling rules: + * 1. Normalize the URI and reject empty results (403). + * 2. If the URI does not end with a slash, issue a `301 Moved Permanently` + * redirect to the slash-terminated URI. + * 3. 
If an `index` file is configured: + * - Serve it if it exists. + * - Otherwise, if autoindex is enabled, generate a directory listing. + * - Otherwise, return 403 (listing disabled). + * 4. If no index is configured: + * - Generate autoindex if enabled. + * - Otherwise, return 403 (directory listing forbidden). + * + * @param dirPath Physical filesystem path to the requested directory. + * @param uri Original request URI. + * @param request Incoming HTTP request. + * @param server Active server context. + * @param loc The matched location block for this URI. + * + * @return A fully built @ref HttpResponse: + * - 301 Redirect if missing slash. + * - 200 OK with index file or autoindex if allowed. + * - 403 Forbidden if neither index nor autoindex is permitted. + * + * @ingroup request_handler + */ static HttpResponse processDirectory(const std::string& dirPath, const std::string& uri, const HttpRequest& request, const Server& server, const Location& loc) { + // 1) Normalize URI and reject empty (security check) std::string normalized = normalizePath(uri); if (normalized.empty()) { Logger::logFrom(LogLevel::WARN, "Get Handler", @@ -103,21 +211,24 @@ static HttpResponse processDirectory(const std::string& dirPath, const std::stri return ResponseBuilder::generateError(403, server, request); } + // 2) Redirect if URI does not end with '/' if (normalized.back() != '/') { std::string target = normalized + "/"; Logger::logFrom(LogLevel::INFO, "Get Handler", "Redirecting to: " + target); return ResponseBuilder::generateRedirect(301, target, request); } + // 3) If an index file is configured in the location const std::string& indexName = loc.getIndex(); if (!indexName.empty()) { std::string indexFullPath = joinPath(dirPath, indexName); if (isFile(indexFullPath)) { - return serveFile(indexFullPath, request, server, ""); + return serveFile(indexFullPath, request, server, + ""); // serve index.html (or configured name) } if (loc.isAutoindexEnabled()) { - return 
generateAutoindex(dirPath, uri, request, server); + return generateAutoindex(dirPath, uri, request, server); // fallback to autoindex } Logger::logFrom(LogLevel::WARN, "Get Handler", @@ -125,6 +236,7 @@ static HttpResponse processDirectory(const std::string& dirPath, const std::stri return ResponseBuilder::generateError(403, server, request); } + // 4) If no index: allow autoindex or forbid if (loc.isAutoindexEnabled()) { return generateAutoindex(dirPath, uri, request, server); } @@ -136,11 +248,38 @@ static HttpResponse processDirectory(const std::string& dirPath, const std::stri } // namespace +/** + * @brief Handles an HTTP GET request for a resource. + * + * @details This function implements nginx-like GET semantics: + * 1. Resolve the request URI to a physical filesystem path. + * 2. If the URI ends with a `/` but points to a file (e.g., `/file/`): + * - Reject with 404 (trailing slash on file). + * 3. Reject symlinks for security (403). + * 4. If the path exists: + * - If it’s a directory, delegate to @ref processDirectory + * (redirect, index, or autoindex handling). + * - If it’s a regular file, delegate to @ref serveFile + * (in-memory or streaming response). + * 5. If nothing matches, return 404 (not found). + * + * @param request Incoming HTTP request object. + * @param server Active server context. + * @param loc The matched location block for this URI. + * + * @return A fully constructed @ref HttpResponse: + * - 200 OK with file contents or directory listing. + * - 301 Redirect for missing slash. + * - 403 Forbidden for symlinks or disallowed directories. + * - 404 Not Found when resource doesn’t exist. + * + * @ingroup request_handler + */ HttpResponse handleGet(const HttpRequest& request, const Server& server, const Location& loc) { + // 1) Resolve request path to a filesystem path std::string filepath = resolvePhysicalPath(request, loc); - // NGINX: any GET ending in '/' is a directory lookup. 
- // If stripping the slash yields an existing file, return 404. + // 2) Special nginx rule: if URI ends with '/' but points to a file → 404 const std::string& uri = request.getPath(); if (!uri.empty() && uri.back() == '/') { std::string fileNoSlash = filepath; @@ -153,25 +292,30 @@ HttpResponse handleGet(const HttpRequest& request, const Server& server, const L } } + // 3) Reject symlinks for security if (isSymlink(filepath)) { Logger::logFrom(LogLevel::WARN, "Get Handler", "Symlink detected, rejecting request for: " + filepath); return ResponseBuilder::generateError(403, server, request); } + // 4) Check file type struct stat fileStat; if (stat(filepath.c_str(), &fileStat) == 0) { const std::string& uri = request.getPath(); if (S_ISDIR(fileStat.st_mode)) { + // Directory → delegate to processDirectory return processDirectory(filepath, uri, request, server, loc); } if (S_ISREG(fileStat.st_mode)) { + // Regular file → delegate to serveFile return serveFile(filepath, request, server, ""); } } + // 5) Not found or inaccessible Logger::logFrom(LogLevel::WARN, "Get Handler", "File not found or inaccessible: " + filepath); return ResponseBuilder::generateError(404, server, request); } diff --git a/src/http/methodsHandler/handlePost/handleMultipartForm.cpp b/src/http/methodsHandler/handlePost/handleMultipartForm.cpp index a25c8d13..8784100a 100644 --- a/src/http/methodsHandler/handlePost/handleMultipartForm.cpp +++ b/src/http/methodsHandler/handlePost/handleMultipartForm.cpp @@ -3,13 +3,47 @@ /* ::: :::::::: */ /* handleMultipartForm.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/06 21:44:51 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:25:35 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 11:24:48 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + 
* @file handleMultipartForm.cpp + * @brief Handles `multipart/form-data` POST uploads and persists the first file part to disk. + * + * @details + * Parses the request body using the `boundary` from the `Content-Type` header, + * scans parts until it finds a file part (`filename="..."`), trims the trailing + * CRLF from the part payload, enforces `client_max_body_size`, and writes the + * bytes to `<upload_dir>/<sanitized-filename>` (or a time‑based fallback). + * + * ### Flow + * 1. Extract `boundary` from `Content-Type`; `400` if missing. + * 2. `parseMultipartForm`: iterate parts, split headers/data, detect `filename=`, + * sanitize file name, trim payload CRLF, return name + bytes. + * 3. Enforce per‑part size ≤ `Server::getClientMaxBodySize()`; `413` otherwise. + * 4. Open destination file (binary) and write bytes atomically; `500` on I/O error. + * 5. Log outcome and return `201 Created` with a minimal HTML confirmation. + * + * ### Security & Robustness + * - **Filename sanitization**: strips unsafe characters to prevent path traversal. + * - **Size limits**: rejects oversized payloads to mitigate DoS. + * - **Strict delimiter matching**: avoids bleed between parts. + * + * ### Limitations + * - Parses the **first** file part only (ignores subsequent files/fields). + * - Operates in‑memory (no streaming/chunking); large files may increase RAM use. + * + * @note Consider extending to stream to a temp file, support multiple files/fields, + * and return JSON for API clients. + * + * @see HttpRequest, HttpResponse, ResponseBuilder, Logger, filesystemUtils + * @ingroup request_handler + */ + #include "core/Server.hpp" // for Server #include "http/HttpRequest.hpp" // for HttpRequest #include "http/HttpResponse.hpp" // for HttpResponse @@ -25,17 +59,34 @@ namespace { +/** + * @brief Construct the opening delimiter for a multipart part. + * + * @param boundary Boundary token from the Content-Type header. 
+ * @return A delimiter string in the form `"--<boundary>\r\n"`. + */ inline std::string makePartDelimiter(const std::string& boundary) { return "--" + boundary + "\r\n"; } +/** + * @brief Construct the closing delimiter for a multipart body. + * + * @param boundary Boundary token from the Content-Type header. + * @return A delimiter string in the form `"--<boundary>--"`. + */ inline std::string makeCloseDelimiter(const std::string& boundary) { return "--" + boundary + "--"; } /** - * Find the next occurrence of `delimiter` in `body` at or after `startPos`. - * Returns true if found (and sets outPos), false otherwise. + * @brief Find the next occurrence of a delimiter in a multipart body. + * + * @param body Full request body. + * @param delimiter Delimiter string to search for. + * @param startPos Starting offset in the body. + * @param outPos Output parameter; set to the position of the delimiter if found. + * @return true if the delimiter was found, false otherwise. */ bool findNextDelimiter(const std::string& body, const std::string& delimiter, size_t startPos, size_t& outPos) { @@ -44,16 +95,24 @@ bool findNextDelimiter(const std::string& body, const std::string& delimiter, si } /** - * Check if at position `pos` in `body` there is a closing boundary. + * @brief Check if the body contains the closing delimiter at the given position. + * + * @param body Full request body. + * @param pos Position to check. + * @param closeDelimiter Expected closing delimiter string. + * @return true if the body matches the closing delimiter at `pos`. */ bool isCloseDelimiter(const std::string& body, size_t pos, const std::string& closeDelimiter) { return body.compare(pos, closeDelimiter.size(), closeDelimiter) == 0; } /** - * Given a single “part” (everything between delimiters), split it into - * headersBlock (before the first "\r\n\r\n") and dataBlock (after it). - * Returns false if no double‐CRLF is found. + * @brief Split a raw part into headers and data sections. 
+ * + * @param part Raw part (between delimiters). + * @param headersBlock Output; substring up to the first double CRLF. + * @param dataBlock Output; substring after the first double CRLF. + * @return true if headers and data were successfully split, false otherwise. */ bool parseHeadersAndData(const std::string& part, std::string& headersBlock, std::string& dataBlock) { @@ -67,8 +126,11 @@ bool parseHeadersAndData(const std::string& part, std::string& headersBlock, } /** - * Look for filename="..." inside headersBlock. If found, extract the filename - * (without the quotes) into `outFilename` and return true. Otherwise return false. + * @brief Extract and sanitize the filename from a part's headers. + * + * @param headersBlock Headers of the multipart part. + * @param outFilename Output; sanitized filename if found. + * @return true if a `filename="..."` token was found, false otherwise. */ bool extractFilename(const std::string& headersBlock, std::string& outFilename) { const std::string token = "filename=\""; @@ -90,7 +152,10 @@ bool extractFilename(const std::string& headersBlock, std::string& outFilename) } /** - * If `data` ends with "\r\n", strip those two characters off. Otherwise return as-is. + * @brief Strip a trailing CRLF sequence from data if present. + * + * @param data Raw part data block. + * @return A copy of `data` without a trailing `"\r\n"`, if it existed. */ std::string trimEndingCRLF(const std::string& data) { if (data.size() >= 2 && data[data.size() - 2] == '\r' && data[data.size() - 1] == '\n') { @@ -100,16 +165,27 @@ std::string trimEndingCRLF(const std::string& data) { } /** - * Parse a multipart/form-data body and extract the first file‐part’s - * filename and its raw content. Returns true on success, false otherwise. + * @brief Parse a multipart/form-data body and extract the first file part. + * + * @details + * Iterates over all parts in the request body, delimited by the given boundary. 
+ * For each part: + * - Splits headers and data at the first double CRLF. + * - Checks for a `filename="..."` header. + * - If found, sanitizes the filename, trims trailing CRLF from the data block, + * and outputs both. + * + * Stops at the first file part encountered; ignores non-file parts. + * + * @param body Full HTTP request body (including all parts). + * @param boundary Boundary token from the Content-Type header. + * @param filename Output; sanitized filename from the file part. + * @param fileContent Output; raw file content (as bytes in a std::string). * - * - body: full HTTP request body (including all parts) - * - boundary: the “boundary” token from Content-Type, e.g. "----WebKitFormBoundary…" - * - filename: output parameter; set to the file’s original name - * - fileContent: output parameter; set to the file’s bytes (as a std::string) + * @return true if a file part was found and parsed successfully, false otherwise. * - * This will scan part by part until it finds a “filename=” header. If none - * is found, it ultimately returns false. + * @note Only the first file part is extracted; additional files are ignored. + * @warning Operates in memory; large uploads may impact RAM usage. */ bool parseMultipartForm(const std::string& body, const std::string& boundary, std::string& filename, std::string& fileContent) { @@ -181,8 +257,32 @@ bool parseMultipartForm(const std::string& body, const std::string& boundary, st } // anonymous namespace +/** + * @brief Handle a multipart/form-data POST request and persist the uploaded file. + * + * @details + * - Extracts the boundary from the `Content-Type` header. + * - Parses the multipart body to find the first file part. + * - Enforces `client_max_body_size` to prevent DoS. + * - Generates a fallback filename if none was provided. + * - Writes the file content to disk under the given upload directory. 
+ * - Returns an appropriate `HttpResponse`: + * - 400 if malformed, + * - 413 if too large, + * - 500 if I/O fails, + * - 201 with a confirmation HTML page on success. + * + * @param request Incoming HTTP request containing the multipart body. + * @param server Server configuration (used for limits and error pages). + * @param fullDirPath Absolute path of the directory where uploads are stored. + * + * @return HttpResponse representing the outcome (success or error). + * + * @ingroup request_handler + */ HttpResponse handleMultipartForm(const HttpRequest& request, const Server& server, const std::string& fullDirPath) { + // 1) Extract boundary from Content-Type header std::string contentType = request.getHeader("Content-Type"); size_t bpos = contentType.find("boundary="); if (bpos == std::string::npos) { @@ -191,6 +291,7 @@ HttpResponse handleMultipartForm(const HttpRequest& request, const Server& serve } std::string boundary = contentType.substr(bpos + 9); + // 2) Parse body to extract filename + file content std::string extractedFilename; std::string fileContent; if (!parseMultipartForm(request.getBody(), boundary, extractedFilename, fileContent)) { @@ -198,7 +299,7 @@ HttpResponse handleMultipartForm(const HttpRequest& request, const Server& serve return ResponseBuilder::generateError(400, server, request); } - // Enforce per-part size limit to avoid DoS + // 3) Enforce per-part size limit size_t maxBody = server.getClientMaxBodySize(); if (fileContent.size() > maxBody) { Logger::logFrom(LogLevel::WARN, "handleMultipartForm", @@ -207,12 +308,13 @@ HttpResponse handleMultipartForm(const HttpRequest& request, const Server& serve return ResponseBuilder::generateError(413, server, request); } + // 4) Fallback filename if client didn’t provide one if (extractedFilename.empty()) { extractedFilename = "upload_" + std::to_string(getCurrentTime()); } - std::string fullpath = joinPath(fullDirPath, extractedFilename); - + // 5) Compute destination path and open file + 
std::string fullpath = joinPath(fullDirPath, extractedFilename); std::ofstream out(fullpath, std::ios::binary); if (!out.is_open()) { Logger::logFrom(LogLevel::ERROR, "handleMultipartForm", @@ -220,6 +322,7 @@ HttpResponse handleMultipartForm(const HttpRequest& request, const Server& serve return ResponseBuilder::generateError(500, server, request); } + // 6) Write content and verify out << fileContent; out.close(); if (out.fail()) { @@ -228,6 +331,7 @@ HttpResponse handleMultipartForm(const HttpRequest& request, const Server& serve return ResponseBuilder::generateError(500, server, request); } + // 7) Success → return confirmation page Logger::logFrom(LogLevel::INFO, "handleMultipartForm", "Successfully uploaded: " + fullpath); return ResponseBuilder::generateSuccess( 201, "<html><body><h1>Uploaded: " + htmlEscape(extractedFilename) + "</h1></body></html>", diff --git a/src/http/methodsHandler/handlePost/handlePost.cpp b/src/http/methodsHandler/handlePost/handlePost.cpp index 8ff97031..fcbe2ac1 100644 --- a/src/http/methodsHandler/handlePost/handlePost.cpp +++ b/src/http/methodsHandler/handlePost/handlePost.cpp @@ -3,13 +3,35 @@ /* ::: :::::::: */ /* handlePost.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/19 10:19:13 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:25:19 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 11:17:37 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file handlePost.cpp + * @brief Implements HTTP POST request handling logic. + * + * @details This file provides the full workflow for processing POST requests: + * - Validating request body size and upload configuration. + * - Normalizing and securing the target upload path against symlinks, + * escapes, and unsafe locations. 
+ * - Supporting multiple content types: + * - `multipart/form-data` → handled by @ref handleMultipartForm. + * - `application/x-www-form-urlencoded` → parsed into key/value + * pairs and saved as HTML. + * - Raw body data → stored directly as a file. + * - Generating safe, unique filenames for uploads when needed. + * - Returning appropriate `HttpResponse` objects: + * - `201 Created` with confirmation on success. + * - Error responses (`400`, `403`, `404`, `413`, `500`) on invalid + * or failed operations. + * + * @ingroup request_handler + */ + #include "core/Location.hpp" // for Location #include "core/Server.hpp" // for Server #include "http/HttpRequest.hpp" // for HttpRequest @@ -34,8 +56,36 @@ namespace { +/** + * @brief Handle an `application/x-www-form-urlencoded` POST request. + * + * @details + * Parses the URL-encoded body of the request into key/value pairs, + * generates a simple HTML confirmation page, and writes it to a file + * at the given target path. + * + * Workflow: + * - Parse form body using `parseFormUrlEncoded`. + * - Reject with **400 Bad Request** if empty or malformed. + * - Build an HTML document containing submitted form fields. + * - Attempt to write the HTML to the specified file path. + * - On failure, return **500 Internal Server Error**. + * - On success, return **201 Created** with confirmation HTML. + * + * @param request The incoming HTTP request (provides form body). + * @param server The server context (used for error generation). + * @param fullpath Filesystem path where the HTML output should be saved. + * @param filename Suggested filename for confirmation message. + * @return HttpResponse + * - 201 Created with confirmation HTML if saved successfully. + * - 400 Bad Request if form body is invalid. + * - 500 Internal Server Error if file operations fail. 
+ * + * @ingroup request_handler + */ static HttpResponse handleUrlEncodedForm(const HttpRequest& request, const Server& server, const std::string& fullpath, const std::string& filename) { + // Parse form body into key/value pairs auto form = parseFormUrlEncoded(request.getBody()); if (form.empty()) { Logger::logFrom(LogLevel::WARN, "Post Handler", @@ -43,6 +93,7 @@ static HttpResponse handleUrlEncodedForm(const HttpRequest& request, const Serve return ResponseBuilder::generateError(400, server, request); } + // Build an HTML page displaying submitted form fields std::string html = "<html><body><h1>Form Received</h1>"; for (auto& formField : form) { html += "<p><b>" + htmlEscape(formField.first) + ":</b> " + htmlEscape(formField.second) + @@ -50,6 +101,7 @@ static HttpResponse handleUrlEncodedForm(const HttpRequest& request, const Serve } html += "</body></html>"; + // Try writing the HTML to the target file std::ofstream out(fullpath); if (!out.is_open()) { Logger::logFrom(LogLevel::ERROR, "Post Handler", @@ -65,14 +117,41 @@ static HttpResponse handleUrlEncodedForm(const HttpRequest& request, const Serve return ResponseBuilder::generateError(500, server, request); } + // Success: log and return confirmation response Logger::logFrom(LogLevel::INFO, "Post Handler", "Successfully wrote URL-encoded form to: " + fullpath); return ResponseBuilder::generateSuccess( 201, "<h1>Form Received. File " + filename + " created.</h1>", "text/html", request); } +/** + * @brief Handle a POST request with a raw body (no specific content type). + * + * @details + * Writes the raw request body directly to the given filesystem path. + * Primarily used for generic uploads when the content type is not + * `multipart/form-data` or `application/x-www-form-urlencoded`. + * + * Workflow: + * - Open target file for binary writing. + * - If opening fails, return **500 Internal Server Error**. + * - Write the raw body from the request into the file. 
+ * - If writing or closing fails, return **500 Internal Server Error**. + * - On success, return **201 Created** with confirmation HTML. + * + * @param request The incoming HTTP request (provides body to save). + * @param server The server context (used for error generation). + * @param fullpath Filesystem path where the body should be saved. + * @param filename Name of the saved file (for confirmation message). + * @return HttpResponse + * - 201 Created with confirmation HTML if saved successfully. + * - 500 Internal Server Error if file I/O fails. + * + * @ingroup request_handler + */ static HttpResponse handleRawBody(const HttpRequest& request, const Server& server, const std::string& fullpath, const std::string& filename) { + // Try opening the target file in binary mode std::ofstream out(fullpath, std::ios::binary); if (!out.is_open()) { Logger::logFrom(LogLevel::ERROR, "Post Handler", @@ -80,14 +159,18 @@ static HttpResponse handleRawBody(const HttpRequest& request, const Server& serv return ResponseBuilder::generateError(500, server, request); } + // Write the raw request body into the file out << request.getBody(); out.close(); + + // Verify the write and close operations succeeded if (out.fail()) { Logger::logFrom(LogLevel::ERROR, "Post Handler", "Failed to write or close file: " + fullpath); return ResponseBuilder::generateError(500, server, request); } + // Success: log and return confirmation response Logger::logFrom(LogLevel::INFO, "Post Handler", "Successfully saved file to: " + fullpath); return ResponseBuilder::generateSuccess( 201, "<html><body><h1>File " + htmlEscape(filename) + " created.</h1></body></html>", @@ -96,12 +179,36 @@ static HttpResponse handleRawBody(const HttpRequest& request, const Server& serv static std::atomic<uint64_t> uploadCounter{0}; +/** + * @brief Generate a unique filename for uploaded files. + * + * @details + * This function ensures that every uploaded file gets a unique name + * even under high concurrency. 
+ * It combines: + * - **Current time in nanoseconds since epoch** (high resolution). + * - **An atomic counter** (`uploadCounter`) to break ties when multiple + * uploads happen within the same nanosecond. + * + * The resulting filename follows the format: + * ``` + * upload_<nanoseconds_since_epoch>_<sequence_number> + * ``` + * + * @return std::string + * A unique, collision-resistant filename suitable for uploads. + * + * @ingroup request_handler + */ static std::string generateFilename() { - // 1) high-res time → nanoseconds since epoch + // 1) Get high-resolution current time → nanoseconds since epoch auto now = std::chrono::system_clock::now().time_since_epoch(); uint64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now).count(); - // 2) per-process counter to break ties within the same nanosecond - uint64_t seq = uploadCounter.fetch_add(1, std::memory_order_relaxed); + + // 2) Increment atomic counter to avoid collisions in the same nanosecond + uint64_t seq = uploadCounter.fetch_add(1, std::memory_order_relaxed); + + // 3) Build the final filename using both components std::ostringstream oss; oss << "upload_" << ns << "_" << seq; return oss.str(); @@ -109,14 +216,40 @@ static std::string generateFilename() { } // namespace +/** + * @brief Validate the prerequisites for processing a POST request. + * + * @details + * This function enforces three key checks before allowing a POST to proceed: + * - Ensures that the request body is not empty (rejects with **400 Bad Request**). + * - Ensures that the body size does not exceed the server's configured + * `client_max_body_size` (rejects with **413 Payload Too Large**). + * - Ensures that the target location has a configured `upload_store` + * (rejects with **403 Forbidden**). + * + * If any of these conditions fail, an appropriate `HttpResponse` is generated. + * Otherwise, the request is considered valid and processing can continue. + * + * @param request The HTTP request to validate. 
+ * @param server The server configuration, used to check limits. + * @param location The location configuration, used to check upload store availability. + * + * @return std::optional<HttpResponse> + * - `std::nullopt` if the request is valid. + * - A generated error response if validation fails. + * + * @ingroup request_handler + */ static std::optional<HttpResponse> validatePostRequest(HttpRequest const& request, Server const& server, Location const& location) { + // 1) Reject empty bodies if (request.getBody().empty()) { Logger::logFrom(LogLevel::WARN, "Post Handler", "Empty body → rejecting POST for URI: " + request.getPath()); return ResponseBuilder::generateError(400, server, request); } + // 2) Reject requests exceeding configured max body size if (request.getBody().size() > server.getClientMaxBodySize()) { Logger::logFrom(LogLevel::WARN, "Post Handler", "Body size " + std::to_string(request.getBody().size()) + @@ -126,6 +259,7 @@ validatePostRequest(HttpRequest const& request, Server const& server, Location c return ResponseBuilder::generateError(413, server, request); } + // 3) Reject if no upload store is configured for this location if (location.getUploadStore().empty()) { Logger::logFrom(LogLevel::WARN, "Post Handler", "No upload store configured → rejecting POST for URI: " + @@ -137,6 +271,39 @@ validatePostRequest(HttpRequest const& request, Server const& server, Location c } namespace fs = std::filesystem; + +/** + * @brief Resolve and validate the filesystem path for a POST upload target. + * + * @details + * This function computes the safe destination path for a file uploaded via POST, + * ensuring it is confined within the configured `upload_store` of a location. + * It enforces NGINX-like semantics and several security checks: + * - Rejects POST requests to paths like `/file.ext/` if `file.ext` exists (→ 404). + * - Decodes percent-encoded components (e.g. `%20`) in the request path. 
+ * - Builds the target path under the `upload_store`, appending a generated + * filename if the client POSTs to a directory or with a trailing slash. + * - Canonicalizes the target path and checks that it remains within the upload root. + * - Rejects paths containing symlinks to prevent traversal attacks (→ 403). + * - Ensures the target directory exists, creating it if needed. + * - Rejects uploads if the target file already exists (→ 400). + * + * On success, the function populates the output references with the resolved + * path, directory, and filename to be used for writing the uploaded content. + * + * @param request The incoming POST request. + * @param server The current server configuration (used for error responses). + * @param location The matched location configuration, with upload store info. + * @param outTargetPath Output: absolute safe path where the file will be written. + * @param outTargetDirectory Output: directory part of the resolved path. + * @param outTargetFilename Output: filename part of the resolved path. + * + * @return std::optional<HttpResponse> + * - `std::nullopt` if the path is valid and safe for writing. + * - An error `HttpResponse` if validation fails. + * + * @ingroup request_handler + */ static std::optional<HttpResponse> preparePostTargetPath(HttpRequest const& request, Server const& server, Location const& location, fs::path& outTargetPath, std::string& outTargetDirectory, @@ -232,32 +399,86 @@ preparePostTargetPath(HttpRequest const& request, Server const& server, Location return std::nullopt; } -static HttpResponse dispatchPostByContentType(HttpRequest const& request, Server const& server, +/** + * @brief Dispatch POST handling based on the request's Content-Type. + * + * @details Chooses the appropriate handler: + * - **multipart/form-data** → @ref handleMultipartForm (saves parts into @p + * targetDirectory). 
+ * - **application/x-www-form-urlencoded** → @ref handleUrlEncodedForm (writes parsed HTML + * to @p targetPath). + * - **(fallback)** any other/absent type → @ref handleRawBody (dumps raw body to @p + * targetPath). + * + * This function relies on a simple substring match of the `Content-Type` + * header. If you later support parameters (e.g. `charset=...`) or more + * types, consider normalizing and parsing the media type token. + * + * @param request Incoming HTTP request (provides headers/body). + * @param server Server context (for error responses). + * @param targetPath Absolute file path chosen for single-file saves. + * @param targetDirectory Directory path to store multipart parts. + * @param targetFilename Leaf filename for single-file saves. + * + * @return A populated @ref HttpResponse produced by the selected handler. + * + * @ingroup request_handler + */ +static HttpResponse dispatchPostByContentType(HttpRequest const& request, const Server& server, std::filesystem::path const& targetPath, std::string const& targetDirectory, std::string const& targetFilename) { + // Read Content-Type once; could be empty or contain parameters (e.g., boundary/charset) std::string contentTypeHeader = request.getHeader("Content-Type"); + // Multipart form → delegate to multipart handler (uses targetDirectory) if (contentTypeHeader.find("multipart/form-data") != std::string::npos) { return handleMultipartForm(request, server, targetDirectory); } + // URL-encoded form → parse and render confirmation HTML to targetPath if (contentTypeHeader.find("application/x-www-form-urlencoded") != std::string::npos) { return handleUrlEncodedForm(request, server, targetPath.string(), targetFilename); } + // Fallback → store raw body verbatim at targetPath return handleRawBody(request, server, targetPath.string(), targetFilename); } +/** + * @brief Handles an HTTP POST request end-to-end. 
+ * + * @details Pipeline: + * 1) **Preconditions** — validate body presence/size and that the @ref Location + * has an `upload_store` (see @ref validatePostRequest). Returns 400/413/403 on failure. + * 2) **Target path preparation** — resolve a safe destination under the + * location's upload store, forbid traversal/symlinks, create parent dirs, + * and choose a filename if needed (see @ref preparePostTargetPath). Returns + * 404/403/500/400 on failure per checks. + * 3) **Dispatch by Content-Type** — select the concrete handler: + * - `multipart/form-data` → @ref handleMultipartForm + * - `application/x-www-form-urlencoded` → @ref handleUrlEncodedForm + * - otherwise → @ref handleRawBody + * + * On success, returns a `201 Created`; on errors, returns the appropriate + * generated @ref HttpResponse from earlier stages. + * + * @param request Incoming HTTP request. + * @param server Active server context. + * @param location Matched location configuration (upload policy, paths). + * @return A fully constructed @ref HttpResponse. 
+ * + * @ingroup request_handler + */ HttpResponse handlePost(HttpRequest const& request, Server const& server, Location const& location) { - // 1) Preconditions + // 1) Preconditions: body present/within limits, upload_store configured std::optional<HttpResponse> maybeErrorResponse = validatePostRequest(request, server, location); if (maybeErrorResponse.has_value()) { return *maybeErrorResponse; } - // 2) Filesystem path prep + // 2) Prepare a safe target path under upload_store (dirs, filename, symlink checks) std::filesystem::path targetPath; std::string targetDirectory; std::string targetFilename; @@ -268,6 +489,6 @@ HttpResponse handlePost(HttpRequest const& request, Server const& server, return *maybePreparationError; } - // 3) Content-type dispatch + // 3) Route to the proper POST handler based on Content-Type return dispatchPostByContentType(request, server, targetPath, targetDirectory, targetFilename); } diff --git a/src/http/requestRouter.cpp b/src/http/requestRouter.cpp index fed75062..1033ddeb 100644 --- a/src/http/requestRouter.cpp +++ b/src/http/requestRouter.cpp @@ -3,13 +3,40 @@ /* ::: :::::::: */ /* requestRouter.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/12 23:13:23 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:17:33 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 10:15:21 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file requestRouter.cpp + * @brief Routes parsed HTTP requests to the appropriate handler. + * + * @details Implements the routing pipeline that maps an incoming + * @ref HttpRequest to a @ref HttpResponse using the active + * @ref Server configuration and its @ref Location blocks. 
+ * + * Workflow: + * 1) **Directory slash redirect:** If a request targets a physical + * directory without a trailing slash, issue a 301 to the + * slash-terminated URI (see @ref redirectOnDirectorySlash). + * 2) **Location resolution:** Find the best matching @ref Location by + * exact match or longest-prefix (see @ref findLocation). + * 3) **Configured redirects:** Apply `return`-based redirects if present + * in the matched location (see @ref redirectOnConfigured). + * 4) **Method validation:** Check supported/allowed methods, returning + * 501 or 405 as appropriate (see @ref validateRequestMethod). + * 5) **Dispatch:** Call the concrete method handler + * (GET/POST/DELETE) (see @ref dispatchByMethod). + * + * Errors are rendered via @ref ResponseBuilder helpers. Utilities from + * `filesystemUtils` are used to normalize URIs and resolve physical paths. + * + * @ingroup request_handler + */ + #include "core/Location.hpp" // for Location #include "core/Server.hpp" // for Server #include "http/HttpRequest.hpp" // for HttpRequest @@ -26,6 +53,31 @@ #include <vector> // for vector namespace { +/** + * @brief Redirects to a slash-terminated URI when the target is a real directory. + * + * @details This helper enforces nginx-like behavior for directory URIs: + * if the request path maps to a directory but lacks a trailing slash, + * issue a 301 redirect to the slash-appended path. + * + * Rules: + * - Only considers @ref Location blocks whose configured path ends with '/'. + * - Matches the request URI by location prefix (sans trailing slash). + * - Resolves a physical filesystem path with `resolvePhysicalPath` and + * checks it with `stat`; symlink-to-dir is treated as a directory. + * - Applies to any HTTP method (GET/POST/DELETE), not only GET. + * + * @param req Parsed HTTP request (used for path, method, headers). + * @param server Active server context (provides location list). + * @param uri Normalized request URI (usually `normalizePath(req.getPath())`). 
+ * + * @return std::nullopt if no redirect is needed; otherwise a ready 301 response + * with `Location: <path>/`. + * + * @note If `req.getPath()` already ends with '/', no redirect occurs. + * @warning Only locations defined with a trailing slash are inspected; locations + * configured without a trailing '/' are intentionally ignored here. + */ std::optional<HttpResponse> redirectOnDirectorySlash(const HttpRequest& req, const Server& server, const std::string& uri) { // If there's already a trailing slash in the request path, nothing to do. @@ -62,7 +114,28 @@ std::optional<HttpResponse> redirectOnDirectorySlash(const HttpRequest& req, con return std::nullopt; } -// 2) Find exact‐match or longest‐prefix location +/** + * @brief Selects the best-matching Location for a normalized request URI. + * + * @details Resolution is two-phase: + * 1) **Exact match**: return the first location whose normalized path + * equals `uri`. + * 2) **Longest-prefix match**: otherwise, return the location with the + * longest normalized path that is a prefix of `uri`. + * + * Each candidate path is normalized with `normalizePath`. Empty + * location paths are ignored for prefix matching. + * + * @param uri Normalized request URI (e.g., from `normalizePath(req.getPath())`). + * @param server Active server whose location set is consulted. + * + * @return Pointer to the selected @ref Location, or `nullptr` if none match. + * + * @note Matching is purely lexical (prefix test), not filesystem-based. + * @warning If multiple locations normalize to the same path, the first one + * encountered wins (implementation-defined by iteration order). + * @complexity O(N · C) where N = number of locations and C = normalization + prefix check cost. 
+ */ const Location* findLocation(const std::string& uri, const Server& server) { // Exact match for (const Location& loc : server.getLocations()) { @@ -87,7 +160,28 @@ const Location* findLocation(const std::string& uri, const Server& server) { return best; } -// 3) Handle configured “return” redirects on GET +/** + * @brief Apply a location-scoped redirect, if configured. + * + * @details If the matched @ref Location declares a redirect (i.e. a `return` rule), + * this function builds and returns a redirect response using + * @ref ResponseBuilder::generateRedirect. The redirect is applied + * unconditionally for any HTTP method and happens before method + * validation/dispatch in the routing pipeline. + * + * @param request Incoming HTTP request (used to propagate version/connection metadata). + * @param loc The matched location to inspect for a configured redirect. + * + * @return A populated @ref HttpResponse when a redirect rule is present; + * `std::nullopt` otherwise. + * + * @note The target URL is taken verbatim from configuration; no normalization is + * performed here. Ensure the configured target is either an absolute URI or a + * valid absolute-path per your configuration semantics. + * + * @warning Choose an appropriate status code (e.g., 301/302/307/308) in the + * configuration to control method preservation and cacheability semantics. + */ std::optional<HttpResponse> redirectOnConfigured(const HttpRequest& request, const Location& loc) { if (loc.hasRedirect()) { Logger::logFrom(LogLevel::INFO, "Router", @@ -98,7 +192,29 @@ std::optional<HttpResponse> redirectOnConfigured(const HttpRequest& request, con return std::nullopt; } -// 4) Check for un-implemented / not-allowed methods +/** + * @brief Validates the HTTP method against server support and location policy. + * + * @details Two checks in order: + * 1) **Server support**: rejects methods not implemented by this server + * (currently {"GET","POST","DELETE"}) with **501 Not Implemented**. 
+ * 2) **Location policy**: rejects methods not allowed by the matched + * @ref Location with **405 Method Not Allowed**. + * + * Both cases are logged and rendered via @ref ResponseBuilder::generateError. + * + * @param request Incoming HTTP request (used for method/path and response metadata). + * @param loc Matched configuration block to query method allowance. + * @param server Active server (used by error rendering). + * + * @return `std::nullopt` when the method is valid for this location; otherwise a + * ready error @ref HttpResponse (501 or 405). + * + * @note Per RFC 9110, origin servers **MUST** include an `Allow` header on 405 + * listing permitted methods for the target resource. + * @warning Method matching here is case-sensitive; normalize upstream if you + * want to accept non‑uppercase tokens. + */ std::optional<HttpResponse> validateRequestMethod(const HttpRequest& request, const Location& loc, const Server& server) { static const std::set<std::string> supported = {"GET", "POST", "DELETE"}; @@ -121,7 +237,25 @@ std::optional<HttpResponse> validateRequestMethod(const HttpRequest& request, co return std::nullopt; } -// 5) Dispatch to the specific handler +/** + * @brief Dispatches a validated request to the concrete method handler. + * + * @details Assumes @ref validateRequestMethod has already run, so only + * implemented-and-allowed methods reach this point. Dispatch is a + * simple chain on the request method: + * - "GET" → @ref handleGet + * - "POST" → @ref handlePost + * - "DELETE" → @ref handleDelete + * + * @param request Parsed HTTP request. + * @param server Active server context. + * @param loc Matched @ref Location. + * + * @return The handler-produced @ref HttpResponse. + * + * @note If additional methods are added in the future, extend this dispatcher + * (or switch to a table-driven approach). 
+ */ HttpResponse dispatchByMethod(const HttpRequest& request, const Server& server, const Location& loc) { const std::string& method = request.getMethod(); @@ -136,14 +270,41 @@ HttpResponse dispatchByMethod(const HttpRequest& request, const Server& server, } } // anonymous namespace +/** + * @brief Top-level router entrypoint for processing an HTTP request. + * + * @details This function implements the main request handling pipeline: + * 1. **Normalize URI** — sanitize the request path. + * 2. **Directory-slash redirect** — if a directory is requested without a trailing + * slash, generate a `301` redirect (see @ref redirectOnDirectorySlash). + * 3. **Location resolution** — find the best-matching @ref Location in the + * server configuration (exact or longest-prefix match). + * 4. **Configured redirect** — apply `return` directive–based redirects if present + * in the matched location (see @ref redirectOnConfigured). + * 5. **Method validation** — reject unsupported (501) or disallowed (405) methods + * (see @ref validateRequestMethod). + * 6. **Dispatch** — call the concrete handler for GET/POST/DELETE + * (see @ref dispatchByMethod). + * + * Errors at any stage are logged and turned into responses via @ref ResponseBuilder. + * + * @param request Parsed @ref HttpRequest. + * @param server Active @ref Server context. + * @return Fully constructed @ref HttpResponse to send back to the client. 
+ * + * @ingroup request_handler + */ HttpResponse handleRequest(const HttpRequest& request, const Server& server) { + // 1) Normalize the request path std::string uri = normalizePath(request.getPath()); + // 2) Check if this is a directory missing a trailing slash → 301 redirect std::optional<HttpResponse> redirectResponse = redirectOnDirectorySlash(request, server, uri); if (redirectResponse.has_value()) { return redirectResponse.value(); } + // 3) Find best matching Location block const Location* loc = findLocation(uri, server); if (loc == nullptr) { Logger::logFrom(LogLevel::WARN, "Router", @@ -151,15 +312,18 @@ HttpResponse handleRequest(const HttpRequest& request, const Server& server) { return ResponseBuilder::generateError(404, server, request); } + // 4) Check for a configured `return` redirect on this location std::optional<HttpResponse> configuredRedirect = redirectOnConfigured(request, *loc); if (configuredRedirect.has_value()) { return configuredRedirect.value(); } + // 5) Validate that the method is implemented and allowed in this location std::optional<HttpResponse> validationError = validateRequestMethod(request, *loc, server); if (validationError.has_value()) { return validationError.value(); } + // 6) Finally, dispatch to the actual handler (GET/POST/DELETE) return dispatchByMethod(request, server, *loc); } diff --git a/src/http/responseBuilder.cpp b/src/http/responseBuilder.cpp index 9dfa7bbb..d9ea896b 100644 --- a/src/http/responseBuilder.cpp +++ b/src/http/responseBuilder.cpp @@ -3,13 +3,40 @@ /* ::: :::::::: */ /* responseBuilder.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/11 12:14:23 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:18:46 by irychkov ### ########.fr */ +/* Updated: 2025/08/19 09:31:28 by nlouis ### ########.fr */ /* */ /* 
************************************************************************** */ +/** + * @file responseBuilder.cpp + * @brief Implements the ResponseBuilder utilities for constructing HTTP responses. + * + * @details This file defines helper functions to generate HTTP responses of different + * types (success, file-based success, error, redirect). It encapsulates the + * boilerplate logic of setting status codes, headers, body content, and + * connection persistence rules based on HTTP version and `Connection` headers. + * + * Key features: + * - Provides default status messages for standard HTTP codes + * via @ref MessageHandler::getDefaultMessage. + * - Centralizes connection management (keep-alive vs. close) in + * `initializeResponse`, consistent with HTTP/1.0 and HTTP/1.1 semantics. + * - Supports custom error pages configured in @ref Server instances, + * falling back to generated HTML when missing. + * - Generates file-backed responses (for static files or CGI output) + * with proper `Content-Length` and offset handling. + * - Generates redirect responses with appropriate `Location` headers. + * + * These utilities are typically invoked by request handlers (GET, POST, + * DELETE, CGI, etc.) to produce fully-formed @ref HttpResponse objects + * that can be sent back to clients. + * + * @ingroup http + */ + #include "http/responseBuilder.hpp" #include "core/Location.hpp" // for Location #include "core/Server.hpp" // for Server @@ -25,8 +52,40 @@ #include <utility> // for pair #include <vector> // for vector +/** + * @namespace MessageHandler + * @brief Utility namespace for HTTP status code reason phrases. + * + * @details Provides helpers to map numeric HTTP status codes + * (e.g., 200, 404, 500) to their standard reason phrases + * (e.g., "OK", "Not Found", "Internal Server Error"). 
+ * + * Currently, this namespace exposes a single function: + * - @ref getDefaultMessage : Returns the standard reason phrase + * for a given status code, or `"Unknown Error"` if not found. + * + * Used throughout response builders and handlers to ensure + * consistent status messages across all HTTP responses. + * + * @ingroup http + */ namespace MessageHandler { - +/** + * @brief Returns the default reason phrase for a given HTTP status code. + * + * @details Uses a static lookup table of standard HTTP status codes + * (e.g., 200 → "OK", 404 → "Not Found"). The table is initialized + * once on first call and reused for all subsequent calls. + * + * If the provided status code is not in the table, the function + * returns the fallback string `"Unknown Error"`. + * + * @param status_code The numeric HTTP status code (e.g., 200, 404, 500). + * @return The corresponding reason phrase as a string, or `"Unknown Error"` + * if the code is not recognized. + * + * @ingroup http + */ std::string getDefaultMessage(int status_code) { // Define a local alias for better readability and maintainability using StatusMessageMap = std::map<int, std::string>; @@ -64,7 +123,24 @@ std::string getDefaultMessage(int status_code) { namespace { -// Determines keep-alive status from HTTP version and connection header. +/** + * @brief Determines whether the TCP connection should be kept alive + * based on HTTP version and the `Connection` header. + * + * @details Implements protocol-specific keep-alive rules: + * - **HTTP/1.1**: persistent connections are the default. + * Only `"Connection: close"` disables keep-alive. + * - **HTTP/1.0**: connections close by default. + * Only `"Connection: keep-alive"` enables persistence. + * - **Other/unknown versions**: defaults to closing + * the connection for safety. + * + * @param version The HTTP version string from the request (e.g. `"HTTP/1.1"`). + * @param conn The value of the `Connection` header, lowercase-insensitive. 
+ * @return `true` if the connection may be kept alive, `false` if it should close. + * + * @ingroup http + */ bool shouldKeepAlive(const std::string& version, const std::string& conn) { // HTTP/1.1 defaults to keep-alive unless explicitly closed if (version == "HTTP/1.1") @@ -76,10 +152,33 @@ bool shouldKeepAlive(const std::string& version, const std::string& conn) { return false; } -// Common initialization for any response. +/** + * @brief Initializes a new HTTP response with status and connection metadata. + * + * @details This helper performs common setup for all responses: + * - Sets the numeric status code and reason phrase (e.g., `200 OK`). + * - Records the request's HTTP version and `Connection` header + * for later connection management. + * - Applies protocol-specific keep-alive rules via + * @ref shouldKeepAlive and closes the connection for certain + * error codes (400, 408, 413, 500). + * - Inserts the appropriate `Connection` header (`keep-alive` or `close`) + * into the response. + * + * It centralizes connection-handling logic so that all + * response builders (`success`, `error`, `redirect`, etc.) + * stay consistent with HTTP/1.0 and HTTP/1.1 semantics. + * + * @param response The @ref HttpResponse object being initialized. + * @param status_code The numeric HTTP status code (e.g., 200, 404, 500). + * @param message The reason phrase associated with the status code. + * @param request The original @ref HttpRequest, used to extract + * version and `Connection` header metadata. 
+ * + * @ingroup http + */ void initializeResponse(HttpResponse& response, int status_code, const std::string& message, const HttpRequest& request) { - // Set HTTP status code and message (e.g., 200 OK) response.setStatus(status_code, message); // Store request version and connection header to guide connection persistence response.setRequestMeta(request.getVersion(), request.getHeader("Connection")); @@ -100,55 +199,162 @@ void initializeResponse(HttpResponse& response, int status_code, const std::stri } // anonymous namespace +/** + * @namespace ResponseBuilder + * @brief Factory helpers for constructing HTTP responses. + * + * @details The ResponseBuilder namespace groups together utility + * functions that generate fully-formed @ref HttpResponse + * objects for different scenarios: + * + * - @ref generateSuccess : Build a success response with an inline body. + * - @ref generateSuccessFile : Build a response backed by a file (static file or CGI temp + * file). + * - @ref generateError : Build an error response, using custom pages if available or a + * default fallback. + * - @ref generateRedirect : Build a redirect response with a `Location` header. + * + * All builders rely on @ref initializeResponse to apply + * consistent status metadata, HTTP version, and connection + * persistence rules. This ensures uniform behavior across + * all response types. + * + * These functions are typically used by request handlers, + * CGI logic, and the router when constructing responses + * to be sent back to clients. + * + * @ingroup http + */ namespace ResponseBuilder { - +/** + * @brief Builds a successful HTTP response with an inline body. + * + * @details This helper constructs a standard success response: + * - Looks up the default reason phrase for the given status code + * (e.g., `200 → "OK"`). + * - Initializes the response with status line, HTTP version, and + * connection persistence rules via @ref initializeResponse. 
+ * - Sets the `Content-Type` header to the provided MIME type. + * - Attaches the supplied response body directly in memory. + * + * Typical use cases include sending JSON, HTML, or plain-text + * responses for successful GET/POST requests. + * + * @param status_code The HTTP status code to return (e.g., 200, 201). + * @param body The response body payload as a string. + * @param content_type The MIME type for the body (e.g., `"text/html"`, + * `"application/json"`). + * @param request The original @ref HttpRequest, used to propagate + * version and connection metadata. + * + * @return A fully constructed @ref HttpResponse object ready to send. + * + * @ingroup http + */ HttpResponse generateSuccess(int status_code, const std::string& body, const std::string& content_type, const HttpRequest& request) { HttpResponse response; - // Get the default reason phrase for the given status code (e.g., "OK") - std::string message = MessageHandler::getDefaultMessage(status_code); + std::string message = MessageHandler::getDefaultMessage(status_code); // Set status, connection headers, and keep-alive logic initializeResponse(response, status_code, message, request); - // Set the content type (e.g., "text/html", "application/json") response.setHeader("Content-Type", content_type); - // Attach the response body response.setBody(body); return response; } +/** + * @brief Builds a successful HTTP response backed by a file on disk. + * + * @details This helper constructs a response intended for file or CGI output: + * - Looks up the default reason phrase for the given status code + * (e.g., `200 → "OK"`). + * - Initializes the response with status line, HTTP version, and + * connection persistence rules via @ref initializeResponse. + * - Sets `Content-Type` to the provided MIME type. + * - Sets `Content-Length` to the provided file size. + * - Records the CGI body offset (if applicable) to indicate where + * dynamic output begins inside a temporary file. 
+ * - Stores the file path, allowing the networking layer to stream + * the file contents directly instead of keeping them in memory. + * + * Typical use cases include serving static files (HTML, CSS, JS, images) + * or returning CGI-generated content that lives in a temporary file. + * + * @param status_code The HTTP status code to return (e.g., 200, 206). + * @param file_path Filesystem path to the file that should be streamed. + * @param content_type The MIME type of the file (e.g., `"text/html"`, + * `"application/octet-stream"`). + * @param request The original @ref HttpRequest, used to propagate + * version and connection metadata. + * @param content_length Size of the file (or portion) to be served, in bytes. + * @param cgiBodyOffset Byte offset where the CGI response body begins, + * if serving a CGI-generated temp file; usually 0 otherwise. + * + * @return A fully constructed @ref HttpResponse object configured for file streaming. + * + * @ingroup http + */ HttpResponse generateSuccessFile(int status_code, const std::string& file_path, const std::string& content_type, const HttpRequest& request, std::streamsize content_length, std::streamsize cgiBodyOffset) { HttpResponse response; - // Get the default reason phrase for the given status code (e.g., "OK") - std::string message = MessageHandler::getDefaultMessage(status_code); + std::string message = MessageHandler::getDefaultMessage(status_code); // Set status, connection headers, and keep-alive logic initializeResponse(response, status_code, message, request); - // Set the content type (e.g., "text/html", "application/octet-stream") response.setHeader("Content-Type", content_type); response.setHeader("Content-Length", std::to_string(content_length)); response.setCgiBodyOffset(cgiBodyOffset); - // Set the file path for serving response.setFilePath(file_path); return response; } +/** + * @brief Builds an HTTP error response, optionally using a custom error page. 
+ * + * @details This helper constructs error responses in two stages: + * 1. It checks the server configuration for a custom error page + * matching the given status code (via @ref Server::getErrorPages). + * If found, it attempts to read the file and use its contents + * as the response body. + * 2. If no custom page is configured or the file cannot be read, + * it falls back to generating a minimal default HTML page with + * the status code and reason phrase (e.g., `"404 Not Found"`). + * + * In both cases, the response is initialized with correct status line, + * HTTP version, and connection persistence rules using + * @ref initializeResponse, and the `Content-Type` is set to `"text/html"`. + * + * This ensures clients always receive a valid, informative error page, + * even if the custom configuration fails. + * + * @param status_code The HTTP error code to return (e.g., 404, 500). + * @param server Reference to the @ref Server configuration, + * used to look up custom error pages. + * @param request The original @ref HttpRequest, used for version and + * connection metadata propagation. + * + * @return A fully constructed @ref HttpResponse object representing the error. 
+ * + * @ingroup http + */ HttpResponse generateError(int status_code, const Server& server, const HttpRequest& request) { HttpResponse response; std::string message = MessageHandler::getDefaultMessage(status_code); std::string body; - // Try custom error page + // 1) Look for a custom error page in the server config const auto& error_pages = server.getErrorPages(); auto it = error_pages.find(status_code); if (it != error_pages.end()) { std::string uri = it->second; - // Strip leading slash + + // Strip leading slash to avoid double slashes in joinPath if (!uri.empty() && uri.front() == '/') uri.erase(0, 1); - // Determine default root (location "/") + + // Find the root path from the default "/" location std::string default_root; for (const auto& loc : server.getLocations()) { if (loc.getPath() == "/") { @@ -156,13 +362,16 @@ HttpResponse generateError(int status_code, const Server& server, const HttpRequ break; } } + + // Build full filesystem path and try to load the file std::string full_path = joinPath(default_root, uri); std::ifstream file(full_path.c_str()); - if (file) + if (file) { body.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); + } } - // Fallback default page + // 2) Fallback: generate a minimal default HTML error page if (body.empty()) { std::ostringstream ss; ss << "<html><body><h1>" << status_code << " " << htmlEscape(message) @@ -170,29 +379,47 @@ HttpResponse generateError(int status_code, const Server& server, const HttpRequ body = ss.str(); } + // 3) Initialize the response metadata (status, headers, connection) initializeResponse(response, status_code, message, request); response.setHeader("Content-Type", "text/html"); - // Attach the generated or loaded error page body + + // 4) Attach the error body (custom or generated) if (!body.empty()) { response.setBody(body); } - /* response.setBody(body); */ return response; } +/** + * @brief Generates an HTTP redirect response. 
+ * + * @details Creates a response with the given redirect status code + * (e.g., 301 Moved Permanently, 302 Found, 307 Temporary Redirect), + * sets the appropriate `Location` header pointing to the target URL, + * and applies connection-handling logic via @ref initializeResponse. + * + * By default, no response body is included for redirects, and most + * clients rely solely on the `Location` header. A `Content-Length: 0` + * header may be explicitly added if desired, but is optional. + * + * @param status_code Redirect status code (must be in the 3xx range). + * @param location Target URL or path for the redirect. + * @param request The original HTTP request (provides metadata for connection handling). + * + * @return An initialized @ref HttpResponse representing the redirect. + * + * @ingroup http + */ HttpResponse generateRedirect(int status_code, const std::string& location, const HttpRequest& request) { HttpResponse response; - // Get default reason phrase for the redirect status code (e.g., "Moved Permanently") - std::string message = MessageHandler::getDefaultMessage(status_code); + std::string message = MessageHandler::getDefaultMessage(status_code); // Set status line, connection handling, and keep-alive logic initializeResponse(response, status_code, message, request); // Set the Location header to indicate the redirect target response.setHeader("Location", location); - // No body is sent in most redirects → explicitly set Content-Length to 0 - // response.setHeader("Content-Length", "0"); return response; } diff --git a/src/network/SocketManager.cpp b/src/network/SocketManager.cpp index a411b44e..6e2b2499 100644 --- a/src/network/SocketManager.cpp +++ b/src/network/SocketManager.cpp @@ -3,13 +3,31 @@ /* ::: :::::::: */ /* SocketManager.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: ktieu <ktieu@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/03 13:51:20 by irychkov #+# #+# 
*/ -/* Updated: 2025/06/09 01:29:07 by ktieu ### ########.fr */ +/* Updated: 2025/08/18 23:11:01 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file SocketManager.cpp + * @brief Implements the SocketManager core loop and server socket setup. + * + * @details + * Provides the main event-driven networking logic of Webserv. + * - Sets up listening sockets for configured servers. + * - Runs the non-blocking poll() loop to multiplex all I/O. + * - Handles new connections, client requests, and responses. + * - Integrates CGI handling, timeout checks, and error recovery. + * + * This file contains the **control flow backbone** of the web server: + * it ties together sockets, HTTP parsing, response building, CGI execution, + * and cleanup into a single poll-driven loop. + * + * @ingroup socket_manager + */ + #include "network/SocketManager.hpp" #include "utils/Logger.hpp" // for LogLevel, Logger #include "utils/filesystemUtils.hpp" // for getCurrentTime @@ -27,23 +45,69 @@ static volatile sig_atomic_t running = 1; +/** + * @internal + * @brief Signal handler for process-level interrupts. + * + * @param signum Signal number received. + * + * @details + * - On `SIGINT` (Ctrl+C), sets the global `running` flag to `0` + * to trigger a graceful shutdown. + * - `SIGPIPE` is ignored in the constructor (see @ref SocketManager), + * to prevent crashes when clients disconnect abruptly. + */ static void signalHandler(int signum) { if (signum == SIGINT) running = 0; } -// Constructor: sets up sockets for each server defined in the config +/** + * @brief Constructs a new SocketManager. + * + * @details + * - Installs signal handlers (`SIGINT` for shutdown, `SIGPIPE` ignored). + * - Initializes listening sockets for all configured servers. + * + * @param servers List of configured @ref Server instances. + * + * @throws SocketError if socket creation, binding, or listening fails. 
+ */ SocketManager::SocketManager(const std::vector<Server>& servers) { signal(SIGINT, signalHandler); signal(SIGPIPE, SIG_IGN); setupSockets(servers); } +/** + * @brief Destructor for SocketManager. + * + * @details + * Closes all file descriptors tracked in the poll set to + * ensure a clean shutdown. + */ SocketManager::~SocketManager() { for (const pollfd& pfd : _poll_fds) close(pfd.fd); } +/** + * @brief Handles error or hangup events reported by poll(). + * + * @details + * Interprets the poll revents mask for a given client file descriptor + * and logs the corresponding error type: + * - `POLLNVAL`: Invalid poll event on this fd. + * - `POLLERR`: Generic socket error. + * - `POLLHUP`: Client disconnected. + * + * After logging, the client connection is closed and cleaned up + * via @ref cleanupClientConnectionClose. + * + * @param fd The file descriptor that triggered the error event. + * @param index Index of the fd in the internal poll list. + * @param revents The bitmask of poll events received for the fd. + */ void SocketManager::handlePollError(int fd, size_t index, short revents) { if (revents & POLLNVAL) { Logger::logFrom(LogLevel::ERROR, "SocketManager", @@ -67,112 +131,171 @@ const char* SocketManager::SocketError::what() const throw() { return (_msg.c_str()); } -// Set up sockets for each server (host:port) +/** + * @brief Initializes and binds listening sockets for all configured servers. + * + * @details + * Iterates over the provided @ref Server instances and: + * - Ensures each (host, port) pair is only bound once. + * - Creates a non-blocking TCP socket (`SOCK_STREAM | SOCK_NONBLOCK`). + * - Enables `SO_REUSEADDR` to allow quick restarts on the same port. + * - Binds the socket to the host:port. + * - Puts the socket into listening mode with `listen()`. + * - Registers the socket in the internal poll list for event monitoring. + * - Groups servers sharing the same host:port into a virtual host list. 
+ * + * @param servers List of servers to initialize listening sockets for. + * + * @throws SocketError If socket creation, setsockopt, bind, or listen fails. + * + * @note + * - "localhost" is explicitly mapped to `127.0.0.1`. + * - Duplicate host:port entries are skipped to avoid double-binding. + */ void SocketManager::setupSockets(const std::vector<Server>& servers) { - std::set<std::pair<std::string, int>> bound; + std::set<std::pair<std::string, int>> bound; // Track already-bound host:port pairs + for (size_t i = 0; i < servers.size(); ++i) { const std::string& host = servers[i].getHost(); int port = servers[i].getPort(); + // Skip if this host:port has already been bound std::pair<std::string, int> key = std::make_pair(host, port); if (bound.count(key)) - continue; // Already bound, skip + continue; bound.insert(key); + // Create a non-blocking TCP socket int fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); if (fd < 0) throw SocketError("socket() failed: " + std::string(std::strerror(errno))); - int opt = 1; // Reuse same address/port if the server is restarted quickly + // Allow socket address reuse to restart quickly after shutdown + int opt = 1; if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) { close(fd); throw SocketError("setsockopt() failed: " + std::string(std::strerror(errno))); } + // Prepare socket address structure sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_port = htons(port); // Convert port to network byte order - // Convert hostname to IP address + + // Map "localhost" to loopback explicitly, otherwise use given host string if (host == "localhost") addr.sin_addr.s_addr = inet_addr("127.0.0.1"); else addr.sin_addr.s_addr = inet_addr(host.c_str()); - if (bind(fd, (sockaddr*) &addr, sizeof(addr)) < 0) { // Bind socket to IP:port + // Bind socket to the host:port + if (bind(fd, (sockaddr*) &addr, sizeof(addr)) < 0) { close(fd); throw SocketError("bind() failed on " + host + ":" + std::to_string(port) + ": " + 
strerror(errno)); } - if (listen(fd, SOMAXCONN) < 0) { // Start listening for incoming connections + // Put socket into listening mode + if (listen(fd, SOMAXCONN) < 0) { close(fd); throw SocketError("listen() failed: " + std::string(std::strerror(errno))); } - // Register fd in poll list + // Register fd in poll list for event monitoring _poll_fds.push_back((pollfd){fd, POLLIN, 0}); - // Collect all servers for this host:port + + // Collect all servers configured on this host:port (virtual hosts) std::vector<Server> vhosts; for (size_t j = 0; j < servers.size(); ++j) { if (servers[j].getHost() == host && servers[j].getPort() == port) vhosts.push_back(servers[j]); } + // Associate this listening fd with its vhost set _listen_map[fd] = vhosts; + Logger::logFrom(LogLevel::INFO, "SocketManager", "Listening on " + host + ":" + std::to_string(port)); } } +/** + * @brief Runs the main event loop of the server. + * + * @details + * This method drives the **non-blocking, poll()-based event loop** of Webserv. + * - Uses a single `poll()` call to multiplex all I/O (listening sockets, client FDs, CGI pipes). + * - Periodically checks for CGI activity and client timeouts. + * - Handles: + * - New connections (on listening sockets), + * - Read events (incoming client data), + * - Write events (pending responses), + * - Error/hangup events (POLLERR, POLLHUP, POLLNVAL). + * - Ensures resilience with try/catch around per-FD handling. + * + * The loop continues until the global `running` flag is cleared (SIGINT). + * + * @throws SocketError If `poll()` fails unexpectedly (other than EINTR). 
+ */ void SocketManager::run() { while (running) { + // Wait up to 1s for activity on any registered FD int n = poll(&_poll_fds[0], _poll_fds.size(), 1000); if (n < 0) { if (errno == EINTR) { + // Interrupted by signal → trigger graceful shutdown running = 0; continue; } throw SocketError("poll() failed: " + std::string(std::strerror(errno))); } + // First, handle any active CGI processes (timeout, completion, cleanup) handleCgiPollEvents(); - // Then handle sockets — but skip *all* CGI FDs before doing error/HUP checks + // Process poll events for each file descriptor in reverse order for (size_t i = _poll_fds.size(); i-- > 0;) { try { - short revents = _poll_fds[i].revents; + short revents = _poll_fds[i].revents; // events that fired int current_fd = _poll_fds[i].fd; + + // Check if this client has hit a timeout (idle/header/body/send) if (checkClientTimeouts(current_fd, i)) { - _poll_fds[i].events |= - POLLOUT; // If connection keep-alive but client idle we close + // Mark for POLLOUT → we will close after sending error/timeout response + _poll_fds[i].events |= POLLOUT; } - // **FIX**: skip CGI pipe FDs entirely + // Skip CGI pipe descriptors entirely — they’re handled in handleCgiPollEvents() if (_fd_to_cgi.contains(current_fd)) { continue; } - // Now error/hangup on *client* sockets + // Handle socket-level errors or hangups if (revents & POLLERR || revents & POLLHUP || revents & POLLNVAL) { handlePollError(current_fd, i, revents); continue; } + // Incoming data available if (revents & POLLIN) { if (_listen_map.count(current_fd)) { + // This is a listening socket → accept new connection handleNewConnection(current_fd); } else { + // Existing client → read request if (!handleClientData(current_fd, i)) - continue; - // we have a response queued, request poll‐out + continue; // client closed or error, skip further handling + // At least one response is now queued → request POLLOUT _poll_fds[i].events |= POLLOUT; } } + // Ready to write response if ((revents & 
POLLOUT) && !_client_info[current_fd].responses.empty()) { sendResponse(current_fd, i); } } catch (const std::exception& e) { + // Per-FD error resilience: log and cleanup Logger::logFrom(LogLevel::ERROR, "SocketManager", "Exception in poll loop for fd: " + std::to_string(_poll_fds[i].fd) + ": " + e.what()); @@ -189,70 +312,154 @@ void SocketManager::run() { Logger::logFrom(LogLevel::INFO, "SocketManager", "Shutting down server"); } +/** + * @brief Initializes tracking information for a newly accepted client. + * + * @details + * Creates or resets a @ref ClientInfo entry for the given client file descriptor: + * - Stores the client FD for later reference. + * - Initializes timestamps (`lastRequestTime`, `connectionStartTime`). + * - Resets request/response state (header/body counters, completion flags). + * - Clears send progress tracking (`bytes_sent`). + * - Associates the client with all servers (virtual hosts) listening on the + * same port as the accept()'ed socket. + * + * @param client_fd File descriptor of the newly accepted client socket. + * @param listen_fd File descriptor of the listening socket that accepted this client. 
+ */ void SocketManager::initializeClientInfo(int client_fd, int listen_fd) { - ClientInfo& info = _client_info[client_fd]; + ClientInfo& info = _client_info[client_fd]; // Create/lookup client state entry + + // Basic identification + info.client_fd = client_fd; - info.client_fd = client_fd; + // Initialize timers for connection and activity info.lastRequestTime = getCurrentTime(); info.connectionStartTime = getCurrentTime(); + + // Reset counters and parsing state info.headerBytesReceived = 0; info.bodyBytesReceived = 0; info.headerComplete = false; - info.bytes_sent = 0; - info.serversOnPort = _listen_map[listen_fd]; + + // Reset response send tracking + info.bytes_sent = 0; + + // Assign list of servers bound to the same listen_fd (vhost support) + info.serversOnPort = _listen_map[listen_fd]; } -// Accept new client and add to poll list +/** + * @brief Accepts and registers a new client connection. + * + * @details + * Called when a listening socket signals readiness (`POLLIN`): + * - Calls `accept()` to retrieve a new client socket FD. + * - Handles resource exhaustion gracefully: + * - `EMFILE` / `ENFILE`: process/system FD limit reached → log and drop. + * - `MAX_CLIENTS` limit: too many clients → reject and close FD. + * - Initializes the new client state via @ref initializeClientInfo. + * - Adds the client FD to the poll set, monitoring for read events. + * + * @param listen_fd The listening socket FD on which the connection was accepted. + */ void SocketManager::handleNewConnection(int listen_fd) { + // Accept a pending connection on the listening socket int client_fd = accept(listen_fd, NULL, NULL); if (client_fd < 0) { if (errno == EMFILE || errno == ENFILE) { - // We’ve hit the per‐process or system FD limit. 
+ // System or process has run out of file descriptors Logger::logFrom( LogLevel::ERROR, "SocketManager", "Out of file descriptors (accept failed: " + std::string(strerror(errno)) + ")"); return; } + // Generic accept() failure Logger::logFrom(LogLevel::ERROR, "SocketManager", "accept() failed: " + std::string(std::strerror(errno))); return; } + // Enforce application-level connection limit if (_poll_fds.size() >= MAX_CLIENTS) { Logger::logFrom(LogLevel::WARN, "SocketManager", "Maximum client limit reached. Rejecting connection."); - close(client_fd); // Optionally send HTTP error before closing (nice but optional) 503 + close(client_fd); // Could optionally send an HTTP 503 before closing return; } + // Initialize per-client state tracking initializeClientInfo(client_fd, listen_fd); + + // Register client FD into poll list, listening for incoming data _poll_fds.push_back((pollfd){client_fd, POLLIN, 0}); + + // Debug log: report accepted client and total open connections Logger::logFrom(LogLevel::kDEBUG, "SocketManager", "Accept returned fd: " + std::to_string(client_fd) + " | current open clients: " + std::to_string(_client_info.size())); } +/** + * @brief Processes incoming data for a client connection. + * + * @details + * Called when a client socket is readable (`POLLIN`): + * 1. Reads available data from the client using @ref receiveFromClient. + * - If the client disconnects or a read error occurs → return `false` (FD will be closed). + * 2. Attempts to parse one or more complete HTTP requests from the buffer + * via @ref parseAndQueueRequests. + * - If parsing is incomplete, waits for more data. + * - If a parse error occurs, queues an error response. + * 3. Processes all queued requests (except those blocked by CGI execution) + * via @ref processPendingRequests. + * + * Exception safety: + * - Catches `std::bad_alloc` (memory exhaustion), `std::exception`, and unknown errors. 
+ * - On error: disables further POLLIN events for this client and queues a `500 Internal Server + * Error`. + * + * @param client_fd File descriptor of the client socket. + * @param index Index of the client FD in the internal poll list. + * + * @return `true` if processing completed (even with errors queued), + * `false` if the connection should be closed. + */ bool SocketManager::handleClientData(int client_fd, size_t index) { try { + // 1) Receive raw bytes from the client socket if (!receiveFromClient(client_fd, index)) - return false; + return false; // Client disconnected or fatal recv() error + + // 2) Parse requests and queue them for processing if (!parseAndQueueRequests(client_fd)) - return false; + return false; // Incomplete request → wait for more data + + // 3) Execute queued requests (routing, CGI, response building) processPendingRequests(client_fd); - return (true); + + return true; } catch (const std::bad_alloc& e) { + // Out-of-memory while processing this client Logger::logFrom(LogLevel::ERROR, "SocketManager", "Memory allocation failed while handling client " + std::to_string(client_fd) + ": " + e.what()); } catch (const std::exception& e) { + // Generic runtime error Logger::logFrom(LogLevel::ERROR, "SocketManager", "Exception while handling client " + std::to_string(client_fd) + ": " + e.what()); } catch (...) 
{ + // Any other unknown error Logger::logFrom(LogLevel::ERROR, "SocketManager", "Unknown exception while handling client " + std::to_string(client_fd)); } + + // Disable further reads from this client (avoid repeated errors) _poll_fds[index].events &= ~POLLIN; + + // Queue an internal server error response respondError(client_fd, 500); - return (true); + + return true; // Keep connection alive long enough to send error response } diff --git a/src/network/SocketManagerRequest.cpp b/src/network/SocketManagerRequest.cpp index ea63db9d..53b32ef5 100644 --- a/src/network/SocketManagerRequest.cpp +++ b/src/network/SocketManagerRequest.cpp @@ -6,10 +6,33 @@ /* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/08 14:53:38 by irychkov #+# #+# */ -/* Updated: 2025/06/09 00:47:10 by nlouis ### ########.fr */ +/* Updated: 2025/08/18 23:09:00 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file SocketManagerRequest.cpp + * @brief Implements request handling logic for SocketManager. + * + * @details + * This file contains all methods of @ref SocketManager related to + * **client request processing**: + * - Receiving raw data from clients (@ref receiveFromClient). + * - Parsing HTTP requests into structured @ref HttpRequest objects + * (@ref parseAndQueueRequests). + * - Executing and routing pending requests, including CGI support + * (@ref processPendingRequests, @ref handleCgiRequest). + * - Managing CGI process polling, timeouts, and finalization + * (@ref handleCgiPollEvents). + * - Handling request errors and generating error responses + * (@ref handleRequestErrorIfAny). + * + * It forms the **read-path of the event loop**, turning client bytes into + * application-level requests and responses. 
+ * + * @ingroup socket_manager + */ + #include "core/Location.hpp" // for Location #include "core/Server.hpp" // for Server #include "http/HttpRequest.hpp" // for HttpRequest @@ -35,44 +58,81 @@ #include <utility> // for pair #include <vector> // for vector +/** + * @brief Receives raw data from a client socket. + * + * @details + * - Performs a non-blocking `recv()` on the given client FD. + * - Handles connection termination cases: + * - `bytes == 0`: client closed the connection → cleanup and return `false`. + * - `bytes < 0`: read error → cleanup and return `false`. + * - Appends received data to the client’s `requestBuffer`. + * - Tracks request progress: + * - Updates header vs. body byte counters. + * - Marks `headerComplete` when the `\r\n\r\n` delimiter is first encountered. + * + * @param client_fd The client socket file descriptor. + * @param index Index of the client FD in the poll list (needed for cleanup). + * + * @return `true` if data was successfully received and buffered, + * `false` if the connection should be closed. + * + * @throws None. Errors are logged, and cleanup is performed internally. 
+ */ bool SocketManager::receiveFromClient(int client_fd, size_t index) { char buffer[RECV_BUFFER]; + + // Update last activity timestamp (used for idle timeout detection) _client_info[client_fd].lastRequestTime = getCurrentTime(); + + // Non-blocking read from the client int bytes = recv(client_fd, buffer, sizeof(buffer) - 1, MSG_DONTWAIT); + if (bytes == 0) { + // Client performed an orderly shutdown Logger::logFrom(LogLevel::INFO, "SocketManager", "Client fd " + std::to_string(client_fd) + " disconnected."); cleanupClientConnectionClose(client_fd, index); return false; } if (bytes < 0) { + // Fatal read error Logger::logFrom(LogLevel::ERROR, "SocketManager", std::string("recv() failed: ") + std::strerror(errno)); cleanupClientConnectionClose(client_fd, index); return false; } + + // Null-terminate buffer for safe string construction buffer[bytes] = '\0'; + // Append the new chunk to the client's request buffer std::string single_msg(buffer, bytes); _client_info[client_fd].requestBuffer += single_msg; + + // Detect end of HTTP headers ("\r\n\r\n") size_t headerEndPos = _client_info[client_fd].requestBuffer.find("\r\n\r\n"); if (headerEndPos == std::string::npos) { + // Still receiving headers if (_client_info[client_fd].headerBytesReceived == 0) { + // First time seeing header data → mark connection start _client_info[client_fd].connectionStartTime = getCurrentTime(); } _client_info[client_fd].headerBytesReceived += bytes; } else { if (!_client_info[client_fd].headerComplete) { + // Header just completed in this recv() size_t fullHeaderSize = headerEndPos + 4; size_t oldBufferSize = _client_info[client_fd].requestBuffer.size() - bytes; size_t headerBytesThisTime = std::max((ssize_t) 0, (ssize_t) (fullHeaderSize - oldBufferSize)); + _client_info[client_fd].headerBytesReceived += headerBytesThisTime; _client_info[client_fd].bodyBytesReceived += (bytes - headerBytesThisTime); _client_info[client_fd].headerComplete = true; } else { - // Header already counted, this 
must be body + // Header already completed → this is pure body data _client_info[client_fd].bodyBytesReceived += bytes; } } @@ -80,36 +140,84 @@ bool SocketManager::receiveFromClient(int client_fd, size_t index) { return true; } +/** + * @brief Finds the best-matching Location for a given request path. + * + * @details + * Iterates through all @ref Location objects configured in the given @ref Server + * and selects the one whose `path` prefix matches the request path with the + * **longest match length** (longest-prefix match). + * + * Matching logic: + * - A location matches if `normalizePath(loc.getPath())` is a prefix of `path`. + * - If multiple locations match, the one with the longest normalized path is chosen. + * - If no location matches, returns `nullptr`. + * + * @param path The normalized request target path (e.g. `/images/foo.jpg`). + * @param server The server whose configured locations should be searched. + * + * @return Pointer to the best matching @ref Location, or `nullptr` if none match. + */ static const Location* findMatchingLocation(const std::string& path, const Server& server) { const Location* best = nullptr; size_t max = 0; + + // Iterate over all configured locations for this server for (const Location& loc : server.getLocations()) { + // Check if location path is a prefix of the request path if (path.rfind(normalizePath(loc.getPath()), 0) == 0 && normalizePath(loc.getPath()).size() > max) { + // Keep track of the longest prefix match so far best = &loc; max = normalizePath(loc.getPath()).size(); } } - return best; + + return best; // nullptr if no location matched } +/** + * @brief Processes all pending HTTP requests for a client. + * + * @details + * Runs through the client's queued @ref HttpRequest objects and: + * 1. Skips processing if a CGI process is currently running + * (only one CGI may run per client at a time). + * 2. 
Checks for parse errors in the request: + * - If present, generates an error response via @ref handleRequestErrorIfAny. + * 3. Resolves the request target to the best-matching @ref Location + * using @ref findMatchingLocation. + * - If no location matches, responds with 404. + * 4. If the request must be served by a CGI: + * - Marks client as running CGI, initializes the process with + * @ref handleCgiRequest, and defers further processing. + * 5. Otherwise, handles the request normally via @ref handleRequest and + * queues the @ref HttpResponse. + * + * The loop continues until: + * - No more pending requests, OR + * - A CGI request is started, OR + * - A response requires connection close. + * + * @param client_fd File descriptor of the client whose requests are processed. + */ void SocketManager::processPendingRequests(int client_fd) { ClientInfo& client = _client_info[client_fd]; - // As long as there is at least one pending request AND no CGI is currently running: + // Process while there are queued requests and no active CGI process while (!client.pendingRequests.empty() && !client.isCgiProcessRunning) { HttpRequest nextReq = client.pendingRequests.front(); const Server& server = client.serversOnPort[nextReq.getMatchedServerIndex()]; - // nextReq.printRequest(); - + // Handle requests with parsing errors int code = nextReq.getParseErrorCode(); if (code != 0) { if (handleRequestErrorIfAny(client_fd, code, nextReq, server)) - return; - continue; + return; // stop if error requires closing connection + continue; // otherwise, process next pending request } + // Match request path against server locations const Location* location = findMatchingLocation(normalizePath(nextReq.getPath()), server); if (!location) { if (handleRequestErrorIfAny(client_fd, 404, nextReq, server)) @@ -117,28 +225,62 @@ void SocketManager::processPendingRequests(int client_fd) { continue; } + // If request matches a CGI location → launch CGI process if (shouldSpawnCgi(nextReq, *location)) 
{ client.currentCgiRequest = nextReq; client.isCgiProcessRunning = true; bool ok = handleCgiRequest(client_fd, nextReq, server, *location); client.pendingRequests.pop(); if (!ok) - continue; - return; + continue; // CGI init failed → continue with next request + return; // CGI started → stop until it finishes } + // Otherwise, handle request normally and enqueue response HttpResponse resp = handleRequest(nextReq, server); client.responses.push(resp); client.pendingRequests.pop(); + + // If response signals "Connection: close" → stop processing if (resp.isConnectionClose()) return; } } +/** + * @brief Parses buffered client data into HTTP requests and queues them. + * + * @details + * This function repeatedly attempts to parse complete HTTP requests + * from a client’s `requestBuffer`: + * + * 1. **Check request limits:** + * - If headers exceed configured limits, clears buffer and pushes + * a `431 Request Header Fields Too Large` error. + * + * 2. **Parse next request:** + * - Uses @ref HttpRequestParser::parse to decode from the buffer. + * - On success: consumes parsed bytes, resets state, and queues the request. + * - On incomplete parse (`errorCode == 0`): returns `false` to wait for more data. + * - On parse error (`400`, `411`, `413`, `415`, etc.): + * - Marks the request with the error code. + * - Either clears the whole buffer (fatal errors) or erases consumed bytes. + * - Resets state and queues the error request. + * + * 3. **Loop continuation:** + * - If more data remains in `requestBuffer` (and headers are present), + * parsing continues until buffer is exhausted or incomplete. + * + * @param client_fd File descriptor of the client whose buffer is parsed. + * + * @return `true` if parsing succeeded or error requests were queued, + * `false` if parsing is incomplete and more data is required. 
+ */ bool SocketManager::parseAndQueueRequests(int client_fd) { ClientInfo& client = _client_info[client_fd]; while (true) { + // 1) enforce request size/limit rules if (checkRequestLimits(client_fd)) { client.requestBuffer.clear(); resetRequestState(client_fd); @@ -149,34 +291,41 @@ bool SocketManager::parseAndQueueRequests(int client_fd) { int errorCode = 0; std::size_t consumedBytes = 0; + // 2) attempt to parse one HTTP request from the buffer bool ok = HttpRequestParser::parse(request, client.requestBuffer, client.serversOnPort, errorCode, consumedBytes); if (!ok) { if (errorCode == 0) - return false; // incomplete + return false; // Incomplete request → wait for more data + // Fatal parse error (e.g. invalid headers, bad content length) request.setParseErrorCode(errorCode); request.printRequest(); + // Clear buffer for hard errors, else drop only consumed part if (errorCode == 415 || errorCode == 411 || errorCode == 400 || errorCode == 413) client.requestBuffer.clear(); else client.requestBuffer.erase(0, consumedBytes); + // Reset parsing state and queue error request resetRequestState(client_fd); client.pendingRequests.push(request); + // Stop if no headers left in buffer if (client.requestBuffer.find("\r\n\r\n") == std::string::npos) break; continue; } + // Success: remove consumed bytes, reset state, queue request client.requestBuffer.erase(0, consumedBytes); resetRequestState(client_fd); client.pendingRequests.push(request); + // Stop if buffer doesn’t contain another full header if (client.requestBuffer.find("\r\n\r\n") == std::string::npos) break; } @@ -184,25 +333,57 @@ bool SocketManager::parseAndQueueRequests(int client_fd) { return true; } +/** + * @brief Monitors and manages active CGI processes for all clients. + * + * @details + * Iterates over all connected clients and checks if a CGI process + * is associated with them. For each active CGI: + * + * 1. 
**Timeout check:** + * - If the process has been idle longer than @ref CGI_TIMEOUT_SECONDS, + * it is killed and a `504 Gateway Timeout` response is queued. + * + * 2. **Completion check:** + * - If @ref CGI::tryTerminateCgi indicates the process is done, + * finalize output with @ref CGI::finalizeCgi, push response, + * cleanup process state, and re-enable `POLLOUT` to send data. + * - After cleanup, if more requests remain queued for the client, + * continue processing them with @ref processPendingRequests. + * + * 3. **Exception safety:** + * - If any error occurs during CGI handling, logs the error, + * queues a `500 Internal Server Error`, and cleans up CGI state. + * + * Integration: + * - This method is called once per poll loop iteration before + * handling socket-level events. + * - It ensures CGI subprocesses do not block the server’s main loop. + * + * @see processPendingRequests + */ void SocketManager::handleCgiPollEvents() { for (auto& [client_fd, client] : _client_info) { if (!client.cgiProcess) - continue; + continue; // Skip clients without an active CGI try { CgiProcess& cgi = *client.cgiProcess; const Server& server = client.serversOnPort[client.currentCgiRequest.getMatchedServerIndex()]; + // 1. Timeout check if (getCurrentTime() - cgi.last_activity > CGI_TIMEOUT_SECONDS) { Logger::logFrom(LogLevel::WARN, "CGI", "Timeout. Killing CGI process for fd: " + std::to_string(client_fd)); client.responses.push(ResponseBuilder::generateError(504, server, {})); - CGI::errorOnCgi(cgi); - client.cgiProcess.reset(); + CGI::errorOnCgi(cgi); // Kill process with error + client.cgiProcess.reset(); // Drop process handle client.isCgiProcessRunning = false; client.currentCgiRequest = HttpRequest(); + + // Ensure POLLOUT so timeout response gets sent for (auto& pfd : _poll_fds) { if (pfd.fd == client_fd) { pfd.events |= POLLOUT; @@ -212,14 +393,16 @@ void SocketManager::handleCgiPollEvents() { continue; } + // 2. 
Completion check if (CGI::tryTerminateCgi(cgi)) { HttpResponse resp = CGI::finalizeCgi(cgi, server, client.currentCgiRequest); client.responses.push(resp); - CGI::cleanupCgi(cgi); + CGI::cleanupCgi(cgi); // Free temp files/resources client.cgiProcess.reset(); client.isCgiProcessRunning = false; client.currentCgiRequest = HttpRequest(); + // Mark socket ready for sending CGI response for (auto& pfd : _poll_fds) { if (pfd.fd == client_fd) { pfd.events |= POLLOUT; @@ -227,6 +410,7 @@ void SocketManager::handleCgiPollEvents() { } } + // Resume processing queued requests (pipelined) size_t idx = 0; for (; idx < _poll_fds.size(); ++idx) { if (_poll_fds[idx].fd == client_fd) @@ -236,11 +420,14 @@ void SocketManager::handleCgiPollEvents() { processPendingRequests(client_fd); } } catch (const std::exception& e) { + // 3. Exception handling: fail safe Logger::logFrom(LogLevel::ERROR, "SocketManager", "Exception during CGI handling for fd " + std::to_string(client_fd) + ": " + e.what()); - respondError(client_fd, 500); - cleanupCgiForClient(client_fd); + respondError(client_fd, 500); // Queue internal error + cleanupCgiForClient(client_fd); // Force cleanup + + // Mark socket for POLLOUT to flush error response for (auto& pfd : _poll_fds) { if (pfd.fd == client_fd) { pfd.events |= POLLOUT; @@ -256,38 +443,112 @@ bool hasFullChunkedBody(const std::string& buffer, size_t bodyStart) { return end != std::string::npos; } +/** + * @brief Initializes and starts a CGI process for a client request. + * + * @details + * - Allocates a new @ref CgiProcess in the client state. + * - Attempts to initialize the CGI environment via @ref CGI::initCgiProcess. + * - On success: leaves the process active and returns `true`. + * - On failure: + * - Logs an error with details. + * - Queues an error @ref HttpResponse (e.g. `500`, or errorCode returned). + * - Cleans up CGI state and clears the current request. + * - Returns `false`. + * + * @param client_fd The client file descriptor. 
+ * @param request The HTTP request to be executed by CGI. + * @param server The server context for this request. + * @param location The location context where CGI execution is configured. + * + * @return `true` if CGI initialization succeeded, + * `false` if an error response was queued instead. + * + * @see handleCgiPollEvents, processPendingRequests + */ bool SocketManager::handleCgiRequest(int client_fd, const HttpRequest& request, const Server& server, const Location& location) { ClientInfo& client = _client_info[client_fd]; - client.cgiProcess.emplace(); + client.cgiProcess.emplace(); // Allocate a new CGI process slot - int errorCode = 500; + int errorCode = 500; // Default fallback error code if (!CGI::initCgiProcess(*client.cgiProcess, request, server, location, _poll_fds, errorCode)) { + // Failed to spawn CGI → log and queue error response Logger::logFrom(LogLevel::ERROR, "SocketManager", "[CGI] Failed to initialize CGI process for client_fd " + std::to_string(client_fd) + " with script: " + location.getPath()); + HttpResponse err = ResponseBuilder::generateError(errorCode, server, request); _client_info[client_fd].responses.push(err); + + // Reset CGI state so client can recover client.cgiProcess.reset(); client.isCgiProcessRunning = false; - client.currentCgiRequest = HttpRequest(); // clears request - return false; // error response queued + client.currentCgiRequest = HttpRequest(); + + return false; // Error response queued instead of running CGI } - return true; // handled as CGI + return true; // CGI successfully started, will be managed by handleCgiPollEvents() } +/** + * @brief Handles an HTTP request error by generating and queuing an error response. + * + * @details + * - Builds an error @ref HttpResponse using @ref ResponseBuilder::generateError. + * - Pushes the response into the client’s response queue. + * - Removes the faulty request from the pending request queue. + * - Returns whether the response requires closing the connection (e.g. 
HTTP/1.0 without + * keep-alive). + * + * @param fd File descriptor of the client connection. + * @param code HTTP status code to return (e.g. 400, 404, 413). + * @param req The offending HTTP request (may be partially parsed). + * @param server The server context used for error page resolution. + * + * @return `true` if the error response requires closing the connection, + * `false` otherwise (keep-alive). + */ bool SocketManager::handleRequestErrorIfAny(int fd, int code, HttpRequest& req, const Server& server) { + // Generate error response for this request HttpResponse err = ResponseBuilder::generateError(code, server, req); + + // Queue the response to be sent back to the client _client_info[fd].responses.push(err); + + // Drop the faulty request from the pending queue _client_info[fd].pendingRequests.pop(); + // Decide if the connection must be closed after sending this response return err.isConnectionClose(); } +/** + * @brief Determines whether a request should be handled by a CGI process. + * + * @details + * A request qualifies for CGI execution if: + * - The path can be resolved to a valid absolute file path (`resolveAbsolutePath` not empty). + * - The HTTP method is either `GET` or `POST` (other methods are not CGI-eligible). + * - The resolved path matches a CGI-enabled location (`isCgiRequest`). + * + * If all conditions are met, returns `true` so the request is delegated + * to @ref handleCgiRequest. Otherwise, it should be served as a normal static/dynamic response. + * + * @param req The incoming HTTP request to evaluate. + * @param location The matched @ref Location context for the request path. + * + * @return `true` if the request should spawn a CGI process, + * `false` if it should be handled normally. 
+ */ bool SocketManager::shouldSpawnCgi(const HttpRequest& req, const Location& location) { + // Resolve absolute filesystem path of the requested resource std::string resolved = location.resolveAbsolutePath(req.getPath()); + + // Only GET/POST methods are supported for CGI execution, + // and only if the path belongs to a CGI-enabled location. return !resolved.empty() && (req.getMethod() == "GET" || req.getMethod() == "POST") && location.isCgiRequest(normalizePath(req.getPath())); } diff --git a/src/network/SocketManagerResponse.cpp b/src/network/SocketManagerResponse.cpp index 6d410573..d8ffca86 100644 --- a/src/network/SocketManagerResponse.cpp +++ b/src/network/SocketManagerResponse.cpp @@ -3,13 +3,38 @@ /* ::: :::::::: */ /* SocketManagerResponse.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/08 15:00:54 by irychkov #+# #+# */ -/* Updated: 2025/06/08 18:59:11 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 23:09:04 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file SocketManagerResponse.cpp + * @brief Implements response sending logic for SocketManager. + * + * @details + * This file contains all methods of @ref SocketManager related to + * **writing HTTP responses back to clients**: + * - Logging response status codes (@ref logResponseStatus). + * - Sending file-backed responses in chunks (@ref sendFileResponse). + * - Sending in-memory/raw responses (@ref sendRawResponse). + * - Driving the high-level send path for a client (@ref sendResponse). + * + * Responsibilities: + * - Stream headers and body data over non-blocking sockets. + * - Support both static files and CGI-generated temporary files. + * - Handle partial writes (tracking progress via `bytes_sent`). + * - Manage connection lifecycle (`keep-alive` vs `close`). 
+ * - Ensure resource cleanup (closing file streams, unlinking CGI temp files). + * + * This file implements the **write-path** of the event loop, + * complementing @ref SocketManagerRequest.cpp which handles the read-path. + * + * @ingroup socket_manager + */ + #include "http/HttpResponse.hpp" // for HttpResponse #include "http/handleCgi.hpp" // for unlinkWithErrorLog #include "network/SocketManager.hpp" // for ClientInfo, SocketManager @@ -30,6 +55,22 @@ #include <utility> // for pair #include <vector> // for vector +/** + * @brief Logs the HTTP response status being sent to a client. + * + * @details + * - Formats a message indicating the status code and target file descriptor. + * - Logs at different severity levels depending on status code range: + * - `< 400`: Informational (success / redirection). + * - `400–499`: Warning (client error). + * - `>= 500`: Error (server error). + * + * This function is called before sending a response to provide + * visibility into the server’s behavior and help with debugging. + * + * @param status The HTTP status code of the response. + * @param fd The client file descriptor receiving the response. + */ void SocketManager::logResponseStatus(int status, int fd) { std::string message = "Sending HTTP " + std::to_string(status) + " → fd " + std::to_string(fd); if (status < 400) @@ -40,9 +81,54 @@ void SocketManager::logResponseStatus(int status, int fd) { Logger::logFrom(LogLevel::ERROR, "SocketManager sendResponse", message); } +/** + * @brief Sends a file-based HTTP response to a client. + * + * @details + * Handles responses that stream a file (static resource or CGI temp file) + * to the client in a **non-blocking, incremental fashion**: + * + * Workflow: + * 1. **First call (headers not sent yet):** + * - Opens the response’s file path in binary mode. + * - If CGI body offset is set, seeks past CGI headers. + * - Builds and stores the HTTP status line and headers in + * `client.current_raw_response`. + * + * 2. 
**Send headers (if not fully sent yet):** + * - Writes remaining header bytes using `send(MSG_DONTWAIT)`. + * - Updates offset tracking (`bytes_sent`). + * - Returns early if partial write occurs (retry later). + * + * 3. **Send file body (chunked):** + * - Reads next chunk (8 KB) from the file into a buffer. + * - Sends it to the client socket. + * - Returns early if partial write occurs. + * + * 4. **Completion:** + * - On EOF (or 0 bytes read), closes file stream. + * - Clears `current_raw_response` and resets offset. + * - If response came from a CGI temp file, deletes it. + * - Pops the response from the queue. + * - Handles connection policy: + * - If `Connection: close` → closes client FD. + * - If keep-alive and no more responses → disables POLLOUT. + * + * Error handling: + * - On file open failure → queues a `500 Internal Server Error`. + * - On socket send failure → logs error, closes the connection. + * + * @param fd Client socket file descriptor. + * @param index Index of the client FD in the poll list. + * @param response The HTTP response object to send. + * + * @return `true` if more data remains to be sent, + * `false` if connection was closed or error occurred. 
+ */ bool SocketManager::sendFileResponse(int fd, size_t index, HttpResponse& response) { ClientInfo& client = _client_info[fd]; + // Step 1: open the file and build headers (first call) if (!client.file_stream.is_open()) { client.file_stream.open(response.getFilePath(), std::ios::binary); if (!client.file_stream.is_open()) { @@ -52,15 +138,18 @@ bool SocketManager::sendFileResponse(int fd, size_t index, HttpResponse& respons if (response.getCgiBodyOffset() > 0) client.file_stream.seekg(response.getCgiBodyOffset()); + // Build HTTP response headers std::ostringstream head; head << "HTTP/1.1 " << response.getStatusCode() << " " << response.getStatusMessage() << "\r\n"; for (const auto& header : response.getHeaders()) head << header.first << ": " << header.second << "\r\n"; head << "\r\n"; + client.current_raw_response = head.str(); } + // Step 2: send HTTP headers if not finished std::string& raw = client.current_raw_response; size_t& offset = client.bytes_sent; if (offset < raw.size()) { @@ -74,9 +163,10 @@ bool SocketManager::sendFileResponse(int fd, size_t index, HttpResponse& respons offset += sent; client.lastSendAttemptTime = getCurrentTime(); if (offset < raw.size()) - return true; + return true; // partial header write → retry later } + // Step 3: stream file body in chunks char buffer[8192]; client.file_stream.read(buffer, sizeof(buffer)); std::streamsize bytes_read = client.file_stream.gcount(); @@ -89,9 +179,10 @@ bool SocketManager::sendFileResponse(int fd, size_t index, HttpResponse& respons return false; } client.lastSendAttemptTime = getCurrentTime(); - return true; + return true; // still more file left } + // Step 4: completion (EOF reached) if (client.file_stream.eof() || bytes_read == 0) { Logger::logFrom(LogLevel::INFO, "SocketManager", "[DONE] We sent full FILE RESPONSE to fd:" + std::to_string(fd)); @@ -99,10 +190,13 @@ bool SocketManager::sendFileResponse(int fd, size_t index, HttpResponse& respons client.current_raw_response.clear(); offset = 
0; + // Cleanup CGI temp file if response came from CGI if (response.isCgiTempFile()) { CGI::unlinkWithErrorLog(response.getCgiTempFile(), "out temp file"); response.setCgiTempFile(""); } + + // Pop response and handle connection policy bool shouldClose = response.isConnectionClose(); client.responses.pop(); if (shouldClose) { @@ -113,22 +207,49 @@ bool SocketManager::sendFileResponse(int fd, size_t index, HttpResponse& respons } else if (client.responses.empty()) { Logger::logFrom(LogLevel::INFO, "SocketManager", "Connection: keep-alive - keeping the connection open"); - _poll_fds[index].events &= ~POLLOUT; // Disable POLLOUT for keep-alive + _poll_fds[index].events &= ~POLLOUT; // disable POLLOUT until more data } } return true; } +/** + * @brief Sends an in-memory (raw) HTTP response to a client. + * + * @details + * Handles responses that are already fully materialized in memory: + * 1) On first call, serialize the @ref HttpResponse to a byte string + * (`toHttpString()`) and reset the per-client send offset. + * 2) Perform non-blocking `send()` of the remaining bytes; update `bytes_sent`. + * 3) When all bytes are sent: + * - Pop the response from the queue, + * - Clear the staging buffer and reset offset, + * - Honor connection policy: + * * `Connection: close` → close FD, + * * keep-alive → if no more responses, drop POLLOUT to avoid busy loops. + * + * Error handling: + * - On `send()` failure, logs the error and closes the connection. + * + * @param fd Client socket file descriptor. + * @param index Index of the client FD in the poll list. + * @param response The response object to serialize and send. + * + * @return `true` if the socket remains usable (may need more sends), + * `false` if the connection was closed or a fatal error occurred. 
+ */ bool SocketManager::sendRawResponse(int fd, size_t index, HttpResponse& response) { ClientInfo& client = _client_info[fd]; size_t& offset = client.bytes_sent; + // First invocation for this response: serialize headers + body into a single string. if (client.current_raw_response.empty()) { client.current_raw_response = response.toHttpString(); offset = 0; } + // Attempt to send the remaining bytes (non-blocking). std::string& raw = client.current_raw_response; if (offset < raw.size()) { ssize_t sent = send(fd, raw.c_str() + offset, raw.size() - offset, MSG_DONTWAIT); @@ -142,6 +263,7 @@ bool SocketManager::sendRawResponse(int fd, size_t index, HttpResponse& response client.lastSendAttemptTime = getCurrentTime(); } + // If everything is sent, finalize and apply connection policy. if (offset >= raw.size()) { bool shouldClose = response.isConnectionClose(); client.responses.pop(); @@ -149,46 +271,73 @@ bool SocketManager::sendRawResponse(int fd, size_t index, HttpResponse& response Logger::logFrom(LogLevel::INFO, "SocketManager", "[DONE] We sent full RESPONSE to fd:" + std::to_string(fd)); client.current_raw_response.clear(); + if (shouldClose) { Logger::logFrom(LogLevel::INFO, "SocketManager", "Connection: close - closing the connection"); cleanupClientConnectionClose(fd, index); return false; } else if (client.responses.empty()) { + // No more data to write → stop polling for POLLOUT to avoid spin. Logger::logFrom(LogLevel::INFO, "SocketManager", "Connection: keep-alive - keeping the connection open"); - _poll_fds[index].events &= ~POLLOUT; // Disable POLLOUT for keep-alive + _poll_fds[index].events &= ~POLLOUT; } } return true; } +/** + * @brief Dispatches sending of the next queued response for a client. + * + * @details + * Retrieves the front @ref HttpResponse for the given client, logs its status + * via @ref logResponseStatus, then delegates to the appropriate write path: + * - File-backed response → @ref sendFileResponse (streams headers + file body). 
+ * - In-memory response → @ref sendRawResponse (single serialized buffer). + * + * Both send paths are **non-blocking**; they may return early when the socket + * can’t accept more bytes. Any fatal error (including allocation failure) is + * caught here; the client connection is then closed to keep the server healthy. + * + * @param client_fd Client socket file descriptor. + * @param index Index of the client FD in the poll list. + * + * @note This function assumes there is at least one queued response for + * `client_fd`. Callers should check the queue before enabling POLLOUT. + */ void SocketManager::sendResponse(int client_fd, size_t index) { try { + // Peek the next response to send for this client. HttpResponse& response = _client_info[client_fd].responses.front(); + // Log status code with severity (info/warn/error). logResponseStatus(response.getStatusCode(), client_fd); + // Choose write path: file-backed (streamed) vs raw (in-memory). if (response.isFileResponse()) { if (!sendFileResponse(client_fd, index, response)) - return; + return; // connection closed or fatal error handled inside } else { if (!sendRawResponse(client_fd, index, response)) - return; + return; // connection closed or fatal error handled inside } - return; + return; // keep connection; more data may remain for later POLLOUT } catch (const std::bad_alloc& e) { Logger::logFrom(LogLevel::ERROR, "SocketManager", "Fatal memory allocation error while sending response to fd " + std::to_string(client_fd)); } catch (const std::exception& e) { + // Any recoverable runtime error while sending this response. Logger::logFrom(LogLevel::ERROR, "SocketManager", "Exception in sendResponse for fd " + std::to_string(client_fd) + ": " + e.what()); } catch (...) { + // Last-resort safety net. Logger::logFrom(LogLevel::ERROR, "SocketManager", "Unknown fatal error in sendResponse for fd " + std::to_string(client_fd)); } + // On fatal errors, close the client cleanly to avoid undefined state. 
cleanupClientConnectionClose(client_fd, index); } diff --git a/src/network/SocketManagerTimeouts.cpp b/src/network/SocketManagerTimeouts.cpp index b5f1cbe5..546a2786 100644 --- a/src/network/SocketManagerTimeouts.cpp +++ b/src/network/SocketManagerTimeouts.cpp @@ -3,13 +3,40 @@ /* ::: :::::::: */ /* SocketManagerTimeouts.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/08 15:03:19 by irychkov #+# #+# */ -/* Updated: 2025/06/08 19:09:54 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 23:09:08 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file SocketManagerTimeouts.cpp + * @brief Implements timeout and limit enforcement for SocketManager. + * + * @details + * This file contains helper routines used by the poll-driven event loop to + * detect stalled or abusive connections and enforce request limits: + * - @ref isHeaderTimeout: header phase exceeded @c HEADER_TIMEOUT_SECONDS. + * - @ref isBodyTimeout: body phase exceeded @c TIMEOUT. + * - @ref isSendTimeout: no progress while sending for longer than @c TIMEOUT. + * - @ref isIdleTimeout: idle connection (no header bytes) exceeded @c TIMEOUT. + * - @ref checkClientTimeouts: orchestrates per-FD timeout checks and applies + * cleanup / event-mask adjustments. + * - @ref checkRequestLimits: enforces header length cap (@c HEADER_MAX_LENGTH) + * and queues a 431 if exceeded. + * + * Responsibilities: + * - Decide when to close connections vs. temporarily disable POLLIN. + * - Queue appropriate HTTP error responses (e.g., 408, 431). + * - Avoid interfering with active CGI executions. + * + * These functions are invoked from the main event loop to keep the server + * responsive, fair, and resilient under slowloris-style behavior or network stalls. 
+ * + * @ingroup socket_manager + */ + #include "http/HttpResponse.hpp" // for HttpResponse #include "http/handleCgi.hpp" // for CgiProcess #include "network/SocketManager.hpp" // for ClientInfo, SocketManager, TIMEOUT @@ -23,13 +50,40 @@ #include <time.h> // for time_t, size_t #include <vector> // for vector +/** + * @brief Detects header read timeout for a client connection. + * + * @details + * Triggers when: + * - No response is currently being sent (`responses` and `current_raw_response` empty), + * - The HTTP headers are not yet complete (`headerComplete == false`) but at least + * one header byte has arrived (`headerBytesReceived > 0`), + * - And the elapsed time since `connectionStartTime` exceeds + * `HEADER_TIMEOUT_SECONDS`. + * + * On timeout, queues a `408 Request Timeout` via @ref respondError and returns `true` + * so the caller can disable further reads (POLLIN) and flush the error response. + * + * @param fd Client socket file descriptor. + * @param now Current time snapshot used for comparison. + * @return `true` if a header timeout was detected and an error response was queued, + * otherwise `false`. + * + * @note This does not close the connection immediately; the caller is expected to + * keep the socket writable to send the queued 408 response. + */ bool SocketManager::isHeaderTimeout(int fd, time_t now) { ClientInfo& client = _client_info[fd]; + // No ongoing send, header not complete, some header bytes seen, + // and the header phase exceeded the configured timeout. if (client.responses.empty() && client.current_raw_response.empty() && !client.headerComplete && client.headerBytesReceived > 0 && now - client.connectionStartTime > HEADER_TIMEOUT_SECONDS) { + Logger::logFrom(LogLevel::WARN, "SocketManager", "Timeout on fd: " + std::to_string(fd)); + + // Queue 408 so the writer path can flush it to the client. 
respondError(fd, 408); return true; } @@ -37,66 +91,210 @@ bool SocketManager::isHeaderTimeout(int fd, time_t now) { return false; } +/** + * @brief Detects request body timeout for a client connection. + * + * @details + * This timeout applies when: + * - No response is currently being sent (`responses` and `current_raw_response` empty), + * - The HTTP headers have already been received completely (`headerComplete == true`), + * - And the elapsed time since `connectionStartTime` exceeds @c TIMEOUT. + * + * This typically indicates that the client started a request but did not finish + * sending the body in time (e.g. stalled upload or slowloris-style attack). + * + * On timeout, a `408 Request Timeout` is queued via @ref respondError and the function + * returns `true`, so the caller may disable reads (`POLLIN`) and flush the error. + * + * @param fd Client socket file descriptor. + * @param now Current server time snapshot. + * @return `true` if a body timeout occurred and an error response was queued, + * otherwise `false`. + * + * @note The connection is not closed immediately; it is marked for sending the 408 + * response before shutdown. + */ bool SocketManager::isBodyTimeout(int fd, time_t now) { ClientInfo& client = _client_info[fd]; + + // No ongoing send, header complete, and body phase exceeded TIMEOUT. if (client.responses.empty() && client.current_raw_response.empty() && client.headerComplete && now - client.connectionStartTime > TIMEOUT) { + Logger::logFrom(LogLevel::WARN, "SocketManager", "Timeout on fd: " + std::to_string(fd)); - respondError(fd, 408); + + respondError(fd, 408); // Queue HTTP 408 Request Timeout return true; } + return false; } +/** + * @brief Detects send timeout for a client connection. 
+ * + * @details + * This timeout applies when: + * - There are still responses waiting to be sent (`!responses.empty()`), + * - A response is actively being transmitted (`!current_raw_response.empty()`), + * - But no progress has been made for more than @c TIMEOUT seconds + * (based on `lastSendAttemptTime`). + * + * This usually indicates that the client has stopped reading from its socket + * (e.g., slow or dead client), preventing the server from completing the response. + * + * On timeout, no error response is queued (since we are already mid-send), + * but the function signals the caller (`true`) so that the connection can be + * closed and cleaned up. + * + * @param fd Client socket file descriptor. + * @param now Current server time snapshot. + * @return `true` if a send timeout occurred and the connection should be closed, + * otherwise `false`. + * + * @note Unlike header/body timeouts, this does not queue a 408 response, + * because the server was already in the process of sending a response. + */ bool SocketManager::isSendTimeout(int fd, time_t now) { ClientInfo& client = _client_info[fd]; + + // Active send in progress but stalled for longer than TIMEOUT. if (!client.responses.empty() && !client.current_raw_response.empty() && now - client.lastSendAttemptTime > TIMEOUT) { + Logger::logFrom(LogLevel::WARN, "SocketManager", "Send timeout on fd: " + std::to_string(fd)); return true; } + return false; } +/** + * @brief Detects idle timeout for a client connection. + * + * @details + * This timeout applies when: + * - The client is not running a CGI process (`cgiProcess` is empty), + * - No response is in progress (`responses` and `current_raw_response` are empty), + * - No headers have been fully received (`headerComplete == false`), + * - No header bytes have been received at all (`headerBytesReceived == 0`), + * - And the time since the last recorded client activity (`lastRequestTime`) + * exceeds @c TIMEOUT. 
+ * + * This typically indicates an idle TCP connection where the client opened + * a socket but never sent any meaningful data. + * + * On timeout, no explicit error response is queued; the caller is expected to + * close the connection after receiving `true`. + * + * @param fd Client socket file descriptor. + * @param now Current server time snapshot. + * @return `true` if the connection is considered idle and should be closed, + * otherwise `false`. + * + * @note CGI processes are exempted from idle timeout handling because they may + * take significant time before producing output. + */ bool SocketManager::isIdleTimeout(int fd, time_t now) { ClientInfo& client = _client_info[fd]; + + // Skip idle timeout checks if a CGI is running. if (client.cgiProcess.has_value()) { return false; } + + // No response, no header progress, and idle for too long. if (client.responses.empty() && client.current_raw_response.empty() && !client.headerComplete && client.headerBytesReceived == 0 && now - client.lastRequestTime > TIMEOUT) { + Logger::logFrom(LogLevel::WARN, "SocketManager", "Idle timeout on fd: " + std::to_string(fd)); return true; } + return false; } +/** + * @brief Checks and applies timeout rules for a client connection. + * + * @details + * This function orchestrates all timeout checks for a client: + * - **Idle timeout**: connection opened but no activity (`isIdleTimeout`). + * - **Send timeout**: stalled during an ongoing response (`isSendTimeout`). + * - **Header timeout**: headers started but not completed in time (`isHeaderTimeout`). + * - **Body timeout**: headers done but body not finished within limit (`isBodyTimeout`). + * + * Behavior: + * - If idle or send timeout occurs → the connection is closed immediately via + * `cleanupClientConnectionClose()`, and the function returns `false`. + * - If header or body timeout occurs → the connection stays open long enough to + * send a timeout response (408). 
To prevent further reads, `POLLIN` is disabled + * for this fd, and the function returns `true`. + * - If no timeout is triggered, returns `false`. + * + * @param client_fd The socket file descriptor of the client. + * @param index Index of the client's pollfd entry in `_poll_fds`. + * @return `true` if a recoverable timeout (header/body) was detected and the + * connection is left open for sending a response, + * `false` if the connection was closed or no timeout occurred. + * + * @note This function integrates multiple specialized timeout checks into the + * main poll loop, ensuring the server proactively cleans up stale or + * non-responsive connections. + */ bool SocketManager::checkClientTimeouts(int client_fd, size_t index) { if (!_client_info.count(client_fd)) return false; time_t now = getCurrentTime(); + + // Hard timeouts → connection closed if (isIdleTimeout(client_fd, now) || isSendTimeout(client_fd, now)) { cleanupClientConnectionClose(client_fd, index); return false; } + + // Soft timeouts → send 408 and disable POLLIN if (isHeaderTimeout(client_fd, now) || isBodyTimeout(client_fd, now)) { _poll_fds[index].events &= ~POLLIN; return true; } + return false; } +/** + * @brief Enforces request size limits for an active client connection. + * + * @details + * This function checks whether the accumulated request headers for a client + * exceed the configured maximum length (`HEADER_MAX_LENGTH`). + * + * Behavior: + * - If the number of header bytes received so far (`headerBytesReceived`) + * exceeds the allowed maximum, the server immediately queues a + * **431 Request Header Fields Too Large** error response using + * `respondError()`, and returns `true`. + * - Otherwise, the request is considered within limits and processing continues. + * + * @param fd The socket file descriptor of the client being checked. + * @return `true` if the request exceeded the configured header limit + * (error response queued), + * `false` if still within allowed limits.
+ * + * @note The condition tests only `headerBytesReceived`; `headerComplete` is not + * consulted here. NOTE(review): confirm callers invoke this only before header completion. + */ bool SocketManager::checkRequestLimits(int fd) { ClientInfo& client = _client_info[fd]; - // Only enforce header-length limit while headers are still incomplete + // Reject once the received header bytes exceed HEADER_MAX_LENGTH if (client.headerBytesReceived > HEADER_MAX_LENGTH) { Logger::logFrom(LogLevel::WARN, "SocketManager", "Request header too large from fd: " + std::to_string(fd)); - respondError(fd, 431); // Request Header Fields Too Large + respondError(fd, 431); // 431: Request Header Fields Too Large return true; } diff --git a/src/network/SocketManagerUtils.cpp b/src/network/SocketManagerUtils.cpp index 7c8f6405..61e3cb74 100644 --- a/src/network/SocketManagerUtils.cpp +++ b/src/network/SocketManagerUtils.cpp @@ -3,13 +3,38 @@ /* ::: :::::::: */ /* SocketManagerUtils.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/08 14:58:17 by irychkov #+# #+# */ -/* Updated: 2025/08/17 21:04:16 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 23:09:12 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file SocketManagerUtils.cpp + * @brief Utility methods for connection lifecycle management in SocketManager. + * + * @details + * This file implements helper functions used internally by the + * `SocketManager` class to manage client state, poll descriptors, + * error handling, and cleanup routines. These utilities ensure proper + * resource deallocation and safe connection teardown. + * + * Main responsibilities: + * - Cleaning up CGI processes associated with a client (`cleanupCgiForClient`). + * - Removing `pollfd` entries from the monitored list (`removePollFd`).
+ * - Resetting client request parsing state (`resetRequestState`). + * - Gracefully closing client connections and releasing resources + * (`cleanupClientState`, `cleanupClientConnectionClose`). + * - Generating and queuing error responses (`respondError`). + * + * These functions are invoked by higher-level event loop and request + * handling methods to enforce robustness, prevent resource leaks, and + * handle exceptional conditions. + * + * @ingroup socket_manager + */ + #include "core/Server.hpp" // for Server #include "http/HttpRequest.hpp" // for HttpRequest #include "http/HttpResponse.hpp" // for HttpResponse @@ -27,6 +52,20 @@ #include <utility> // for pair #include <vector> // for vector +/** + * @brief Cleans up and resets CGI state for a specific client. + * + * @details + * If the client identified by @p client_fd has an active CGI process, + * this function terminates it via `CGI::cleanupCgi`, resets its state, + * and clears the currently running CGI request. + * If no CGI process is active, the function returns without action. + * + * @param client_fd The file descriptor of the client whose CGI state should be cleaned up. + * + * @note This function does not close the client connection itself — it only + * resets CGI-related state. Connection cleanup is handled separately. + */ void SocketManager::cleanupCgiForClient(int client_fd) { if (!_client_info.contains(client_fd)) return; @@ -41,12 +80,48 @@ void SocketManager::cleanupCgiForClient(int client_fd) { client.currentCgiRequest = HttpRequest(); } +/** + * @brief Removes a file descriptor entry from the poll list. + * + * @details + * This function erases the `pollfd` structure at the given index + * from the `_poll_fds` vector, which is used by `poll()` in the + * main event loop to monitor active sockets. + * + * If the index is out of range, no action is performed. + * + * @param index The position in `_poll_fds` corresponding to the + * client or listening socket to be removed. 
+ * + * @note This does not close the socket itself. The caller is responsible + * for performing connection cleanup separately (see + * `cleanupClientConnectionClose`). + */ void SocketManager::removePollFd(size_t index) { if (index < _poll_fds.size()) { _poll_fds.erase(_poll_fds.begin() + index); } } +/** + * @brief Cleans up all state associated with a client connection. + * + * @details + * This function safely releases all resources tied to a client: + * - Drains the response queue and deletes any CGI temporary files. + * - Cleans up and terminates any active CGI process. + * - Closes any open file streams linked to the client. + * - Erases the client entry from `_client_info`. + * + * Unlike `cleanupClientConnectionClose`, this function does not close + * the socket or modify the poll list — it only clears per-client state. + * + * @param client_fd The socket file descriptor of the client whose state + * should be cleaned up. + * + * @note Call this when tearing down a client session to ensure no resource + * leaks (file handles, temp files, or CGI processes). 
+ */ void SocketManager::cleanupClientState(int client_fd) { auto it = _client_info.find(client_fd); if (it == _client_info.end()) @@ -54,6 +129,7 @@ void SocketManager::cleanupClientState(int client_fd) { ClientInfo& client = it->second; + // Clean up pending responses and unlink any CGI temp files while (!client.responses.empty()) { HttpResponse& resp = client.responses.front(); if (resp.isCgiTempFile()) { @@ -62,6 +138,7 @@ void SocketManager::cleanupClientState(int client_fd) { client.responses.pop(); } + // Handle any active CGI process if (client.cgiProcess) { CGI::errorOnCgi(*client.cgiProcess); client.cgiProcess.reset(); @@ -69,13 +146,33 @@ void SocketManager::cleanupClientState(int client_fd) { client.currentCgiRequest = HttpRequest(); } + // Close file stream if open if (client.file_stream.is_open()) { client.file_stream.close(); } + // Finally remove client record _client_info.erase(it); } +/** + * @brief Gracefully closes a client connection and cleans up its state. + * + * @details + * This function performs a full teardown of a client connection: + * - Removes the file descriptor from the poll list (`removePollFd`). + * - Cleans up all per-client state (`cleanupClientState`), including + * pending responses, CGI processes, and open streams. + * - Closes the actual socket file descriptor via `close()`. + * - Logs the closure event. + * + * @param client_fd The socket file descriptor of the client being closed. + * @param index The index in `_poll_fds` corresponding to the client_fd. + * + * @note Use this when the connection must be terminated (e.g. client + * disconnect, fatal error, or `Connection: close` response). + * For state cleanup without closing the socket, see `cleanupClientState()`. 
+ */ void SocketManager::cleanupClientConnectionClose(int client_fd, size_t index) { removePollFd(index); cleanupClientState(client_fd); @@ -84,24 +181,62 @@ void SocketManager::cleanupClientConnectionClose(int client_fd, size_t index) { "Closed FD (Connection: close): " + std::to_string(client_fd)); } +/** + * @brief Resets the parsing state of a client’s current HTTP request. + * + * @details + * This function clears or resets request-related counters for a given client: + * - If the `requestBuffer` still contains data, the connection is treated + * as having an incomplete header. In this case, `headerComplete` is reset + * to false, but `headerBytesReceived` is preserved so that header timeout + * tracking continues. + * - If the buffer is empty, the function resets all parsing counters + * (`headerComplete`, `headerBytesReceived`, `bodyBytesReceived`). + * + * @param client_fd The file descriptor of the client whose request + * parsing state should be reset. + * + * @note This is typically called after a request has been fully parsed + * or after an error, so the connection can be reused for the next request + * (in keep-alive scenarios). + */ void SocketManager::resetRequestState(int client_fd) { if (!_client_info.count(client_fd)) return; - // If there is still any data in requestBuffer, treat it as a partial header: + + // If there is still data in requestBuffer, consider it a partial header if (!_client_info[client_fd].requestBuffer.empty()) { _client_info[client_fd].headerComplete = false; - // headerBytesReceived should reflect how many bytes are already in the buffer. - // But if we are just about to parse a brand‐new header, headerBytesReceived - // should have already been set by receiveFromClient(...) when those bytes first arrived. - // So here we do NOT zero it out—leave it alone so the header‐timer can still tick. 
+ // Preserve headerBytesReceived so timeout logic remains valid return; } - // If requestBuffer is empty, then there is no partial header in progress. + + // No partial header in progress → fully reset state _client_info[client_fd].headerComplete = false; _client_info[client_fd].headerBytesReceived = 0; _client_info[client_fd].bodyBytesReceived = 0; } +/** + * @brief Queues an HTTP error response for a client. + * + * @details + * This function generates a standardized HTTP error response with the + * given status code (e.g. 400, 404, 500) and pushes it onto the client’s + * response queue. + * + * - Uses the first server in the client’s `serversOnPort` list as a fallback + * context for generating the response. + * - The request object passed to `ResponseBuilder::generateError` is empty, + * since the error may occur before a valid request could be parsed. + * + * @param fd The client’s socket file descriptor. + * @param status_code The HTTP status code to send (e.g. 400, 404, 500). + * + * @note This does not immediately send the response — it only enqueues it. + * The response will be transmitted later in the event loop + * by `sendResponse()`. 
+ */ void SocketManager::respondError(int fd, int status_code) { HttpRequest empty; const Server& fallback = _client_info[fd].serversOnPort.front(); diff --git a/src/utils/Logger.cpp b/src/utils/Logger.cpp index 2ea627d8..e7eaba50 100644 --- a/src/utils/Logger.cpp +++ b/src/utils/Logger.cpp @@ -3,14 +3,39 @@ /* ::: :::::::: */ /* Logger.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/08/17 12:31:34 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:31:36 by irychkov ### ########.fr */ +/* Created: 2025/08/15 22:54:33 by nlouis #+# #+# */ +/* Updated: 2025/08/18 19:55:37 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file Logger.cpp + * @brief Simple color-coded logging utility. + * + * @details Implements the @ref Logger class methods and related helper + * functions for mapping log levels to human-readable strings + * and ANSI terminal colors. Supports standard log levels + * (`DEBUG`, `INFO`, `WARN`, `ERROR`) and can optionally + * prefix messages with a source identifier. + * + * Output is written to `stdout` for non-error messages and + * `stderr` for `ERROR` level messages, ensuring separation + * of normal and error output streams. ANSI escape codes are + * used for color, which may not render correctly in all + * terminals. + * + * @ingroup utils + * + * @note This logger is intended for human-readable output during + * development and runtime diagnostics, not structured logging. + * For production environments or log parsing, consider extending + * it to support formats like JSON or syslog. 
+ */ + #include "utils/Logger.hpp" + #include <iostream> // for basic_ostream, operator<<, endl, cerr, cout // ANSI color codes diff --git a/src/utils/errorUtils.cpp b/src/utils/errorUtils.cpp index 6d7bad1e..4def22cc 100644 --- a/src/utils/errorUtils.cpp +++ b/src/utils/errorUtils.cpp @@ -3,20 +3,21 @@ /* ::: :::::::: */ /* errorUtils.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/05 21:08:50 by nlouis #+# #+# */ -/* Updated: 2025/08/17 21:06:47 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:54:49 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ /** * @file errorUtils.cpp - * @brief Implements error formatting utilities. + * @brief Implements error formatting utilities for parsing. * * @details * Provides the implementation of `formatError`, a helper function that - * generates consistent diagnostic messages with line and column info. + * generates consistent diagnostic messages with line and column info during + * parsing. * @ingroup ErrorUtils */ diff --git a/src/utils/filesystemUtils.cpp b/src/utils/filesystemUtils.cpp index 67e98145..3c725c74 100644 --- a/src/utils/filesystemUtils.cpp +++ b/src/utils/filesystemUtils.cpp @@ -3,13 +3,31 @@ /* ::: :::::::: */ /* filesystemUtils.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/13 09:39:07 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:29:59 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:55:03 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file filesystemUtils.cpp + * @brief Implements filesystem utility functions for Webserv. 
+ * + * @details Provides helper routines for safely manipulating paths and + * filesystem resources in the context of an HTTP server. This includes: + * - Canonicalizing and joining URI and filesystem paths. + * - Mapping request URIs to location roots and upload stores. + * - Generating safe filenames for uploads. + * - Creating directories recursively. + * - Verifying file and symlink existence. + * The utilities here are designed to prevent directory traversal, + * enforce upload root boundaries, and integrate cleanly with + * Webserv's request–response pipeline. + * + * @ingroup filesystem_utils + */ + #include "utils/filesystemUtils.hpp" #include "core/Location.hpp" // for Location #include "http/HttpRequest.hpp" // for HttpRequest @@ -28,6 +46,34 @@ namespace fs = std::filesystem; +/** + * @brief Resolves a request URI to a concrete filesystem path. + * + * @details Computes the physical path for a request by: + * 1) Normalizing the request path and attempting resolution under the location's root. + * 2) If the resource is not a POST target and exists, returns that path. + * 3) Otherwise, when uploads are enabled and the URI is within the location prefix, + * rewrites the path into the configured upload store (joining with the remaining + * relative suffix). + * 4) Falls back to the root-based path even if it does not exist, allowing later + * handlers to emit the appropriate HTTP status (e.g., 404). + * + * @ingroup filesystem_utils + * + * @param req Incoming HTTP request (method and normalized URI are consulted). + * @param loc Matched location block providing root, path prefix, and upload settings. + * @return Fully-resolved absolute filesystem path. Returns an empty string when the + * normalized request path is empty (invalid or unsupported path). + * + * @throws std::runtime_error Only if helper utilities (e.g., normalization/join) are + * documented to throw on invalid inputs (none are thrown in the current logic). 
+ * + * @note For POST requests, existing files under the root are intentionally ignored to + * avoid accidental overwrite semantics; uploads are routed to the upload store + * when configured. + * @warning Callers should not assume the returned path exists unless they checked it. + * Existence checking is deliberately limited to the non‑POST, root‑based branch. + */ std::string resolvePhysicalPath(const HttpRequest& req, const Location& loc) { // Normalize the request path std::string requestPath = normalizePath(req.getPath()); @@ -36,7 +82,7 @@ std::string resolvePhysicalPath(const HttpRequest& req, const Location& loc) { // 1) Try resolving against the real root std::string rootPath = buildFilePath(req, loc); - // If this isn't a POST *and* the file actually exists under root, use it + // If this isn't a POST and the file actually exists under root, use it if (req.getMethod() != "POST" && !rootPath.empty() && fs::exists(rootPath)) { return rootPath; } @@ -66,25 +112,70 @@ bool isFile(const std::string& path) { return fs::exists(path) && fs::is_regular_file(path); } +/** + * @brief Builds a unique temporary filename in the system temp directory. + * + * @details Generates a candidate path under `std::filesystem::temp_directory_path()` + * using a high‑resolution timestamp and a monotonically increasing counter. + * Intended for write‑to‑temp then atomic rename flows (e.g., buffering + * uploaded request bodies or CGI output before publishing). This function + * returns a name only; callers must create/open the file atomically. + * + * @ingroup filesystem_utils + * + * @param prefix Logical prefix embedded in the filename (e.g., "upload", "cgi"). + * @param counter Monotonic per‑process counter; incremented on each call. + * @return Absolute path string to a temporary file candidate (not created). + * + * @throws std::filesystem::filesystem_error If temp directory discovery fails. 
+ * + * @note Callers should open with exclusive creation flags (e.g., O_CREAT|O_EXCL) + * and retry on collisions to guarantee uniqueness under high concurrency. + * @warning Not thread‑safe w.r.t. the shared @p counter. Use an atomic or pass a + * thread‑local counter if invoked from multiple threads. + * @todo Consider adding PID and a random component (std::filesystem has no unique_path; + * POSIX mkstemp creates the file atomically) to further reduce collision probability. + */ std::string make_temp_name(const std::string& prefix, unsigned& counter) { - // 1) Where to put it (e.g. "/tmp" on Linux, or $TMPDIR) + // 1) Locate the system temp directory (respects platform defaults / env). fs::path tmpdir = fs::temp_directory_path(); - // 2) High-precision timestamp (nanoseconds since epoch) + // 2) High-precision timestamp (nanoseconds since epoch). auto now = std::chrono::high_resolution_clock::now(); auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count(); - // 3) Build "<prefix>_<pid>_<nanoseconds>_<counter>.tmp" + // 3) Build "<prefix>_<nanoseconds>_<counter>.tmp" std::ostringstream ss; ss << prefix << "_" << ns << "_" << counter++ << ".tmp"; return (tmpdir / ss.str()).string(); } +/** + * @brief Canonicalizes a URI-style path and rejects traversal. + * + * @details Collapses repeated slashes, removes "." segments, processes ".." + * by popping the previous segment, and preserves a trailing slash + * (except for root). On success the result is absolute and starts with "/". + * If any ".." segment would climb above the root, returns an empty string to + * signal an invalid/unsafe path (caller should treat as 400/403). + * + * @ingroup filesystem_utils + * + * @param path Raw request path or filesystem-like path (may be relative or absolute). + * @return Canonical absolute path (e.g., "/a/b" or "/a/b/"). Returns empty string + * if normalization would traverse above root.
+ * + * @note Trailing slash is preserved to help callers distinguish "directory intent" + * (e.g., for index resolution or directory listing). + * @warning This function does not perform percent-decoding. Callers should decode + * the URI before normalization and validate disallowed bytes. + */ std::string normalizePath(const std::string& path) { if (path.empty()) return "/"; + // Collapse repeated slashes: "///a//b" -> "/a/b" std::string collapsed; bool prevWasSlash = false; for (char c : path) { @@ -99,10 +190,11 @@ std::string normalizePath(const std::string& path) { } } + // Split and resolve "." and ".." while tracking if caller intended a trailing '/' std::vector<std::string> segments; std::string segment; std::istringstream stream(collapsed); - bool hadTrailingSlash = collapsed.back() == '/'; + const bool hadTrailingSlash = collapsed.back() == '/'; while (std::getline(stream, segment, '/')) { if (segment.empty() || segment == ".") @@ -111,6 +203,7 @@ std::string normalizePath(const std::string& path) { if (!segments.empty()) { segments.pop_back(); } else { + // Would escape root: signal invalid path to caller return ""; } } else { @@ -118,19 +211,40 @@ std::string normalizePath(const std::string& path) { } } + // Reassemble, preserving trailing slash intent (except for "/") std::string result = "/"; for (std::size_t i = 0; i < segments.size(); ++i) { result += segments[i]; if (i + 1 < segments.size()) result += "/"; } - if (hadTrailingSlash && result != "/") result += "/"; return result; } +/** + * @brief Concatenates a base path and a suffix with a single '/' separator. + * + * @details Ensures that there is exactly one directory separator between the + * base and suffix, regardless of whether the base already ends with + * a slash. Does not normalize or validate the components; callers + * should sanitize inputs beforehand (e.g., via @ref normalizePath). 
+ * Intended for filesystem path assembly when resolving HTTP request + * targets to physical files or upload locations. + * + * @ingroup filesystem_utils + * + * @param base Base path (absolute or relative). May end with '/'. + * @param suffix Path fragment to append (should not begin with '/' + * unless intentional). + * @return Combined path string. + * + * @note If @p base is empty, the function returns @p suffix unchanged. + * @warning This function does not handle '.' or '..' resolution; use only + * with trusted, normalized inputs to avoid directory traversal. + */ std::string joinPath(const std::string& base, const std::string& suffix) { if (base.empty()) return suffix; @@ -139,34 +253,63 @@ std::string joinPath(const std::string& base, const std::string& suffix) { return base + '/' + suffix; } +/** + * @brief Maps a request path to a filesystem path under a location root. + * + * @details Normalizes the incoming request path and the location's configured + * path prefix, verifies that the request path begins with the location + * prefix, and returns the location's root joined with the remaining + * suffix. If the request path does not match the location path, returns + * an empty string to signal no match. + * + * @ingroup filesystem_utils + * + * @param request Incoming HTTP request (URI is read from @ref HttpRequest::getPath()). + * @param loc Matched location block containing `path` (URI prefix) and `root`. + * @return Absolute or relative filesystem path under `loc.root`. Returns an empty + * string if the request path does not begin with the location's path. + * + * @note This function does not check whether the resulting file exists; callers + * may perform existence or type checks before use. + * @warning Callers must ensure that `request.getPath()` and `loc.getPath()` are + * decoded and safe. Normalization via @ref normalizePath helps prevent + * traversal, but upstream input validation is still required. 
+ * @see normalizePath, joinPath + */ std::string buildFilePath(const HttpRequest& request, const Location& loc) { + // Normalize both the request path and the location path prefix. std::string req_path = normalizePath(request.getPath()); std::string loc_path = normalizePath(loc.getPath()); std::string loc_root = loc.getRoot(); + // Ensure the request path starts with the location prefix. if (req_path.rfind(loc_path, 0) != 0) { return ""; } + // Extract the suffix (portion after the location prefix). std::string suffix = req_path.substr(loc_path.size()); if (!suffix.empty() && suffix[0] == '/') suffix.erase(0, 1); + // Join the location's filesystem root with the relative suffix. return joinPath(loc_root, suffix); } -static std::vector<std::string> splitPath(const std::string& path) { +/* static std::vector<std::string> splitPath(const std::string& path) { std::vector<std::string> parts; std::stringstream ss(path); std::string part; + + // Extract substrings between '/' and skip empty ones. while (std::getline(ss, part, '/')) { if (!part.empty()) parts.push_back(part); } return parts; -} +} */ -bool mkdirRecursive(const std::string& path) { +/* bool mkdirRecursive(const std::string& path) { std::vector<std::string> parts = splitPath(path); std::string current = path[0] == '/' ? "/" : ""; @@ -181,6 +324,43 @@ bool mkdirRecursive(const std::string& path) { } } } + return true; +} */ + +/** + * @brief Ensures that a directory exists, creating missing parents as needed. + * + * @details Checks if the given path already exists. If it exists as a regular + * file, the function fails. If it exists as a directory (or symlink + * to a directory), the function succeeds. Otherwise, it attempts to + * create the directory and all missing parent directories using + * `std::filesystem::create_directories`. + * + * @ingroup filesystem_utils + * + * @param path Filesystem path to ensure as a directory (absolute or relative). 
+ * @return `true` if the directory exists or was created successfully, + * `false` if the path is an existing regular file or creation failed. + * + * @note Uses `std::filesystem` and its error_code overloads to avoid exceptions. + * @warning Does not check for permissions beyond creation attempts; callers + * should handle permission errors via the return value. + */ +bool mkdirRecursive(const std::string& path) { + std::error_code ec; + + // If something exists at path and it's a *file*, fail. + if (fs::exists(path, ec)) { + if (fs::is_regular_file(path)) + return false; + return true; // already a directory (or symlink-to-dir) + } + + // Recursively create any missing directories. + if (!fs::create_directories(path, ec) && ec) { + return false; // creation failed for some reason + } + return true; } @@ -194,6 +374,25 @@ time_t getCurrentTime() { .count(); } +/** + * @brief Generates a timestamp-based fallback filename for uploads. + * + * @details Produces a string of the form `"upload_YYYYMMDDHHMMSS"` based on + * the current UTC time. Intended for use when the client request + * does not provide a valid or safe filename in an upload operation. + * This ensures that the uploaded file can still be stored with a + * unique, deterministic name that reflects its creation time. + * + * @ingroup filesystem_utils + * + * @return Fallback filename string in UTC timestamp format. + * + * @note Uses `std::gmtime`, which is not thread-safe on some platforms. + * If concurrent calls are expected, protect with a mutex or switch + * to `std::gmtime_r` (POSIX) or `std::gmtime_s` (Windows). + * @warning The generated name does not include a file extension; callers + * should append one if required. 
+ */ static std::string makeFallbackName() { auto now = std::chrono::system_clock::now(); auto t = std::chrono::system_clock::to_time_t(now); @@ -202,19 +401,41 @@ static std::string makeFallbackName() { return oss.str(); } +/** + * @brief Produces a safe filename from untrusted input. + * + * @details Strips leading path components, removes any remaining path separators + * (`'/'` or `'\\'`) and control characters, while preserving all other + * bytes (including valid UTF-8 sequences). If the resulting name is + * empty or resolves to `"."` or `".."`, a fallback name from + * @ref makeFallbackName is used instead. Intended for securing uploaded + * filenames against directory traversal and control character injection. + * + * @ingroup filesystem_utils + * + * @param raw Untrusted filename (may include paths, separators, or control chars). + * @return Sanitized filename safe for joining with an upload directory. + * + * @note This function does not alter Unicode codepoints except to remove control + * characters; callers may wish to normalize Unicode for cross-platform + * consistency. + * @warning The sanitized name is safe for use as a single path component, but + * not necessarily unique. Collisions should be handled by the caller. + */ std::string sanitizeFilename(const std::string& raw) { namespace fs = std::filesystem; - // 1) Drop any leading path components + + // 1) Drop any leading path components (e.g., "dir/file.txt" -> "file.txt"). fs::path p(raw); std::string name = p.filename().string(); - // 2) Remove path separators and control chars, keep everything else (including Unicode bytes) + // 2) Remove path separators and control characters; keep other bytes (including Unicode). name.erase( std::remove_if(name.begin(), name.end(), [](unsigned char c) { return c == '/' || c == '\\' || std::iscntrl(c); }), name.end()); - // 3) If that produced empty or “.”/“..”, fallback + // 3) If empty or reserved ".", "..", generate a timestamp-based fallback name. 
if (name.empty() || name == "." || name == "..") { name = makeFallbackName(); } @@ -222,9 +443,36 @@ std::string sanitizeFilename(const std::string& raw) { return name; } -// Returns empty on any error or if the resolved path would leave uploadRoot. +/** + * @brief Resolves a safe absolute path for an uploaded file. + * + * @details Given a trusted upload root and an untrusted relative path from a + * client request, this function: + * 1. Canonicalizes the upload root path. + * 2. Splits the raw relative path on `'/'` and sanitizes each segment + * via @ref sanitizeFilename to remove dangerous characters. + * 3. Canonicalizes the parent directory (so symlinks and `..` are + * resolved) while preserving the leaf name even if it does not yet exist. + * 4. Ensures that the final candidate path is still within the canonical + * upload root, rejecting any path that would escape it. + * + * @ingroup filesystem_utils + * + * @param uploadRoot Trusted base directory for file uploads. + * @param rawRelativePath Untrusted path from client input (relative to root). + * @return Absolute canonical path string under @p uploadRoot suitable for safe writing. + * Returns an empty string on error, invalid input, or if the resolved + * path would leave @p uploadRoot. + * + * @note Uses `std::filesystem::weakly_canonical` to avoid exceptions and to + * handle partially existing paths. If the leaf file does not exist, + * only the parent path is canonicalized. + * @warning This function only ensures the path is inside @p uploadRoot; callers + * must still create missing directories and open files securely + * (e.g., with `O_CREAT | O_EXCL` to avoid races). 
+ * @see sanitizeFilename + */ std::string makeSafeUploadPath(const std::string& uploadRoot, const std::string& rawRelativePath) { - namespace fs = std::filesystem; std::error_code ec; // 1) Canonicalize uploadRoot @@ -235,7 +483,7 @@ std::string makeSafeUploadPath(const std::string& uploadRoot, const std::string& return {}; } - // 2) Split the rawRelativePath on “/” and sanitize each segment + // 2) Split the rawRelativePath on '/' and sanitize each segment fs::path candidate = root; std::stringstream ss(rawRelativePath); std::string segment; @@ -246,8 +494,7 @@ std::string makeSafeUploadPath(const std::string& uploadRoot, const std::string& candidate /= safeSeg; } - // 3) For a non‐existent leaf, weakly_canonical will strip it off, - // so canonicalize the parent then re‐append the leaf. + // 3) Canonicalize parent while preserving leaf fs::path leaf = candidate.filename(); fs::path canonParent = fs::weakly_canonical(candidate.parent_path(), ec); if (ec) { @@ -256,7 +503,7 @@ std::string makeSafeUploadPath(const std::string& uploadRoot, const std::string& } fs::path canonCandidate = canonParent / leaf; - // 4) Bound‐check: ensure canonCandidate is inside canonRoot + // 4) Bound-check: ensure result is inside canonRoot auto rootStr = canonRoot.generic_string(); auto candStr = canonCandidate.generic_string(); if (candStr.size() < rootStr.size() || candStr.compare(0, rootStr.size(), rootStr) != 0 || diff --git a/src/utils/htmlUtils.cpp b/src/utils/htmlUtils.cpp index 9f4f8d8c..1ebef179 100644 --- a/src/utils/htmlUtils.cpp +++ b/src/utils/htmlUtils.cpp @@ -6,14 +6,41 @@ /* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/06/09 22:25:49 by nlouis #+# #+# */ -/* Updated: 2025/06/09 22:39:21 by nlouis ### ########.fr */ +/* Updated: 2025/08/15 22:43:23 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file htmlUtils.cpp + * @brief Implements HTML escaping 
utilities for Webserv. + * + * @details Provides helper functions for safely embedding arbitrary strings + * into HTML content. These utilities perform entity escaping of + * special characters that would otherwise be interpreted as HTML + * markup, preventing injection vulnerabilities (e.g., XSS). + * + * @ingroup html_utils + */ + #include <string> -// HTML-escape &, <, >, ", ' -// so it's safe to embed in an HTML context. +/** + * @brief Escapes special HTML characters in a string. + * + * @details Replaces the characters `&`, `<`, `>`, `"`, and `'` with their + * corresponding HTML entity representations so that the output + * can be safely embedded in an HTML context without being parsed + * as markup. This is intended to prevent HTML injection and + * cross-site scripting (XSS) when displaying untrusted data. + * + * @ingroup html_utils + * + * @param in Input string that may contain unsafe HTML characters. + * @return Escaped string safe for insertion into HTML content. + * + * @note Only escapes the five most common HTML special characters. If you + * need to handle other entities, extend this mapping accordingly. + */ std::string htmlEscape(const std::string& in) { std::string out; out.reserve(in.size()); diff --git a/src/utils/printInfo.cpp b/src/utils/printInfo.cpp index c6bb7ac4..665c3cdc 100644 --- a/src/utils/printInfo.cpp +++ b/src/utils/printInfo.cpp @@ -3,13 +3,30 @@ /* ::: :::::::: */ /* printInfo.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/03 14:01:22 by irychkov #+# #+# */ -/* Updated: 2025/08/17 12:32:55 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:50:21 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file printInfo.cpp + * @brief Human-readable printing helpers for Webserv configuration and servers. 
+ * + * @details Implements utilities to format and print usage instructions and a + * structured, readable summary of the loaded configuration: + * - `printUsage()` prints CLI usage hints. + * - `printServerHeader()` prints a banner line for each server block. + * - `printConfig()` renders effective server/location settings + * (limits, methods, error pages, CGI, upload store, etc.). + * + * These helpers are intended for diagnostics during startup and for + * developer/operator visibility of the parsed configuration. + * + * @ingroup utils + */ + #include "utils/printInfo.hpp" #include "config/Config.hpp" // for Config #include "core/Location.hpp" // for Location diff --git a/src/utils/stringUtils.cpp b/src/utils/stringUtils.cpp index e4e6d9f5..dbb26bc5 100644 --- a/src/utils/stringUtils.cpp +++ b/src/utils/stringUtils.cpp @@ -3,13 +3,36 @@ /* ::: :::::::: */ /* stringUtils.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/05/05 20:09:37 by nlouis #+# #+# */ -/* Updated: 2025/08/17 12:33:14 by irychkov ### ########.fr */ +/* Updated: 2025/08/18 19:41:44 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file stringUtils.cpp + * @brief Utility functions for string parsing, formatting, and case conversion. + * + * @details Implements the @ref string_utils helpers used across Webserv for: + * - Parsing integers and byte sizes from configuration strings with rich error reporting. + * - Converting strings to lowercase or uppercase. + * - Formatting byte counts into human-readable units. + * - Joining lists of strings with a delimiter. + * - Trimming whitespace from strings. + * + * These functions are used in configuration parsing, logging, formatting + * HTTP responses, and general-purpose string handling. 
+ * + * @ingroup string_utils + * + * @note Parsing helpers (`parseInt`, `parseByteSize`) throw @ref ConfigParseError + * with contextual information for configuration errors. + * @warning None of these functions are locale-sensitive beyond standard + * `std::tolower` / `std::toupper` behavior, and they do not perform + * Unicode normalization. + */ + #include "utils/stringUtils.hpp" #include "config/parser/ConfigParseError.hpp" // for ConfigParseError #include "utils/errorUtils.hpp" // for formatError @@ -20,6 +43,33 @@ #include <system_error> // for errc #include <vector> // for vector +/** + * @ingroup string_utils + * @brief Parses a non-negative integer from a string with context-aware error reporting. + * + * @details Attempts to parse the entire @p value string into a non-negative integer + * using `std::from_chars` for efficient, allocation-free conversion. + * If parsing fails, the string contains extra characters, or the result + * is negative, a @ref ConfigParseError is thrown. The error includes: + * - The field name for context. + * - The offending value. + * - The line and column numbers. + * - Additional context from @p context_provider, such as a snippet of the + * configuration file being parsed. + * + * @param value String representation of the integer to parse. + * @param field Name of the configuration field being parsed. + * @param line Line number in the configuration source. + * @param column Column number in the configuration source. + * @param context_provider Callable returning a string snippet or context for diagnostics. + * @return Parsed integer value (guaranteed to be non-negative). + * + * @throws ConfigParseError If parsing fails, if extra characters remain after parsing, + * or if the parsed value is negative. + * + * @note This function is typically used when reading numeric configuration values + * (e.g., port numbers, limits) that must be whole, non-negative integers. 
+ */ int parseInt(const std::string& value, const std::string& field, int line, int column, const std::function<std::string()>& context_provider) { int result = 0; @@ -38,6 +88,33 @@ int parseInt(const std::string& value, const std::string& field, int line, int c return result; } +/** + * @ingroup string_utils + * @brief Parses a human-readable byte size string into a byte count. + * + * @details Converts a size string (e.g., `"10K"`, `"5M"`, `"2G"`, or raw bytes `"512"`) + * into a `std::size_t` representing the number of bytes. + * The suffix, if present, is case-insensitive and supports: + * - `K` = kibibytes (× 1024) + * - `M` = mebibytes (× 1024²) + * - `G` = gibibytes (× 1024³) + * Throws a @ref ConfigParseError if: + * - The value is empty. + * - Parsing fails or extra non-numeric characters remain. + * - The computed size exceeds the 4 GiB hard limit from the Webserv spec. + * + * @param value String containing the size to parse (may include suffix). + * @param field Name of the configuration field being parsed. + * @param line Line number in the configuration source. + * @param column Column number in the configuration source. + * @param context_provider Callable returning a string snippet or context for diagnostics. + * @return Parsed size in bytes. + * + * @throws ConfigParseError If the string is empty, cannot be parsed as a size, contains + * leftover characters, or exceeds the maximum allowed size. + * + * @note Used primarily to parse configuration directives like `client_max_body_size`. 
+ */ std::size_t parseByteSize(const std::string& value, const std::string& field, int line, int column, const std::function<std::string()>& context_provider) { if (value.empty()) { diff --git a/src/utils/urlUtils.cpp b/src/utils/urlUtils.cpp index 8c61d6cf..2cb0e04f 100644 --- a/src/utils/urlUtils.cpp +++ b/src/utils/urlUtils.cpp @@ -3,13 +3,26 @@ /* ::: :::::::: */ /* urlUtils.cpp :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: irychkov <irychkov@student.hive.fi> +#+ +:+ +#+ */ +/* By: nlouis <nlouis@student.hive.fi> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/06/06 13:22:39 by nlouis #+# #+# */ -/* Updated: 2025/08/17 21:06:11 by irychkov ### ########.fr */ +/* Created: 2025/08/15 22:56:08 by nlouis #+# #+# */ +/* Updated: 2025/08/18 19:41:13 by nlouis ### ########.fr */ /* */ /* ************************************************************************** */ +/** + * @file urlUtils.cpp + * @brief URL and form-encoding helpers. + * + * @details Implements percent-decoding (RFC 3986), `application/x-www-form-urlencoded` + * decoding (treating '+' as space), simple key/value parsing for form bodies, + * and a safe filename extractor from a URI segment. These utilities are used + * during request parsing and upload handling to turn encoded inputs into + * validated, safe strings. + * + * @ingroup url_utils + */ + #include <cctype> // for isxdigit #include <cstddef> // for size_t #include <regex> // for regex_match, regex @@ -19,6 +32,22 @@ #include <unordered_map> // for unordered_map #include <utility> // for move +/** + * @brief Decodes percent-encoded octets in a string. + * + * @details Scans the input for sequences of the form `%HH` where `H` is a hex digit, + * converts each pair to a single byte, and returns the decoded result. + * Leaves all non-encoded characters unchanged. + * + * @ingroup url_utils + * + * @param encoded Input possibly containing percent-encoded bytes. + * @return Decoded string with `%HH` sequences replaced by their byte values. 
+ * + * @throws std::invalid_argument If a `%` is incomplete at the end of the string or + * if the two following characters are not hex digits. + * @note This function does not perform UTF‑8 validation; it operates on bytes. + */ std::string decodePercentEncoding(const std::string& encoded) { std::ostringstream result; for (size_t i = 0; i < encoded.length(); ++i) { @@ -44,6 +73,20 @@ std::string decodePercentEncoding(const std::string& encoded) { return result.str(); } +/** + * @brief Decodes `application/x-www-form-urlencoded` field content. + * + * @details First replaces `+` with a space (per form-url-encoded rules), then applies + * percent-decoding to `%HH` sequences. This is suitable for decoding both + * keys and values extracted from a form body. + * + * @ingroup url_utils + * + * @param input Raw form field string (may include `+` and `%HH`). + * @return Decoded string. + * + * @throws std::invalid_argument Propagated from @ref decodePercentEncoding on invalid encodings. + */ std::string percentDecodeForm(const std::string& input) { std::string temp; temp.reserve(input.size()); @@ -54,6 +97,22 @@ std::string percentDecodeForm(const std::string& input) { return decodePercentEncoding(temp); } +/** + * @brief Parses an `application/x-www-form-urlencoded` body into key/value pairs. + * + * @details Splits the body on `&`, then splits each pair on the first `=`. + * Both key and value are decoded using @ref percentDecodeForm. Empty pairs + * are ignored; missing `=` results in the pair being skipped. + * + * @ingroup url_utils + * + * @param body Full form body string (e.g., `"a=1&b=two+words"`). + * @return Map of decoded keys to decoded values. Later duplicates will not overwrite + * earlier ones due to `emplace`; adjust if you want overwrite semantics. + * + * @throws std::invalid_argument Propagated from @ref percentDecodeForm (invalid `%HH`). + * @note If you expect repeated keys, consider using `std::unordered_multimap` instead. 
+ */ std::unordered_map<std::string, std::string> parseFormUrlEncoded(const std::string& body) { std::unordered_map<std::string, std::string> form; size_t start = 0; @@ -81,6 +140,23 @@ std::unordered_map<std::string, std::string> parseFormUrlEncoded(const std::stri return form; } +/** + * @brief Extracts a safe filename from the last URI segment. + * + * @details Takes the substring after the last `/` in @p uri, attempts percent-decoding, + * and validates the result against a conservative allowlist. Rejects empty, + * too-long (>256), or suspicious names (contains `/`, equals `"."` or `".."`, + * starts with `.` or `-`, or fails the regex `^[a-zA-Z0-9._-]+$`). + * + * @ingroup url_utils + * + * @param uri Source URI or path-like string. + * @return A validated filename; returns an empty string if decoding fails or the + * candidate does not pass validation. + * + * @note Designed for deriving a download/upload filename from a URI segment without + * risking directory traversal or confusing special names. + */ std::string extractFilenameFromUri(const std::string& uri) { std::string filename; @@ -105,7 +181,7 @@ std::string extractFilenameFromUri(const std::string& uri) { if (filename == "." || filename == ".." || filename[0] == '.' || filename[0] == '-') return ""; - // Optional: enforce strict pattern + // Enforce strict pattern static const std::regex safePattern(R"(^[a-zA-Z0-9._-]+$)"); if (!std::regex_match(filename, safePattern)) return "";