diff --git a/.github/dependabot.yaml b/.github/dependabot.yml similarity index 50% rename from .github/dependabot.yaml rename to .github/dependabot.yml index 6e02f93..189df3d 100644 --- a/.github/dependabot.yaml +++ b/.github/dependabot.yml @@ -16,7 +16,6 @@ updates: prefix-development: "chore(deps-dev)" rebase-strategy: "auto" versioning-strategy: "auto" - # Group patch and minor updates together to reduce PR noise groups: rust-dependencies: patterns: @@ -38,8 +37,50 @@ updates: - "chore" commit-message: prefix: "chore(actions)" - # Group all GitHub Actions updates together to reduce PR noise groups: github-actions: patterns: - "*" + + # Version updates for Python packages + - package-ecosystem: "pip" + directory: "/packages/nvisy-dal" + schedule: + interval: "weekly" + timezone: "Europe/Berlin" + day: "monday" + time: "04:00" + open-pull-requests-limit: 5 + labels: + - "chore" + - "python" + commit-message: + prefix: "chore(deps)" + groups: + python-dependencies: + patterns: + - "*" + update-types: + - "minor" + - "patch" + + - package-ecosystem: "pip" + directory: "/packages/nvisy-rig" + schedule: + interval: "weekly" + timezone: "Europe/Berlin" + day: "monday" + time: "04:00" + open-pull-requests-limit: 5 + labels: + - "chore" + - "python" + commit-message: + prefix: "chore(deps)" + groups: + python-dependencies: + patterns: + - "*" + update-types: + - "minor" + - "patch" diff --git a/.gitignore b/.gitignore index c72f062..58dde47 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,14 @@ target/ **/*.rs.bk *.pdb +# Python +__pycache__/ +*.py[cod] +.venv/ +*.egg-info/ +.ruff_cache/ +.pytest_cache/ + # Generated files *.pem *.backup @@ -28,7 +36,6 @@ crates/nvisy-postgres/src/schema.rs.bak # Build output dist/ build/ -output/ # Environment files .env* @@ -50,4 +57,4 @@ temp/ .ignore*/ LLM.md .claude -pgtrgm/ +CLAUDE.md diff --git a/Cargo.lock b/Cargo.lock index 5c5aec2..8c36be0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,7 @@ dependencies = [ "cfg-if", "getrandom 0.3.4", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -189,15 +190,20 @@ dependencies = [ "password-hash", ] +[[package]] +name = "as-any" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" + [[package]] name = "async-compression" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ec5f6c2f8bc326c994cb9e241cc257ddaba9afa8555a43cffbb5dd86efaa37" +checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" dependencies = [ "compression-codecs", "compression-core", - "futures-core", "pin-project-lite", "tokio", ] @@ -298,9 +304,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-lc-rs" -version = "1.15.2" +version = "1.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" +checksum = "e84ce723ab67259cfeb9877c6a639ee9eb7a27b28123abd71db7f0d5d0cc9d86" dependencies = [ "aws-lc-sys", "untrusted 0.7.1", @@ -309,9 +315,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" +checksum = "43a442ece363113bd4bd4c8b18977a7798dd4d3c3383f34fb61936960e8f4ad8" dependencies = [ "cc", "cmake", @@ -327,7 
+333,6 @@ checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "axum-macros", - "base64", "bytes", "form_urlencoded", "futures-util", @@ -347,10 +352,8 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sha1", "sync_wrapper", "tokio", - "tokio-tungstenite", "tower", "tower-layer", "tower-service", @@ -370,9 +373,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", @@ -520,9 +523,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "base64ct" -version = "1.8.1" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e050f626429857a27ddccb31e0aca21356bfa709c04041aefddac081a8f068a" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bigdecimal" @@ -544,7 +547,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "bit-vec", + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", ] [[package]] @@ -553,6 +565,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.10.0" @@ -565,7 +583,7 @@ version = "0.11.0-rc.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "679065eb2b85a078ace42411e657bef3a6afe93a40d1b9cb04e39ca303cc3f36" dependencies = [ - "digest 0.11.0-rc.4", + "digest 0.11.0-rc.5", ] [[package]] @@ -586,6 +604,12 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "borrow-or-share" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0b364ead1874514c8c2855ab558056ebfeb775653e7ae45ff72f28f8f3166c" + [[package]] name = "brotli" version = "8.0.2" @@ -613,6 +637,12 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "byteorder" version = "1.5.0" @@ -645,9 +675,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.50" +version = "1.2.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" dependencies = [ "find-msvc-tools", "jobserver", @@ -680,9 +710,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.42" 
+version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -697,7 +727,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "crypto-common 0.1.6", + "crypto-common 0.1.7", "inout", ] @@ -737,9 +767,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "client-ip" @@ -767,9 +797,9 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "compression-codecs" -version = "0.4.35" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f7ac3e5b97fdce45e8922fb05cae2c37f7bbd63d30dd94821dacfd8f3f2bf2" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" dependencies = [ "brotli", "compression-core", @@ -876,9 +906,9 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -886,13 +916,34 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.2.0-rc.5" +version = "0.2.0-rc.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919bd05924682a5480aec713596b9e2aabed3a0a6022fab6847f85a99e5f190a" +checksum = "41b8986f836d4aeb30ccf4c9d3bd562fd716074cfd7fc4a2948359fbd21ed809" dependencies = [ "hybrid-array", ] +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -991,9 +1042,9 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] name = "deadpool" @@ -1203,18 +1254,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", - "crypto-common 0.1.6", + "crypto-common 0.1.7", "subtle", ] [[package]] name = "digest" -version = "0.11.0-rc.4" +version = "0.11.0-rc.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea390c940e465846d64775e55e3115d5dc934acb953de6f6e6360bc232fe2bf7" +checksum = 
"ebf9423bafb058e4142194330c52273c343f8a5beb7176d052f0e73b17dd35b9" dependencies = [ "block-buffer 0.11.0", - "crypto-common 0.2.0-rc.5", + "crypto-common 0.2.0-rc.9", "subtle", ] @@ -1340,6 +1391,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "eventsource-stream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" +dependencies = [ + "futures-core", + "nom", + "pin-project-lite", +] + [[package]] name = "expect-json" version = "1.9.0" @@ -1381,7 +1443,18 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ - "bit-set", + "bit-set 0.5.3", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fancy-regex" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +dependencies = [ + "bit-set 0.8.0", "regex-automata", "regex-syntax", ] @@ -1400,21 +1473,26 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "filetime" -version = "0.2.26" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" dependencies = [ "cfg-if", "libc", "libredox", - "windows-sys 0.60.2", ] [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + +[[package]] +name = "fixedbitset" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flate2" @@ -1427,12 +1505,29 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "fluent-uri" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1918b65d96df47d3591bed19c5cca17e3fa5d0707318e4b5ef2eae01764df7e5" +dependencies = [ + "borrow-or-share", + "ref-cast", + "serde", +] + [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1442,11 +1537,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f158e3ff0a1b334408dc9fb811cd99b446986f4d8b741bb08f9df1604085ae7" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "fs-err" -version = "3.2.1" +version = "3.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824f08d01d0f496b3eca4f001a13cf17690a6ee930043d20817f547455fd98f8" +checksum = "baf68cef89750956493a66a10f512b9e58d9db21f2a573c079c0bdf1207a54a7" dependencies = [ "autocfg", "tokio", @@ -1529,6 +1634,12 @@ version = "0.3.31" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -1547,6 +1658,21 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows-link", + "windows-result", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1559,14 +1685,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -1611,9 +1737,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -1628,6 +1754,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + [[package]] name = "hashbrown" version = "0.16.1" @@ -1692,6 +1827,16 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hipstr" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07a5072958d04f9147e517881d929d3f4706612712f8f4cfcd247f2b716d5262" +dependencies = [ + "loom", + "serde", +] + [[package]] name = "hmac" version = "0.12.1" @@ -1798,7 +1943,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots 1.0.4", + "webpki-roots 1.0.5", ] [[package]] @@ -1895,9 +2040,9 @@ dependencies = [ [[package]] name = "icu_locale_data" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03e2fcaefecdf05619f3d6f91740e79ab969b4dd54f77cbf546b1d0d28e3147" +checksum = "1c5f1d16b4c3a2642d3a719f18f6b06070ab0aef246a6418130c955ae08aa831" [[package]] name = "icu_normalizer" @@ -2007,16 +2152,25 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", "serde", "serde_core", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "inout" version = "0.1.4" @@ -2046,9 +2200,9 @@ dependencies = [ [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" dependencies = [ "memchr", "serde", @@ -2080,9 +2234,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ee5b5339afb4c41626dde77b7a611bd4f2c202b897852b4bcf5d03eddc61010" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" @@ -2147,14 +2301,39 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", ] +[[package]] +name = "jsonschema" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "161c33c3ec738cfea3288c5c53dfcdb32fd4fc2954de86ea06f71b5a1a40bfcd" +dependencies = [ + "ahash", + "base64", + "bytecount", + "email_address", + "fancy-regex 0.14.0", + "fraction", + "idna", + "itoa", + "num-cmp", + "once_cell", + "percent-encoding", + "referencing", + "regex-syntax", + "reqwest", + "serde", + "serde_json", + "uuid-simd", +] + [[package]] name = "jsonwebtoken" version = "10.2.0" @@ -2163,7 +2342,7 @@ checksum = "c76e1c7d7df3e34443b3621b459b066a7b79644f059fc8b2db7070c825fd417e" dependencies = [ "aws-lc-rs", "base64", - "getrandom 0.2.16", + "getrandom 0.2.17", "js-sys", "pem", "serde", @@ -2186,9 +2365,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.178" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libm" @@ -2198,13 +2377,13 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df15f6eac291ed1cf25865b1ee60399f57e7c227e7f51bdbd4c5270396a9ed50" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ "bitflags", "libc", - "redox_syscall 0.6.0", + "redox_syscall 0.7.0", ] [[package]] @@ -2234,6 +2413,19 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lru-slab" version = "0.1.2" @@ -2242,14 +2434,34 @@ checksum = 
"112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lzma-rust2" -version = "0.15.4" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48172246aa7c3ea28e423295dd1ca2589a24617cc4e588bb8cfe177cb2c54d95" +checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69" dependencies = [ "crc", "sha2", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "markdown" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cab8f2cadc416a82d2e783a1946388b31654d391d1c7d92cc1f03e295b1deb" +dependencies = [ + "unicode-id", +] + [[package]] name = "matchers" version = "0.2.0" @@ -2281,6 +2493,15 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "migrations_internals" version = "2.3.0" @@ -2318,6 +2539,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2335,7 +2562,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -2365,12 +2592,22 @@ dependencies = [ "data-encoding", "ed25519", "ed25519-dalek", - "getrandom 0.2.16", + "getrandom 0.2.17", "log", "rand 0.8.5", "signatory", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -2413,6 +2650,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + [[package]] name = "num-complex" version = "0.4.6" @@ -2494,6 +2737,7 @@ dependencies = [ "serde", "thiserror 2.0.18", "tokio", + "tokio-util", "tracing", "tracing-subscriber", ] @@ -2502,22 +2746,42 @@ dependencies = [ name = "nvisy-core" version = "0.1.0" dependencies = [ + "async-trait", "jiff", "schemars 0.9.0", "serde", "serde_json", + "strum", "thiserror 2.0.18", "tracing", ] +[[package]] +name = "nvisy-dal" +version = "0.1.0" +dependencies = [ + "async-stream", + "async-trait", + "bytes", + "derive_more", + "futures", + "jiff", + "nvisy-core", + "pyo3", + "pyo3-async-runtimes", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "uuid", +] + [[package]] name = "nvisy-nats" version = "0.1.0" dependencies = [ "async-nats", - "async-stream", "base64", - "bytes", "clap", "derive_more", "futures", @@ -2528,7 +2792,6 @@ 
dependencies = [ "serde", "serde_json", "sha2", - "strum", "thiserror 2.0.18", "tokio", "tracing", @@ -2575,12 +2838,16 @@ dependencies = [ "derive_more", "futures", "jiff", + "jsonschema", + "nvisy-core", "nvisy-nats", "nvisy-postgres", "rig-core", + "schemars 0.9.0", "serde", "serde_json", "sha2", + "strum", "text-splitter", "thiserror 2.0.18", "tokio", @@ -2589,52 +2856,196 @@ dependencies = [ ] [[package]] -name = "nvisy-server" +name = "nvisy-rt-archive" version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ - "aide", - "anyhow", - "argon2", - "async-trait", - "axum", - "axum-client-ip", - "axum-extra 0.12.5", - "axum-test", - "base64", - "bigdecimal", - "clap", + "bytes", + "bzip2", "derive_more", - "dotenvy", "flate2", - "futures", - "ipnet", - "jiff", - "jsonwebtoken", - "nvisy-nats", - "nvisy-postgres", - "nvisy-rig", - "nvisy-webhook", - "rand 0.10.0-rc.6", - "regex", - "schemars 0.9.0", - "serde", - "serde_json", - "sha2", + "nvisy-rt-core", "strum", "tar", "tempfile", - "thiserror 2.0.18", "tokio", - "tokio-util", - "tower", - "tower-http", "tracing", - "tracing-subscriber", - "url", - "uuid", - "validator", - "woothee", + "xz2", "zip", +] + +[[package]] +name = "nvisy-rt-core" +version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "bytes", + "derive_more", + "hex", + "hipstr", + "jiff", + "serde", + "sha2", + "strum", + "thiserror 2.0.18", + "tokio", + "uuid", +] + +[[package]] +name = "nvisy-rt-document" +version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "base64", + "bytes", + "derive_more", + "jiff", + "nvisy-rt-core", + "serde", + "serde_json", + "strum", + "thiserror 2.0.18", + "tokio", + "uuid", +] + +[[package]] +name = "nvisy-rt-docx" +version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "nvisy-rt-document", + "thiserror 2.0.18", +] + +[[package]] +name = "nvisy-rt-engine" +version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "bytes", + "jiff", + "nvisy-rt-archive", + "nvisy-rt-document", + "nvisy-rt-docx", + "nvisy-rt-image", + "nvisy-rt-pdf", + "nvisy-rt-text", + "serde", + "tracing", + "uuid", +] + +[[package]] +name = "nvisy-rt-image" +version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "nvisy-rt-document", + "thiserror 2.0.18", +] + +[[package]] +name = "nvisy-rt-pdf" +version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "nvisy-rt-document", + "thiserror 2.0.18", +] + +[[package]] +name = "nvisy-rt-text" +version = "0.1.0" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "csv", + "markdown", + "nvisy-rt-document", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = 
"nvisy-runtime" +version = "0.1.0" +dependencies = [ + "async-trait", + "derive_builder", + "derive_more", + "futures", + "jiff", + "nvisy-core", + "nvisy-dal", + "nvisy-rig", + "nvisy-rt-core", + "nvisy-rt-engine", + "petgraph", + "semver", + "serde", + "serde_json", + "strum", + "thiserror 2.0.18", + "tokio", + "tracing", + "uuid", +] + +[[package]] +name = "nvisy-server" +version = "0.1.0" +dependencies = [ + "aide", + "anyhow", + "argon2", + "async-trait", + "axum", + "axum-client-ip", + "axum-extra 0.12.5", + "axum-test", + "base64", + "bigdecimal", + "clap", + "derive_more", + "dotenvy", + "futures", + "ipnet", + "jiff", + "jsonwebtoken", + "nvisy-nats", + "nvisy-postgres", + "nvisy-runtime", + "nvisy-webhook", + "rand 0.10.0-rc.6", + "regex", + "schemars 0.9.0", + "serde", + "serde_json", + "sha2", + "strum", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tokio-util", + "tower", + "tower-http", + "tracing", + "tracing-subscriber", + "url", + "uuid", + "validator", + "woothee", "zxcvbn", ] @@ -2689,13 +3100,19 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "ordered-float" -version = "4.6.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" dependencies = [ "num-traits", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "parking_lot" version = "0.12.5" @@ -2721,11 +3138,11 @@ dependencies = [ [[package]] name = "password-hash" -version = "0.6.0-rc.6" +version = "0.6.0-rc.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383d290055c99f2dd7dece082088d89494dff6d79277fbac4a7da21c1bf2ab6b" +checksum = "f77af9403a6489b7b51f552693bd48d8e81a710c92d3d77648b203558578762d" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.0-rc.0", "phc", "rand_core 0.10.0-rc-3", ] @@ -2765,6 +3182,19 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap", + "serde", + "serde_derive", +] + [[package]] name = "pgtrgm" version = "0.4.0" @@ -2785,12 +3215,12 @@ dependencies = [ [[package]] name = "phc" -version = "0.6.0-rc.0" +version = "0.6.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f960577aaac5c259bc0866d685ba315c0ed30793c602d7287f54980913863" +checksum = "71d390c5fe8d102c2c18ff39f1e72b9ad5996de282c2d831b0312f56910f5508" dependencies = [ "base64ct", - "getrandom 0.3.4", + "getrandom 0.4.0-rc.0", "rand_core 0.10.0-rc-3", "subtle", ] @@ -2864,9 +3294,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f59e70c4aef1e55797c2e8fd94a4f2a973fc972cfde0e0b05f683667b0cd39dd" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" [[package]] name 
= "portable-atomic-util" @@ -2879,9 +3309,9 @@ dependencies = [ [[package]] name = "postgres-protocol" -version = "0.6.9" +version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbef655056b916eb868048276cfd5d6a7dea4f81560dfd047f97c8c6fe3fcfd4" +checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" dependencies = [ "base64", "byteorder", @@ -2897,9 +3327,9 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4605b7c057056dd35baeb6ac0c0338e4975b1f2bef0f65da953285eb007095" +checksum = "54b858f82211e84682fecd373f68e1ceae642d8d751a1ebd13f33de6257b3e20" dependencies = [ "bytes", "fallible-iterator", @@ -2990,6 +3420,80 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pyo3" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d" +dependencies = [ + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-async-runtimes" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57ddb5b570751e93cc6777e81fee8087e59cd53b5043292f2a6d59d5bd80fdfd" +dependencies = [ + "futures", + "once_cell", + "pin-project-lite", + "pyo3", + "tokio", +] + +[[package]] +name = "pyo3-build-config" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6" +dependencies = [ + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + [[package]] name = "quinn" version = "0.11.9" @@ -3078,7 +3582,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3110,7 +3614,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3119,14 +3623,14 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -3148,9 +3652,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96166dafa0886eb81fe1c0a388bece180fbef2135f97c1e2cf8302e74b43b5" +checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" dependencies = [ "bitflags", ] @@ -3175,6 +3679,20 @@ dependencies = [ "syn", ] +[[package]] +name = "referencing" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40a64b3a635fad9000648b4d8a59c8710c523ab61a23d392a7d91d47683f5adc" +dependencies = [ + "ahash", + "fluent-uri", + "once_cell", + "parking_lot", + "percent-encoding", + "serde_json", +] + [[package]] name = "regex" version = "1.12.2" @@ -3213,6 +3731,7 @@ dependencies = [ "base64", "bytes", "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2", @@ -3246,7 +3765,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.4", + "webpki-roots 1.0.5", ] [[package]] @@ -3260,23 +3779,33 @@ dependencies = [ [[package]] name = "rig-core" -version = "0.12.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed332151c57f658a89fc58cbd274aa6743ae2ad3026a17630ec2bfc77eff96ac" +checksum = "7207790134ee24d87ac3d022c308e1a7c871219d139acf70d13be76c1f6919c5" dependencies = [ + "as-any", "async-stream", "base64", "bytes", + "eventsource-stream", + "fastrand", "futures", + "futures-timer", "glob", + "http", + "mime", "mime_guess", "ordered-float", + "pin-project-lite", "reqwest", - "schemars 0.8.22", + "schemars 1.2.0", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.18", + "tokio", "tracing", + "tracing-futures", + "url", ] [[package]] @@ -3287,7 +3816,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted 0.9.0", "windows-sys 0.52.0", @@ -3310,9 +3839,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc-hash" @@ -3344,15 +3873,15 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.9", "subtle", "zeroize", ] @@ -3381,9 +3910,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -3401,9 +3930,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "aws-lc-rs", "ring", @@ -3419,9 +3948,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" [[package]] name = "schannel" @@ -3434,37 +3963,38 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.22" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" dependencies = [ "dyn-clone", - "schemars_derive 0.8.22", + "indexmap", + "jiff", + "ref-cast", + "schemars_derive 0.9.0", "serde", "serde_json", + "uuid", ] [[package]] name = "schemars" -version = "0.9.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2" dependencies = [ "dyn-clone", - "indexmap", - "jiff", "ref-cast", - "schemars_derive 0.9.0", + "schemars_derive 1.2.0", "serde", "serde_json", - "uuid", ] [[package]] name = "schemars_derive" -version = "0.8.22" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +checksum = "5016d94c77c6d32f0b8e08b781f7dc8a90c2007d4e77472cc2807bc10a8438fe" dependencies = [ "proc-macro2", "quote", @@ -3474,9 +4004,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.9.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5016d94c77c6d32f0b8e08b781f7dc8a90c2007d4e77472cc2807bc10a8438fe" +checksum = "4908ad288c5035a8eb12cfdf0d49270def0a268ee162b75eeee0f85d155a7c45" dependencies = [ "proc-macro2", "quote", @@ -3493,6 +4023,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -3527,6 +4063,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "serde" @@ -3699,10 +4239,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -3903,6 +4444,12 @@ dependencies = [ "xattr", ] +[[package]] +name = "target-lexicon" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" + [[package]] name = "tempfile" version = "3.24.0" @@ -3984,30 +4531,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" dependencies = [ "num-conv", "time-core", @@ -4068,9 +4615,9 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.15" +version = "0.7.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b40d66d9b2cfe04b628173409368e58247e8eddbbd3b0e6c6ba1d09f20f6c9e" +checksum = "dcea47c8f71744367793f16c2db1f11cb859d28f436bdb4ca9193eb1f787ee42" dependencies = [ "async-trait", "byteorder", @@ -4104,27 +4651,15 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", "tokio", ] -[[package]] -name = "tokio-tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite", -] - [[package]] name = "tokio-util" version = "0.7.18" @@ -4161,9 +4696,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.9.10+spec-1.1.0" +version = "0.9.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48" +checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46" dependencies = [ "serde_core", "serde_spanned", @@ -4286,6 +4821,18 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "futures", + "futures-task", + "pin-project", + "tracing", +] + [[package]] name = "tracing-log" version = "0.2.0" @@ -4344,23 +4891,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" -dependencies = [ - "bytes", - "data-encoding", - "http", - "httparse", - "log", - "rand 0.9.2", - "sha1", - "thiserror 2.0.18", - "utf-8", -] - [[package]] name = "typeid" version = "1.0.3" @@ 
-4399,9 +4929,9 @@ dependencies = [ [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-bidi" @@ -4409,6 +4939,12 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" +[[package]] +name = "unicode-id" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ba288e709927c043cbe476718d37be306be53fb1fafecd0dbe36d072be2580" + [[package]] name = "unicode-ident" version = "1.0.22" @@ -4442,6 +4978,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + [[package]] name = "untrusted" version = "0.7.1" @@ -4467,12 +5009,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -4497,6 +5033,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "uuid-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b082222b4f6619906941c17eb2297fff4c2fb96cb60164170522942a200bd8" +dependencies = [ + "outref", + "uuid", + "vsimd", +] + [[package]] name = "validator" version = "0.20.0" @@ -4545,6 +5092,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "want" version = "0.3.1" @@ -4560,26 +5113,38 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasite" -version = "0.1.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -4590,11 +5155,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -4603,9 +5169,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4613,9 +5179,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -4626,9 +5192,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] @@ -4648,9 +5214,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -4672,23 +5238,23 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.4", + "webpki-roots 1.0.5", ] [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" dependencies = [ "rustls-pki-types", ] [[package]] name = "whoami" -version = "1.6.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +checksum = "ace4d5c7b5ab3d99629156d4e0997edbe98a4beb6d5ba99e2cae830207a81983" dependencies = [ "libredox", "wasite", @@ -4929,9 +5495,9 @@ checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" 
[[package]] name = "woothee" @@ -4959,6 +5525,15 @@ dependencies = [ "rustix", ] +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yansi" version = "1.0.1" @@ -4990,18 +5565,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", @@ -5040,9 +5615,9 @@ dependencies = [ [[package]] name = "zeroize_derive" -version = "1.4.2" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", @@ -5118,9 +5693,9 @@ checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" [[package]] name = "zmij" -version = "1.0.0" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d6085d62852e35540689d1f97ad663e3971fc19cf5eceab364d62c646ea167" +checksum = "94f63c051f4fe3c1509da62131a678643c5b6fbdc9273b2b79d4378ebda003d2" [[package]] name = "zopfli" @@ -5170,7 +5745,7 @@ checksum = "ad76e35b00ad53688d6b90c431cabe3cbf51f7a4a154739e04b63004ab1c736c" dependencies = [ "chrono", "derive_builder", - "fancy-regex", + "fancy-regex 0.13.0", "itertools 0.13.0", "lazy_static", "regex", diff --git a/Cargo.toml b/Cargo.toml index 6ba9965..994c5fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,9 +5,11 @@ resolver = "2" members = [ "./crates/nvisy-cli", "./crates/nvisy-core", + "./crates/nvisy-dal", "./crates/nvisy-nats", "./crates/nvisy-postgres", "./crates/nvisy-rig", + "./crates/nvisy-runtime", "./crates/nvisy-server", "./crates/nvisy-webhook", ] @@ -32,18 +34,27 @@ documentation = "https://docs.rs/nvisy-server" # Internal crates nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } +nvisy-dal = { path = "./crates/nvisy-dal", version = "0.1.0" } nvisy-nats = { path = "./crates/nvisy-nats", version = "0.1.0" } nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } +nvisy-runtime = { path = "./crates/nvisy-runtime", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } nvisy-webhook = { path = "./crates/nvisy-webhook", version = "0.1.0" } +# Runtime crates (from github.com/nvisycom/runtime) +nvisy-rt-archive = { git = "https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-core = { git = "https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-document = { git = "https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-engine = { git = 
"https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } + # CLI clap = { version = "4.5", features = [] } # Async runtime tokio = { version = "1.49", features = [] } tokio-util = { version = "0.7", features = ["io"] } +tokio-stream = { version = "0.1", features = [] } futures = { version = "0.3", features = [] } futures-util = { version = "0.3", features = [] } async-stream = { version = "0.3", features = [] } @@ -64,6 +75,7 @@ tower-http = { version = "0.6", features = ["full"] } # OpenAPI/Documentation aide = { version = "0.15", features = ["axum", "macros", "scalar"] } schemars = { version = "0.9", features = ["uuid1", "jiff02"] } +jsonschema = { version = "0.29", features = [] } # Authentication & Security jsonwebtoken = { version = "10.2", features = ["aws_lc_rs"] } @@ -115,6 +127,8 @@ uuid = { version = "1.18", features = ["serde", "v4", "v7"] } ipnet = { version = "2.11", features = [] } bigdecimal = { version = "0.4", features = ["serde"] } bytes = { version = "1.10", features = ["serde"] } +petgraph = { version = "0.8", features = ["serde-1"] } +semver = { version = "1.0", features = ["serde"] } url = { version = "2.5", features = [] } # Text processing @@ -123,9 +137,9 @@ text-splitter = { version = "0.29", features = [] } woothee = { version = "0.13", features = [] } # AI/ML frameworks -rig-core = { version = "0.12", default-features = false, features = ["reqwest-rustls"] } +rig-core = { version = "0.29", default-features = false, features = ["reqwest-rustls"] } + -# Archive/Compression -tar = { version = "0.4", features = [] } -flate2 = { version = "1.1", features = [] } -zip = { version = "7.1", features = [] } +# Python interop +pyo3 = { version = "0.27", features = [] } +pyo3-async-runtimes = { version = "0.27", features = [] } diff --git a/LICENSE.txt b/LICENSE.txt index 8015683..b91b9eb 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,21 +1,201 @@ -MIT License - -Copyright (c) 2025 Nvisy Software - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2026 Nvisy Software + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/README.md b/README.md index bbbbbde..2405be6 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ High-performance backend server for the Nvisy document processing platform. - **High-Performance** - Async HTTP server with Axum and Tokio - **LLM Annotations** - AI-driven document edits via structured annotations - **RAG Pipeline** - Build knowledge bases with document embeddings and semantic search -- **Real-Time Updates** - Live collaboration via NATS pub/sub and WebSocket +- **Real-Time Updates** - AI streaming via SSE and job processing via NATS - **Interactive Docs** - Auto-generated OpenAPI with Scalar UI ## Architecture @@ -33,14 +33,13 @@ server/ ## Quick Start ```bash -# Install tools and generate keys +# Install tools and make scripts executable make install-all -make generate-keys -# Run database migrations -make generate-migrations +# Generate keys, env and migration files +make generate-all -# Start the server +# Start the server with dotenv feature cargo run --features dotenv ``` @@ -72,7 +71,7 @@ See [CHANGELOG.md](CHANGELOG.md) for release notes and version history. ## License -MIT License - see [LICENSE.txt](LICENSE.txt) +Apache 2.0 License - see [LICENSE.txt](LICENSE.txt) ## Support diff --git a/crates/README.md b/crates/README.md new file mode 100644 index 0000000..d092687 --- /dev/null +++ b/crates/README.md @@ -0,0 +1,49 @@ +# Crates + +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + +This directory contains the workspace crates for Nvisy Server. + +## Core + +### nvisy-cli + +Server entry point and CLI configuration. Parses command-line arguments, loads environment configuration, and bootstraps the application by initializing all services and starting the HTTP server. + +### nvisy-core + +Shared foundation used across all crates. Contains common error types with retry support, utility functions, and base traits. Provides the `Error` and `ErrorKind` types used throughout the application. + +### nvisy-server + +HTTP API layer built on Axum. Implements REST endpoints for documents, workspaces, accounts, and studio sessions. Includes middleware for authentication (JWT/Ed25519), request validation, and OpenAPI documentation via Aide. + +## Data Layer + +### nvisy-postgres + +PostgreSQL persistence layer using Diesel async. Defines ORM models, query builders, and repository patterns for all database entities. Handles connection pooling via deadpool and compile-time SQL validation. + +### nvisy-nats + +NATS messaging client for real-time features. Provides JetStream for durable message streams, KV store for distributed state, and object storage for large files. Used for pub/sub events and cross-service communication. + +## Workflows + +### nvisy-dal + +Data Abstraction Layer for workflow inputs and outputs. Provides unified interfaces for reading/writing data across storage backends (S3, GCS, Azure Blob, PostgreSQL, MySQL) and vector databases (Qdrant, Pinecone, Milvus, pgvector). Defines core data types: Blob, Document, Embedding, Graph, Record, Message. + +### nvisy-runtime + +Workflow execution engine. Defines workflow graphs with input, transformer, and output nodes. Manages provider credentials, node execution, and data flow between pipeline stages. Integrates with nvisy-dal for storage operations. + +### nvisy-rig + +AI services powered by rig-core. 
Provides chat completions, RAG pipelines with pgvector embeddings, and document processing. Supports multiple LLM providers (OpenAI, Anthropic, OpenRouter) for studio sessions. + +## Integration + +### nvisy-webhook + +Webhook delivery system. Defines traits and types for sending HTTP callbacks on events. Used to notify external systems about document processing completion, workflow status changes, and other application events. diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml index cac19eb..943df77 100644 --- a/crates/nvisy-cli/Cargo.toml +++ b/crates/nvisy-cli/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-cli" +description = "Command-line interface and HTTP server for the Nvisy platform" +readme = "./README.md" +keywords = ["cli", "server", "http", "nvisy", "document"] +categories = ["command-line-utilities", "web-programming::http-server"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -43,6 +47,7 @@ nvisy-server = { workspace = true, features = ["config"] } # Async runtime, environment and CLI. tokio = { workspace = true, features = ["rt-multi-thread", "macros", "signal"] } +tokio-util = { workspace = true, features = [] } clap = { workspace = true, features = ["derive", "env"] } dotenvy = { workspace = true, features = [], optional = true } diff --git a/crates/nvisy-cli/README.md b/crates/nvisy-cli/README.md index 05147b3..3afecd6 100644 --- a/crates/nvisy-cli/README.md +++ b/crates/nvisy-cli/README.md @@ -1,9 +1,8 @@ # nvisy-cli -Command-line interface and HTTP server for the Nvisy platform. +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) -[![Rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![Axum](https://img.shields.io/badge/Axum-0.8+-000000?style=flat-square&logo=rust&logoColor=white)](https://github.com/tokio-rs/axum) +Command-line interface and HTTP server for the Nvisy platform. ## Features @@ -23,3 +22,18 @@ Command-line interface and HTTP server for the Nvisy platform. - **tls** - HTTPS support with rustls - **dotenv** - Load configuration from `.env` files + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-cli/src/config/mod.rs b/crates/nvisy-cli/src/config/mod.rs index 53b8598..c00b006 100644 --- a/crates/nvisy-cli/src/config/mod.rs +++ b/crates/nvisy-cli/src/config/mod.rs @@ -30,7 +30,6 @@ mod server; use std::process; use clap::Parser; -use nvisy_server::pipeline::PipelineConfig; use nvisy_server::service::{ServiceConfig, ServiceState}; use nvisy_webhook::WebhookService; use nvisy_webhook::reqwest::{ReqwestClient, ReqwestConfig}; @@ -71,10 +70,6 @@ pub struct Cli { #[clap(flatten)] pub service: ServiceConfig, - /// Pipeline configuration for document processing workers. 
- #[clap(flatten)] - pub pipeline: PipelineConfig, - /// HTTP client configuration for webhook delivery. #[clap(flatten)] pub reqwest: ReqwestConfig, diff --git a/crates/nvisy-cli/src/main.rs b/crates/nvisy-cli/src/main.rs index 0b53e18..10c0824 100644 --- a/crates/nvisy-cli/src/main.rs +++ b/crates/nvisy-cli/src/main.rs @@ -10,8 +10,9 @@ use std::process; use axum::Router; use nvisy_server::handler::{CustomRoutes, routes}; use nvisy_server::middleware::*; -use nvisy_server::pipeline::{PipelineState, WorkerHandles}; use nvisy_server::service::ServiceState; +use nvisy_server::worker::WebhookWorker; +use tokio_util::sync::CancellationToken; use crate::config::{Cli, MiddlewareConfig}; use crate::server::TRACING_TARGET_SHUTDOWN; @@ -45,20 +46,35 @@ async fn run() -> anyhow::Result<()> { // Initialize application state let state = cli.service_state().await?; - // Spawn pipeline workers - let pipeline_state = PipelineState::new(&state, cli.pipeline.clone()); - let workers = WorkerHandles::spawn(&pipeline_state); - // Build router - let router = create_router(state, &cli.middleware); + let router = create_router(state.clone(), &cli.middleware); + + // Create cancellation token for graceful shutdown of workers + let cancel = CancellationToken::new(); + + // Spawn webhook worker (logs lifecycle events internally) + let webhook_worker = WebhookWorker::new(state.nats.clone(), state.webhook.clone()); + let worker_cancel = cancel.clone(); + let worker_handle = tokio::spawn(async move { + let _ = webhook_worker.run(worker_cancel).await; + }); // Run the HTTP server - let result = server::serve(router, cli.server).await; + let server_result = server::serve(router, cli.server).await; + + // Signal workers to stop + cancel.cancel(); - // Shutdown workers - workers.shutdown(); + // Wait for worker to finish + if let Err(err) = worker_handle.await { + tracing::error!( + target: TRACING_TARGET_SHUTDOWN, + error = %err, + "Webhook worker task panicked" + ); + } - result?; + server_result?; Ok(()) } diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml index 4afb491..4986871 100644 --- a/crates/nvisy-core/Cargo.toml +++ b/crates/nvisy-core/Cargo.toml @@ -1,11 +1,15 @@ [package] name = "nvisy-core" +description = "Core types and utilities shared across nvisy crates" +readme = "./README.md" +keywords = ["core", "types", "error", "utilities", "nvisy"] +categories = ["rust-patterns", "development-tools"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -24,9 +28,15 @@ default = [] schema = ["dep:schemars"] [dependencies] +# Async +async-trait = { workspace = true } + # Error handling thiserror = { workspace = true } +# Derive macros +strum = { workspace = true } + # Observability tracing = { workspace = true } diff --git a/crates/nvisy-core/README.md b/crates/nvisy-core/README.md index 75b613e..e8bddd5 100644 --- a/crates/nvisy-core/README.md +++ b/crates/nvisy-core/README.md @@ -1,5 +1,7 @@ # nvisy-core +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Core types and utilities shared across nvisy crates. 
## Overview @@ -20,3 +22,18 @@ use nvisy_core::types::{ServiceHealth, ServiceStatus}; ## Features - `schema` - Enable JSON Schema derives for API documentation + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-core/src/error.rs b/crates/nvisy-core/src/error.rs index c8df0cc..7f7a52e 100644 --- a/crates/nvisy-core/src/error.rs +++ b/crates/nvisy-core/src/error.rs @@ -1,7 +1,6 @@ //! Common error type definitions. -use std::time::Duration; - +use strum::{AsRefStr, IntoStaticStr}; use thiserror::Error; /// Type alias for boxed dynamic errors that can be sent across threads. @@ -15,7 +14,8 @@ pub type BoxedError = Box; pub type Result = std::result::Result; /// Categories of errors that can occur in nvisy-core operations. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, AsRefStr, IntoStaticStr)] +#[strum(serialize_all = "snake_case")] pub enum ErrorKind { /// Input validation failed. InvalidInput, @@ -79,140 +79,4 @@ impl Error { self.source = Some(Box::new(source)); self } - - /// Creates a new invalid input error. - pub fn invalid_input() -> Self { - Self::new(ErrorKind::InvalidInput) - } - - /// Creates a new network error. - pub fn network_error() -> Self { - Self::new(ErrorKind::NetworkError) - } - - /// Creates a new authentication error. - pub fn authentication() -> Self { - Self::new(ErrorKind::Authentication) - } - - /// Creates a new authorization error. - pub fn authorization() -> Self { - Self::new(ErrorKind::Authorization) - } - - /// Creates a new rate limited error. - pub fn rate_limited() -> Self { - Self::new(ErrorKind::RateLimited) - } - - /// Creates a new service unavailable error. - pub fn service_unavailable() -> Self { - Self::new(ErrorKind::ServiceUnavailable) - } - - /// Creates a new internal error. - pub fn internal_error() -> Self { - Self::new(ErrorKind::InternalError) - } - - /// Creates a new external error. - pub fn external_error() -> Self { - Self::new(ErrorKind::ExternalError) - } - - /// Creates a new configuration error. - pub fn configuration() -> Self { - Self::new(ErrorKind::Configuration) - } - - /// Creates a new not found error. - pub fn not_found() -> Self { - Self::new(ErrorKind::NotFound) - } - - /// Creates a new timeout error. - pub fn timeout() -> Self { - Self::new(ErrorKind::Timeout) - } - - /// Creates a new serialization error. - pub fn serialization() -> Self { - Self::new(ErrorKind::Serialization) - } - - /// Creates a new unknown error. - pub fn unknown() -> Self { - Self::new(ErrorKind::Unknown) - } - - /// Returns true if this is a client error (4xx equivalent). - pub fn is_client_error(&self) -> bool { - matches!( - self.kind, - ErrorKind::InvalidInput - | ErrorKind::Authentication - | ErrorKind::Authorization - | ErrorKind::NotFound - | ErrorKind::RateLimited - ) - } - - /// Returns true if this is a server error (5xx equivalent). 
- pub fn is_server_error(&self) -> bool { - matches!( - self.kind, - ErrorKind::ServiceUnavailable - | ErrorKind::InternalError - | ErrorKind::ExternalError - | ErrorKind::Configuration - | ErrorKind::Timeout - | ErrorKind::Serialization - | ErrorKind::Unknown - ) - } - - /// Returns true if this error is potentially retryable. - pub fn is_retryable(&self) -> bool { - matches!( - self.kind, - ErrorKind::NetworkError - | ErrorKind::RateLimited - | ErrorKind::ServiceUnavailable - | ErrorKind::Timeout - ) - } - - /// Returns the recommended retry delay for this error. - pub fn retry_delay(&self) -> Option { - match self.kind { - ErrorKind::RateLimited => Some(Duration::from_secs(60)), - ErrorKind::ServiceUnavailable => Some(Duration::from_secs(30)), - ErrorKind::NetworkError => Some(Duration::from_secs(5)), - ErrorKind::Timeout => Some(Duration::from_secs(10)), - _ => None, - } - } - - /// Returns true if this is an authentication error. - pub fn is_auth_error(&self) -> bool { - matches!( - self.kind, - ErrorKind::Authentication | ErrorKind::Authorization - ) - } - - /// Returns true if this is a rate limiting error. - pub fn is_rate_limit_error(&self) -> bool { - matches!(self.kind, ErrorKind::RateLimited) - } - - /// Returns true if this is a timeout error. - pub fn is_timeout_error(&self) -> bool { - matches!(self.kind, ErrorKind::Timeout) - } - - /// Returns true if this is a network error. - pub fn is_network_error(&self) -> bool { - matches!(self.kind, ErrorKind::NetworkError) - } } diff --git a/crates/nvisy-core/src/lib.rs b/crates/nvisy-core/src/lib.rs index 9ba7bf3..ca397d5 100644 --- a/crates/nvisy-core/src/lib.rs +++ b/crates/nvisy-core/src/lib.rs @@ -3,8 +3,8 @@ #![doc = include_str!("../README.md")] mod error; -#[doc(hidden)] -pub mod prelude; +mod provider; pub mod types; pub use error::{BoxedError, Error, ErrorKind, Result}; +pub use provider::Provider; diff --git a/crates/nvisy-core/src/prelude.rs b/crates/nvisy-core/src/prelude.rs deleted file mode 100644 index fb04549..0000000 --- a/crates/nvisy-core/src/prelude.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Convenient re-exports for common use. - -pub use crate::error::{BoxedError, Error, ErrorKind, Result}; -pub use crate::types::{ServiceHealth, ServiceStatus, Timing}; diff --git a/crates/nvisy-core/src/provider.rs b/crates/nvisy-core/src/provider.rs new file mode 100644 index 0000000..ebb8a79 --- /dev/null +++ b/crates/nvisy-core/src/provider.rs @@ -0,0 +1,48 @@ +//! Provider creation trait. + +use crate::Result; + +/// Trait for creating a provider from parameters and credentials. +/// +/// This trait bridges non-sensitive parameters (like bucket name, table, model) +/// with sensitive credentials (like API keys, secrets) to construct +/// a fully configured provider instance. +/// +/// # Type Parameters +/// +/// - `Params`: Non-sensitive configuration (e.g., bucket name, model name) +/// - `Credentials`: Sensitive authentication data (e.g., API keys, secrets) +/// +/// # Example +/// +/// ```ignore +/// #[async_trait::async_trait] +/// impl IntoProvider for S3Provider { +/// type Params = S3Params; +/// type Credentials = S3Credentials; +/// +/// async fn create(params: Self::Params, credentials: Self::Credentials) -> Result { +/// // Build provider from params and credentials +/// } +/// } +/// ``` +#[async_trait::async_trait] +pub trait Provider: Send { + /// Non-sensitive parameters (bucket, prefix, table, model, etc.). + type Params: Send; + /// Sensitive credentials (API keys, secrets, etc.). 
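+    /// Kept separate from the non-sensitive `Params` so configuration and
+    /// secrets can be supplied independently.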
+ type Credentials: Send; + + /// Creates a new provider from parameters and credentials. + async fn connect(params: Self::Params, credentials: Self::Credentials) -> Result + where + Self: Sized; + + /// Disconnects and cleans up the provider. + async fn disconnect(self) -> Result<()> + where + Self: Sized, + { + Ok(()) + } +} diff --git a/crates/nvisy-dal/Cargo.toml b/crates/nvisy-dal/Cargo.toml new file mode 100644 index 0000000..2331349 --- /dev/null +++ b/crates/nvisy-dal/Cargo.toml @@ -0,0 +1,56 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-dal" +description = "Data abstraction layer for workflow inputs and outputs" +readme = "./README.md" +keywords = ["dal", "storage", "database", "vector", "workflow"] +categories = ["database", "api-bindings"] + +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +# Internal crates +nvisy-core = { workspace = true, features = [] } + +# Async runtime +tokio = { workspace = true, features = ["rt", "sync"] } +futures = { workspace = true, features = [] } + +# (De)serialization +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true, features = [] } + +# Derive macros & utilities +async-trait = { workspace = true, features = [] } +derive_more = { workspace = true, features = ["from"] } +thiserror = { workspace = true, features = [] } + +# Data types +bytes = { workspace = true, features = [] } +uuid = { workspace = true, features = ["v4", "v7"] } +jiff = { workspace = true, features = ["serde"] } + +# Python interop +pyo3 = { workspace = true, features = ["auto-initialize"] } +pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] } +async-stream = { workspace = true } + +[features] +default = [] + +[dev-dependencies] +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-dal/README.md b/crates/nvisy-dal/README.md new file mode 100644 index 0000000..97e08af --- /dev/null +++ b/crates/nvisy-dal/README.md @@ -0,0 +1,65 @@ +# nvisy-dal + +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + +Data Abstraction Layer for workflow inputs and outputs. + +## Overview + +This crate provides a unified interface for reading and writing data across various storage backends. It supports blob storage, relational databases, and vector databases. 
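+
+For example, a minimal sketch of the intended flow (connect a provider, then stream items through the `DataInput` trait); the table name, DSN, and other values below are purely illustrative:
+
+```rust
+use futures::StreamExt;
+use nvisy_dal::core::{DataInput, Provider, RelationalContext, RelationalParams};
+use nvisy_dal::provider::{PostgresCredentials, PostgresParams, PostgresProvider};
+
+async fn stream_rows() -> Result<(), Box<dyn std::error::Error>> {
+    // Non-sensitive parameters and sensitive credentials are kept separate.
+    let params = PostgresParams {
+        schema: "public".into(),
+        relational: RelationalParams {
+            table: Some("documents".into()),
+            ..Default::default()
+        },
+    };
+    let credentials = PostgresCredentials {
+        dsn: "postgres://user:pass@localhost:5432/app".into(),
+    };
+
+    // Connect, stream records (pagination state lives in the context), disconnect.
+    let provider = PostgresProvider::connect(params, credentials).await?;
+    let mut records = provider.read(&RelationalContext::default()).await?;
+    while let Some(record) = records.next().await {
+        println!("{} columns", record?.columns.len());
+    }
+    provider.disconnect().await?;
+    Ok(())
+}
+```
+
+The providers delegate to the `nvisy_dal` Python package via `pyo3`, so that package must be importable in the active Python environment.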
+ +## Modules + +- **`context`** - Context types for data operations (target, cursor, limit) +- **`datatype`** - Data types that flow through the DAL (Blob, Document, Embedding, Record, Graph, Message) +- **`provider`** - Storage and database providers +- **`stream`** - Stream types (`InputStream`, `OutputStream`) wrapping `BoxStream` +- **`traits`** - Core traits (`DataInput`, `DataOutput`) + +## Data Types + +All types implement the `DataType` marker trait: + +- **Blob** - Binary data with path and optional content type +- **Document** - Structured documents with title, content, and metadata +- **Embedding** - Vector embeddings with metadata for similarity search +- **Record** - Tabular data as key-value maps +- **Graph** - Graph structures with nodes and edges +- **Message** - Messages for queue-based systems + +## Streams + +The DAL uses wrapped stream types for better ergonomics with pagination support and streaming I/O operations. + +## Usage + +The DAL provides a consistent interface across all provider types. Create a provider with appropriate credentials and configuration, then use the `DataInput` and `DataOutput` traits for reading and writing data with proper context and stream handling. + +## Traits + +### DataInput + +Provides async read operations that return paginated streams of data. + +### DataOutput + +Provides async write operations for batches of data items. + +## Context + +The `Context` struct provides configuration for read/write operations including target specification (collection, table, bucket prefix), pagination cursors, and data limits. + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-dal/src/core/contexts.rs b/crates/nvisy-dal/src/core/contexts.rs new file mode 100644 index 0000000..cf31741 --- /dev/null +++ b/crates/nvisy-dal/src/core/contexts.rs @@ -0,0 +1,75 @@ +//! Context types for data operations. +//! +//! Contexts carry state from previous runs to enable pagination and resumption. + +use derive_more::From; +use serde::{Deserialize, Serialize}; + +/// Context for object storage operations (S3, GCS, Azure Blob). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ObjectContext { + /// Path prefix for listing objects. + pub prefix: Option, + /// Continuation token for pagination. + pub token: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +/// Context for relational database operations (Postgres, MySQL). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct RelationalContext { + /// Last seen cursor value (for keyset pagination). + pub cursor: Option, + /// Tiebreaker value for resolving cursor conflicts. + pub tiebreaker: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +/// Context for vector database operations (Qdrant, Pinecone, pgvector). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct VectorContext { + /// Continuation token or offset for pagination. + pub token: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +/// Type-erased context for runtime dispatch. 
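+///
+/// # Example
+///
+/// A small sketch of threading a limit through the type-erased wrapper:
+///
+/// ```ignore
+/// use nvisy_dal::{AnyContext, ObjectContext};
+///
+/// let ctx = AnyContext::from(ObjectContext::default()).with_limit(100);
+/// assert_eq!(ctx.limit(), Some(100));
+/// ```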
+#[derive(Debug, Clone, Default, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyContext { + /// No context / empty state. + #[default] + None, + /// Object storage context. + Object(ObjectContext), + /// Relational database context. + Relational(RelationalContext), + /// Vector database context. + Vector(VectorContext), +} + +impl AnyContext { + /// Returns the limit if set in any context type. + pub fn limit(&self) -> Option { + match self { + Self::None => None, + Self::Object(ctx) => ctx.limit, + Self::Relational(ctx) => ctx.limit, + Self::Vector(ctx) => ctx.limit, + } + } + + /// Sets the limit on the inner context. + pub fn with_limit(mut self, limit: usize) -> Self { + match &mut self { + Self::None => {} + Self::Object(ctx) => ctx.limit = Some(limit), + Self::Relational(ctx) => ctx.limit = Some(limit), + Self::Vector(ctx) => ctx.limit = Some(limit), + } + self + } +} diff --git a/crates/nvisy-dal/src/core/datatypes.rs b/crates/nvisy-dal/src/core/datatypes.rs new file mode 100644 index 0000000..6de0879 --- /dev/null +++ b/crates/nvisy-dal/src/core/datatypes.rs @@ -0,0 +1,176 @@ +//! Data types for the DAL. +//! +//! These types represent the data items that flow through providers: +//! - `Object` for object storage (S3, GCS, Azure Blob) +//! - `Document` for JSON documents +//! - `Embedding` for vector embeddings +//! - `Record` for relational rows +//! - `Message` for queue/stream messages +//! - `Graph`, `Node`, `Edge` for graph data + +use std::collections::HashMap; + +use bytes::Bytes; +use derive_more::From; +use jiff::Timestamp; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +/// Metadata associated with data items. +pub type Metadata = HashMap; + +/// Marker trait for data types that can be read/written through the DAL. +pub trait DataType: Send + Sync + 'static {} + +/// Type-erased data value for runtime dispatch. +#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyDataValue { + /// Object storage item (S3, GCS, etc.). + Object(Object), + /// JSON document. + Document(Document), + /// Vector embedding. + Embedding(Embedding), + /// Graph with nodes and edges. + Graph(Graph), + /// Relational record/row. + Record(Record), + /// Queue/stream message. + Message(Message), +} + +/// An object representing a file or binary data (S3, GCS, Azure Blob). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Object { + /// Path or key identifying this object. + pub path: String, + /// Raw binary data. + #[serde(with = "serde_bytes")] + pub data: Bytes, + /// Content type (MIME type). + #[serde(skip_serializing_if = "Option::is_none")] + pub content_type: Option, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl DataType for Object {} + +/// A document with flexible JSON content. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Document { + /// Unique identifier. + pub id: String, + /// Document content as JSON. + pub content: Value, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl DataType for Document {} + +/// A vector embedding with metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Embedding { + /// Unique identifier. + pub id: String, + /// The embedding vector. + pub vector: Vec, + /// Additional metadata. 
+ #[serde(default)] + pub metadata: Metadata, +} + +impl DataType for Embedding {} + +/// A record representing a row in a relational table. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Record { + /// Column values keyed by column name. + pub columns: HashMap, +} + +impl DataType for Record {} + +/// A message from a queue or stream. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + /// Unique identifier. + pub id: String, + /// Message payload. + #[serde(with = "serde_bytes")] + pub payload: Bytes, + /// Message headers. + #[serde(default)] + pub headers: HashMap, + /// Timestamp when the message was created. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, +} + +impl DataType for Message {} + +/// A graph containing nodes and edges. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Graph { + /// Nodes in the graph. + #[serde(default)] + pub nodes: Vec, + /// Edges in the graph. + #[serde(default)] + pub edges: Vec, +} + +impl DataType for Graph {} + +/// A node in a graph. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Node { + /// Unique identifier. + pub id: String, + /// Node labels (types). + #[serde(default)] + pub labels: Vec, + /// Node properties. + #[serde(default)] + pub properties: HashMap, +} + +/// An edge in a graph. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Edge { + /// Unique identifier. + pub id: String, + /// Source node ID. + pub from: String, + /// Target node ID. + pub to: String, + /// Edge label (relationship type). + pub label: String, + /// Edge properties. + #[serde(default)] + pub properties: HashMap, +} + +mod serde_bytes { + use bytes::Bytes; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + + pub fn serialize(bytes: &Bytes, serializer: S) -> Result + where + S: Serializer, + { + bytes.as_ref().serialize(serializer) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let vec = Vec::::deserialize(deserializer)?; + Ok(Bytes::from(vec)) + } +} diff --git a/crates/nvisy-dal/src/core/mod.rs b/crates/nvisy-dal/src/core/mod.rs new file mode 100644 index 0000000..986279d --- /dev/null +++ b/crates/nvisy-dal/src/core/mod.rs @@ -0,0 +1,45 @@ +//! Core types and traits for data operations. + +mod contexts; +mod datatypes; +mod params; +mod streams; + +pub use contexts::{AnyContext, ObjectContext, RelationalContext, VectorContext}; +pub use datatypes::{ + AnyDataValue, DataType, Document, Edge, Embedding, Graph, Message, Metadata, Node, Object, + Record, +}; +pub use nvisy_core::Provider; +pub use params::{DistanceMetric, ObjectParams, RelationalParams, VectorParams}; +pub use streams::{InputStream, ItemSink, ItemStream, OutputStream}; + +use crate::Result; + +/// Trait for reading data from a source. +/// +/// Implementations provide streaming access to data with optional pagination. +#[async_trait::async_trait] +pub trait DataInput: Send + Sync { + /// The item type produced by this provider. + type Item; + /// The context type for read operations. + type Context; + + /// Reads items from the source. + /// + /// Returns an input stream containing items. + async fn read(&self, ctx: &Self::Context) -> Result>; +} + +/// Trait for writing data to a sink. +/// +/// Implementations accept batches of items for writing. +#[async_trait::async_trait] +pub trait DataOutput: Send + Sync { + /// The item type accepted by this provider. 
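+    /// (e.g. `Record` for relational providers, `Object` for object storage,
+    /// `Embedding` for vector stores).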
+ type Item; + + /// Writes a batch of items to the sink. + async fn write(&self, items: Vec) -> Result<()>; +} diff --git a/crates/nvisy-dal/src/core/params.rs b/crates/nvisy-dal/src/core/params.rs new file mode 100644 index 0000000..2f34efe --- /dev/null +++ b/crates/nvisy-dal/src/core/params.rs @@ -0,0 +1,71 @@ +//! Parameter types for provider configuration. +//! +//! Params define how providers operate (columns, batch sizes, etc.), +//! while contexts carry runtime state (cursors, tokens, limits). + +use serde::{Deserialize, Serialize}; + +/// Common parameters for relational database operations. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct RelationalParams { + /// Target table name. + #[serde(skip_serializing_if = "Option::is_none")] + pub table: Option, + /// Column to use for cursor-based pagination (e.g., "id", "created_at"). + #[serde(skip_serializing_if = "Option::is_none")] + pub cursor_column: Option, + /// Column to use as tiebreaker when cursor values are not unique (e.g., "id"). + #[serde(skip_serializing_if = "Option::is_none")] + pub tiebreaker_column: Option, + /// Default batch size for bulk operations. + #[serde(default = "default_batch_size")] + pub batch_size: usize, +} + +/// Common parameters for object storage operations. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ObjectParams { + /// Bucket name (S3 bucket, GCS bucket, Azure container). + #[serde(skip_serializing_if = "Option::is_none")] + pub bucket: Option, + /// Default prefix for object keys. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, + /// Default batch size for bulk operations. + #[serde(default = "default_batch_size")] + pub batch_size: usize, +} + +/// Common parameters for vector database operations. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct VectorParams { + /// Collection or index name (Pinecone index, Qdrant collection). + #[serde(skip_serializing_if = "Option::is_none")] + pub collection: Option, + /// Dimension of vectors (required for some providers). + #[serde(skip_serializing_if = "Option::is_none")] + pub dimension: Option, + /// Distance metric for similarity search. + #[serde(default)] + pub metric: DistanceMetric, + /// Default batch size for bulk operations. + #[serde(default = "default_batch_size")] + pub batch_size: usize, +} + +/// Distance metric for vector similarity search. +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DistanceMetric { + /// Cosine similarity (default). + #[default] + Cosine, + /// Euclidean distance (L2). + Euclidean, + /// Dot product. + DotProduct, +} + +fn default_batch_size() -> usize { + 1000 +} diff --git a/crates/nvisy-dal/src/core/streams.rs b/crates/nvisy-dal/src/core/streams.rs new file mode 100644 index 0000000..07f1aa5 --- /dev/null +++ b/crates/nvisy-dal/src/core/streams.rs @@ -0,0 +1,95 @@ +//! Stream types for reading and writing data. + +use std::pin::Pin; +use std::task::{Context, Poll}; + +use futures::stream::BoxStream; +use futures::{Sink, Stream}; + +use crate::{Error, Result}; + +/// A boxed stream of items with a lifetime. +pub type ItemStream<'a, T> = BoxStream<'a, Result>; + +/// A boxed sink for items with a lifetime. +pub type ItemSink<'a, T> = Pin + Send + 'a>>; + +/// Input stream wrapper for reading data. +pub struct InputStream { + stream: ItemStream<'static, T>, +} + +impl InputStream { + /// Creates a new input stream. 
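+    ///
+    /// A sketch of wrapping a boxed stream (the items here are illustrative):
+    ///
+    /// ```ignore
+    /// use futures::StreamExt;
+    /// use nvisy_dal::{Error, InputStream};
+    ///
+    /// let stream = futures::stream::iter(vec![Ok::<_, Error>(1u32), Ok(2)]).boxed();
+    /// let input = InputStream::new(stream);
+    /// ```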
+ pub fn new(stream: ItemStream<'static, T>) -> Self { + Self { stream } + } + + /// Consumes the stream and returns the inner boxed stream. + pub fn into_inner(self) -> ItemStream<'static, T> { + self.stream + } +} + +impl Stream for InputStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.stream).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option) { + self.stream.size_hint() + } +} + +impl std::fmt::Debug for InputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InputStream").finish_non_exhaustive() + } +} + +/// Output stream wrapper for writing data. +/// +/// Wraps a boxed sink for streaming writes. +pub struct OutputStream { + sink: ItemSink<'static, T>, +} + +impl OutputStream { + /// Creates a new output stream. + pub fn new(sink: ItemSink<'static, T>) -> Self { + Self { sink } + } + + /// Consumes the stream and returns the inner boxed sink. + pub fn into_inner(self) -> ItemSink<'static, T> { + self.sink + } +} + +impl Sink for OutputStream { + type Error = Error; + + fn poll_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_ready(cx) + } + + fn start_send(mut self: Pin<&mut Self>, item: T) -> Result<(), Self::Error> { + self.sink.as_mut().start_send(item) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_flush(cx) + } + + fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_close(cx) + } +} + +impl std::fmt::Debug for OutputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OutputStream").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/error.rs b/crates/nvisy-dal/src/error.rs new file mode 100644 index 0000000..4be3f5e --- /dev/null +++ b/crates/nvisy-dal/src/error.rs @@ -0,0 +1,104 @@ +//! Error types for data operations. + +/// Boxed error type for dynamic error handling. +pub type BoxError = Box; + +/// Result type for data operations. +pub type Result = std::result::Result; + +/// Error type for data operations. +#[derive(Debug, thiserror::Error)] +#[error("{kind}: {message}")] +pub struct Error { + kind: ErrorKind, + message: String, + #[source] + source: Option, +} + +/// The kind of data error. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ErrorKind { + /// Connection error. + Connection, + /// Resource not found. + NotFound, + /// Invalid input. + InvalidInput, + /// Provider error. + Provider, +} + +impl Error { + /// Creates a new error. + pub fn new(kind: ErrorKind, message: impl Into) -> Self { + Self { + kind, + message: message.into(), + source: None, + } + } + + /// Adds a source error. + pub fn with_source(mut self, source: impl std::error::Error + Send + Sync + 'static) -> Self { + self.source = Some(Box::new(source)); + self + } + + /// Returns the error kind. + pub fn kind(&self) -> ErrorKind { + self.kind + } + + /// Creates a connection error. + pub fn connection(message: impl Into) -> Self { + Self::new(ErrorKind::Connection, message) + } + + /// Creates a not found error. + pub fn not_found(message: impl Into) -> Self { + Self::new(ErrorKind::NotFound, message) + } + + /// Creates an invalid input error. + pub fn invalid_input(message: impl Into) -> Self { + Self::new(ErrorKind::InvalidInput, message) + } + + /// Creates a provider error. 
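+    ///
+    /// e.g. `Error::provider("upsert failed")` (message illustrative).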
+ pub fn provider(message: impl Into) -> Self { + Self::new(ErrorKind::Provider, message) + } +} + +impl std::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Connection => write!(f, "connection"), + Self::NotFound => write!(f, "not found"), + Self::InvalidInput => write!(f, "invalid input"), + Self::Provider => write!(f, "provider"), + } + } +} + +impl From for nvisy_core::Error { + fn from(err: Error) -> Self { + let kind = match err.kind { + ErrorKind::Connection => nvisy_core::ErrorKind::NetworkError, + ErrorKind::NotFound => nvisy_core::ErrorKind::NotFound, + ErrorKind::InvalidInput => nvisy_core::ErrorKind::InvalidInput, + ErrorKind::Provider => nvisy_core::ErrorKind::ExternalError, + }; + + nvisy_core::Error::new(kind) + .with_message(&err.message) + .with_source(err) + } +} + +impl From for Error { + fn from(err: serde_json::Error) -> Self { + Error::new(ErrorKind::InvalidInput, err.to_string()) + } +} diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs new file mode 100644 index 0000000..4949cd5 --- /dev/null +++ b/crates/nvisy-dal/src/lib.rs @@ -0,0 +1,29 @@ +//! Data Abstraction Layer for workflow inputs and outputs. +//! +//! This crate provides a unified interface for reading and writing data +//! across various storage backends. +//! +//! # Architecture +//! +//! The DAL is split into two parts: +//! - **Rust**: Streaming, observability, unified interface, server integration +//! - **Python**: Provider implementations, client libraries, external integrations + +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] + +pub mod core; +pub mod provider; + +mod python; + +mod error; + +pub use core::{ + AnyContext, AnyDataValue, DataInput, DataOutput, DataType, Document, Edge, Embedding, Graph, + InputStream, ItemSink, ItemStream, Message, Metadata, Node, Object, ObjectContext, + OutputStream, Provider, Record, RelationalContext, VectorContext, +}; + +pub use error::{BoxError, Error, ErrorKind, Result}; +pub use provider::{AnyCredentials, AnyParams, AnyProvider}; diff --git a/crates/nvisy-dal/src/provider/mod.rs b/crates/nvisy-dal/src/provider/mod.rs new file mode 100644 index 0000000..9db8751 --- /dev/null +++ b/crates/nvisy-dal/src/provider/mod.rs @@ -0,0 +1,65 @@ +//! Provider implementations for external services. +//! +//! Each provider module exports credentials and params types +//! along with the main provider struct. +//! +//! Data types for input/output are in the `core` module: +//! - `Record` for PostgreSQL rows +//! - `Object` for S3 objects +//! - `Embedding` for Pinecone vectors +//! +//! Context types for pagination are in the `core` module: +//! - `RelationalContext` for relational databases +//! - `ObjectContext` for object storage +//! - `VectorContext` for vector databases +//! +//! Available providers: +//! - `postgres`: PostgreSQL relational database +//! - `s3`: AWS S3 / MinIO object storage +//! - `pinecone`: Pinecone vector database + +use derive_more::From; +use serde::{Deserialize, Serialize}; + +mod pinecone; +mod postgres; +mod s3; + +pub use self::pinecone::{PineconeCredentials, PineconeParams, PineconeProvider}; +pub use self::postgres::{PostgresCredentials, PostgresParams, PostgresProvider}; +pub use self::s3::{S3Credentials, S3Params, S3Provider}; + +/// Type-erased credentials for any provider. 
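+///
+/// # Example
+///
+/// A sketch of wrapping concrete credentials (field values illustrative):
+///
+/// ```ignore
+/// use nvisy_dal::provider::{AnyCredentials, S3Credentials};
+///
+/// let credentials: AnyCredentials = S3Credentials {
+///     access_key_id: "AKIA...".into(),
+///     secret_access_key: "...".into(),
+///     region: "us-east-1".into(),
+///     endpoint_url: None,
+/// }
+/// .into();
+/// ```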
+#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyCredentials { + /// PostgreSQL credentials. + Postgres(PostgresCredentials), + /// S3 credentials. + S3(S3Credentials), + /// Pinecone credentials. + Pinecone(PineconeCredentials), +} + +/// Type-erased parameters for any provider. +#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyParams { + /// PostgreSQL parameters. + Postgres(PostgresParams), + /// S3 parameters. + S3(S3Params), + /// Pinecone parameters. + Pinecone(PineconeParams), +} + +/// Type-erased provider instance. +#[derive(Debug, From)] +pub enum AnyProvider { + /// PostgreSQL provider. + Postgres(PostgresProvider), + /// S3 provider. + S3(S3Provider), + /// Pinecone provider. + Pinecone(PineconeProvider), +} diff --git a/crates/nvisy-dal/src/provider/pinecone.rs b/crates/nvisy-dal/src/provider/pinecone.rs new file mode 100644 index 0000000..e78ba1d --- /dev/null +++ b/crates/nvisy-dal/src/provider/pinecone.rs @@ -0,0 +1,67 @@ +//! Pinecone vector database provider. +//! +//! Provides vector upsert operations for the Pinecone vector database. + +use serde::{Deserialize, Serialize}; + +use crate::Result; +use crate::core::{DataOutput, Embedding, Provider}; +use crate::python::{self, PyDataOutput, PyProvider}; + +/// Credentials for Pinecone connection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PineconeCredentials { + /// Pinecone API key. + pub api_key: String, +} + +/// Parameters for Pinecone operations. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PineconeParams { + /// Index name. + pub index_name: String, + /// Namespace within the index. + pub namespace: String, +} + +/// Pinecone provider for vector upsert operations. +pub struct PineconeProvider { + inner: PyProvider, + output: PyDataOutput, +} + +#[async_trait::async_trait] +impl Provider for PineconeProvider { + type Credentials = PineconeCredentials; + type Params = PineconeParams; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let inner = python::connect("pinecone", credentials, params).await?; + Ok(Self { + output: inner.as_data_output(), + inner, + }) + } + + async fn disconnect(self) -> nvisy_core::Result<()> { + self.inner.disconnect().await.map_err(Into::into) + } +} + +#[async_trait::async_trait] +impl DataOutput for PineconeProvider { + type Item = Embedding; + + async fn write(&self, items: Vec) -> Result<()> { + self.output.write(items).await + } +} + +impl std::fmt::Debug for PineconeProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PineconeProvider").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/provider/postgres.rs b/crates/nvisy-dal/src/provider/postgres.rs new file mode 100644 index 0000000..d7bcc82 --- /dev/null +++ b/crates/nvisy-dal/src/provider/postgres.rs @@ -0,0 +1,89 @@ +//! PostgreSQL provider. +//! +//! Provides relational data operations using a connection pool. + +use serde::{Deserialize, Serialize}; + +use crate::Result; +use crate::core::{ + DataInput, DataOutput, InputStream, Provider, Record, RelationalContext, RelationalParams, +}; +use crate::python::{self, PyDataInput, PyDataOutput, PyProvider}; + +/// Credentials for PostgreSQL connection. 
+/// +/// Uses a connection string (DSN) format: `postgres://user:pass@host:port/database` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PostgresCredentials { + /// PostgreSQL connection string (DSN). + pub dsn: String, +} + +/// Parameters for PostgreSQL operations. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PostgresParams { + /// Schema name (defaults to "public"). + #[serde(default = "default_schema")] + pub schema: String, + /// Relational parameters (table, pagination). + #[serde(flatten)] + pub relational: RelationalParams, +} + +fn default_schema() -> String { + "public".to_string() +} + +/// PostgreSQL provider for relational data operations. +pub struct PostgresProvider { + inner: PyProvider, + input: PyDataInput, + output: PyDataOutput, +} + +#[async_trait::async_trait] +impl Provider for PostgresProvider { + type Credentials = PostgresCredentials; + type Params = PostgresParams; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let inner = python::connect("postgres", credentials, params).await?; + Ok(Self { + input: inner.as_data_input(), + output: inner.as_data_output(), + inner, + }) + } + + async fn disconnect(self) -> nvisy_core::Result<()> { + self.inner.disconnect().await.map_err(Into::into) + } +} + +#[async_trait::async_trait] +impl DataInput for PostgresProvider { + type Context = RelationalContext; + type Item = Record; + + async fn read(&self, ctx: &Self::Context) -> Result> { + self.input.read(ctx).await + } +} + +#[async_trait::async_trait] +impl DataOutput for PostgresProvider { + type Item = Record; + + async fn write(&self, items: Vec) -> Result<()> { + self.output.write(items).await + } +} + +impl std::fmt::Debug for PostgresProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PostgresProvider").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/provider/s3.rs b/crates/nvisy-dal/src/provider/s3.rs new file mode 100644 index 0000000..75ded4d --- /dev/null +++ b/crates/nvisy-dal/src/provider/s3.rs @@ -0,0 +1,86 @@ +//! S3 provider. +//! +//! Provides object storage operations for AWS S3 and S3-compatible services. + +use serde::{Deserialize, Serialize}; + +use crate::Result; +use crate::core::{DataInput, DataOutput, InputStream, Object, ObjectContext, Provider}; +use crate::python::{self, PyDataInput, PyDataOutput, PyProvider}; + +/// Credentials for S3 connection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3Credentials { + /// AWS access key ID. + pub access_key_id: String, + /// AWS secret access key. + pub secret_access_key: String, + /// AWS region. + pub region: String, + /// Custom endpoint URL (for MinIO, LocalStack, etc.). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub endpoint_url: Option, +} + +/// Parameters for S3 operations. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3Params { + /// Target bucket name. + pub bucket: String, + /// Key prefix for all operations. + pub prefix: String, +} + +/// S3 provider for object storage operations. 
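+///
+/// # Example
+///
+/// A minimal sketch of connecting and streaming objects (bucket, prefix, and
+/// credential values are illustrative):
+///
+/// ```ignore
+/// use futures::StreamExt;
+/// use nvisy_dal::core::{DataInput, ObjectContext, Provider};
+/// use nvisy_dal::provider::{S3Credentials, S3Params, S3Provider};
+///
+/// let params = S3Params { bucket: "my-bucket".into(), prefix: "uploads/".into() };
+/// let credentials = S3Credentials {
+///     access_key_id: "AKIA...".into(),
+///     secret_access_key: "...".into(),
+///     region: "us-east-1".into(),
+///     endpoint_url: None,
+/// };
+///
+/// let provider = S3Provider::connect(params, credentials).await?;
+/// let mut objects = provider.read(&ObjectContext::default()).await?;
+/// while let Some(object) = objects.next().await {
+///     println!("{}", object?.path);
+/// }
+/// ```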
+pub struct S3Provider { + inner: PyProvider, + input: PyDataInput, + output: PyDataOutput, +} + +#[async_trait::async_trait] +impl Provider for S3Provider { + type Credentials = S3Credentials; + type Params = S3Params; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let inner = python::connect("s3", credentials, params).await?; + Ok(Self { + input: inner.as_data_input(), + output: inner.as_data_output(), + inner, + }) + } + + async fn disconnect(self) -> nvisy_core::Result<()> { + self.inner.disconnect().await.map_err(Into::into) + } +} + +#[async_trait::async_trait] +impl DataInput for S3Provider { + type Context = ObjectContext; + type Item = Object; + + async fn read(&self, ctx: &Self::Context) -> Result> { + self.input.read(ctx).await + } +} + +#[async_trait::async_trait] +impl DataOutput for S3Provider { + type Item = Object; + + async fn write(&self, items: Vec) -> Result<()> { + self.output.write(items).await + } +} + +impl std::fmt::Debug for S3Provider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("S3Provider").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/python/error.rs b/crates/nvisy-dal/src/python/error.rs new file mode 100644 index 0000000..5730cd9 --- /dev/null +++ b/crates/nvisy-dal/src/python/error.rs @@ -0,0 +1,92 @@ +//! Error types for Python interop. + +use pyo3::PyErr; +use thiserror::Error; + +use crate::error::{Error, ErrorKind}; + +/// Result type for Python interop operations. +pub type PyResult = std::result::Result; + +/// Error type for Python interop operations. +#[derive(Debug, Error)] +#[error("{message}")] +pub struct PyError { + kind: PyErrorKind, + message: String, + #[source] + source: Option, +} + +#[derive(Debug, Clone, Copy)] +pub enum PyErrorKind { + /// Failed to initialize Python interpreter. + InitializationFailed, + /// Failed to import the nvisy_dal module. + ModuleNotFound, + /// Provider not found in the Python package. + ProviderNotFound, + /// Failed to call a Python method. + CallFailed, + /// Type conversion error between Rust and Python. 
+ ConversionError, +} + +impl PyError { + pub fn new(kind: PyErrorKind, message: impl Into) -> Self { + Self { + kind, + message: message.into(), + source: None, + } + } + + pub fn with_source(mut self, source: PyErr) -> Self { + self.source = Some(source); + self + } + + pub fn initialization(message: impl Into) -> Self { + Self::new(PyErrorKind::InitializationFailed, message) + } + + pub fn module_not_found(message: impl Into) -> Self { + Self::new(PyErrorKind::ModuleNotFound, message) + } + + pub fn provider_not_found(name: &str) -> Self { + Self::new( + PyErrorKind::ProviderNotFound, + format!("Provider '{}' not found in nvisy_dal", name), + ) + } + + pub fn call_failed(message: impl Into) -> Self { + Self::new(PyErrorKind::CallFailed, message) + } + + pub fn conversion(message: impl Into) -> Self { + Self::new(PyErrorKind::ConversionError, message) + } +} + +impl From for PyError { + fn from(err: PyErr) -> Self { + Self::new(PyErrorKind::CallFailed, err.to_string()).with_source(err) + } +} + +impl From for Error { + fn from(err: PyError) -> Self { + let kind = match err.kind { + PyErrorKind::InitializationFailed | PyErrorKind::ModuleNotFound => { + ErrorKind::Connection + } + PyErrorKind::ProviderNotFound => ErrorKind::NotFound, + PyErrorKind::ConversionError => ErrorKind::InvalidInput, + PyErrorKind::CallFailed => ErrorKind::Provider, + }; + + Error::new(kind, err.message) + } +} diff --git a/crates/nvisy-dal/src/python/loader.rs b/crates/nvisy-dal/src/python/loader.rs new file mode 100644 index 0000000..ff5d0f3 --- /dev/null +++ b/crates/nvisy-dal/src/python/loader.rs @@ -0,0 +1,233 @@ +//! Python package loader for nvisy_dal providers. + +use std::sync::OnceLock; + +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList, PyModule}; + +use super::{PyError, PyProvider, PyResult}; + +/// Global reference to the nvisy_dal Python module. +static NVISY_DAL_MODULE: OnceLock> = OnceLock::new(); + +/// Loader for Python-based data providers. +/// +/// Handles initialization of the Python interpreter and loading +/// of provider classes from the `nvisy_dal` package. +#[derive(Debug)] +pub struct PyProviderLoader { + _private: (), +} + +impl PyProviderLoader { + /// Creates a new provider loader. + /// + /// Initializes the Python interpreter if not already done. + pub fn new() -> PyResult { + // Ensure Python is initialized (pyo3 auto-initialize feature handles this) + Self::ensure_module_loaded()?; + Ok(Self { _private: () }) + } + + /// Ensures the nvisy_dal module is loaded and cached. + fn ensure_module_loaded() -> PyResult<()> { + if NVISY_DAL_MODULE.get().is_some() { + return Ok(()); + } + + Python::attach(|py| { + let module = py.import("nvisy_dal").map_err(|e| { + PyError::module_not_found("Failed to import nvisy_dal").with_source(e) + })?; + + // Store a reference to the module + let _ = NVISY_DAL_MODULE.set(module.unbind()); + Ok(()) + }) + } + + /// Loads a provider by name and connects with pre-serialized JSON values. 
+ /// + /// # Arguments + /// + /// * `name` - Provider name (e.g., "qdrant", "pinecone", "s3") + /// * `credentials` - JSON credentials + /// * `params` - JSON connection parameters + pub async fn load( + &self, + name: &str, + credentials: serde_json::Value, + params: serde_json::Value, + ) -> PyResult { + let name = name.to_owned(); + + // Get the provider class and prepare arguments + let (provider_class, creds_dict, params_dict) = Python::attach(|py| { + let module = self.get_module(py)?; + + // Import the specific provider module + let providers_mod = module + .getattr("providers") + .map_err(|e| PyError::module_not_found("providers").with_source(e))?; + let provider_mod = providers_mod + .getattr(name.as_str()) + .map_err(|e| PyError::provider_not_found(&name).with_source(e))?; + + // Get the Provider class + let provider_class = provider_mod + .getattr("Provider") + .map_err(|e| PyError::provider_not_found(&name).with_source(e))?; + + // Convert credentials and params to Python dicts + let creds_dict = json_to_pydict(py, &credentials)?; + let params_dict = json_to_pydict(py, ¶ms)?; + + Ok::<_, PyError>(( + provider_class.unbind(), + creds_dict.unbind(), + params_dict.unbind(), + )) + })?; + + // Call the async connect method + let coro = Python::attach(|py| { + let provider_class = provider_class.bind(py); + let creds = creds_dict.bind(py); + let params = params_dict.bind(py); + + let coro = provider_class.call_method1("connect", (creds, params))?; + pyo3_async_runtimes::tokio::into_future(coro) + })?; + + let instance = coro.await.map_err(PyError::from)?; + Ok(PyProvider::new(instance)) + } + + fn get_module<'py>(&self, py: Python<'py>) -> PyResult> { + NVISY_DAL_MODULE + .get() + .map(|m| m.bind(py).clone()) + .ok_or_else(|| PyError::module_not_found("nvisy_dal module not loaded")) + } +} + +impl Default for PyProviderLoader { + fn default() -> Self { + Self::new().expect("Failed to initialize PyProviderLoader") + } +} + +/// Converts a serde_json::Value to a Python dict. +pub fn json_to_pydict<'py>( + py: Python<'py>, + value: &serde_json::Value, +) -> PyResult> { + let dict = PyDict::new(py); + + if let serde_json::Value::Object(map) = value { + for (key, val) in map { + let py_val = json_to_pyobject(py, val)?; + dict.set_item(key, py_val) + .map_err(|e| PyError::conversion("Failed to set dict item").with_source(e))?; + } + } + + Ok(dict) +} + +/// Converts a serde_json::Value to a Python object. 
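For orientation, a crate-internal sketch of driving `load` end to end (both `PyProviderLoader` and `PyProvider` are `pub(crate)`). It assumes the `nvisy_dal` Python package, with an `s3` provider module exposing a `Provider` class, is importable in the embedded interpreter; the credential values are placeholders.

```rust
// Crate-internal sketch; the JSON shapes follow S3Credentials/S3Params above.
async fn load_s3_sketch() -> Result<(), Box<dyn std::error::Error>> {
    let loader = PyProviderLoader::new()?;

    let provider = loader
        .load(
            "s3",
            serde_json::json!({
                "access_key_id": "example-key",
                "secret_access_key": "example-secret",
                "region": "us-east-1",
                "endpoint_url": "http://localhost:9000"
            }),
            serde_json::json!({ "bucket": "example-bucket", "prefix": "raw/" }),
        )
        .await?;

    // The wrapper only holds a reference to the Python instance, so tear it down explicitly.
    provider.disconnect().await?;
    Ok(())
}
```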
+pub fn json_to_pyobject<'py>( + py: Python<'py>, + value: &serde_json::Value, +) -> PyResult> { + let obj: Bound<'py, PyAny> = match value { + serde_json::Value::Null => py.None().into_bound(py), + serde_json::Value::Bool(b) => (*b) + .into_pyobject(py) + .expect("infallible") + .to_owned() + .into_any(), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + i.into_pyobject(py) + .expect("infallible") + .to_owned() + .into_any() + } else if let Some(f) = n.as_f64() { + f.into_pyobject(py) + .expect("infallible") + .to_owned() + .into_any() + } else { + return Err(PyError::conversion("Unsupported number type")); + } + } + serde_json::Value::String(s) => { + s.as_str().into_pyobject(py).expect("infallible").into_any() + } + serde_json::Value::Array(arr) => { + let list = PyList::empty(py); + for item in arr { + let py_item = json_to_pyobject(py, item)?; + list.append(py_item) + .map_err(|e| PyError::conversion("Failed to append to list").with_source(e))?; + } + list.into_any() + } + serde_json::Value::Object(_) => json_to_pydict(py, value)?.into_any(), + }; + + Ok(obj) +} + +/// Converts a Python object to a serde_json::Value. +pub fn pyobject_to_json(obj: &Bound<'_, PyAny>) -> PyResult { + if obj.is_none() { + return Ok(serde_json::Value::Null); + } + + if let Ok(b) = obj.extract::() { + return Ok(serde_json::Value::Bool(b)); + } + + if let Ok(i) = obj.extract::() { + return Ok(serde_json::json!(i)); + } + + if let Ok(f) = obj.extract::() { + return Ok(serde_json::json!(f)); + } + + if let Ok(s) = obj.extract::() { + return Ok(serde_json::Value::String(s)); + } + + if let Ok(list) = obj.cast::() { + let mut arr = Vec::new(); + for item in list.iter() { + arr.push(pyobject_to_json(item.as_any())?); + } + return Ok(serde_json::Value::Array(arr)); + } + + if let Ok(dict) = obj.cast::() { + let mut map = serde_json::Map::new(); + for (key, value) in dict.iter() { + let key_str: String = key + .extract() + .map_err(|e| PyError::conversion("Dict key must be string").with_source(e))?; + map.insert(key_str, pyobject_to_json(&value)?); + } + return Ok(serde_json::Value::Object(map)); + } + + let type_name = obj + .get_type() + .name() + .map(|s| s.to_string()) + .unwrap_or_else(|_| "unknown".to_string()); + Err(PyError::conversion(format!( + "Unsupported Python type: {}", + type_name + ))) +} diff --git a/crates/nvisy-dal/src/python/mod.rs b/crates/nvisy-dal/src/python/mod.rs new file mode 100644 index 0000000..7b6a8f6 --- /dev/null +++ b/crates/nvisy-dal/src/python/mod.rs @@ -0,0 +1,37 @@ +//! Python interop for data providers. +//! +//! This module provides integration with the `nvisy_dal` Python package, +//! allowing Rust code to load and use Python-based providers. + +mod error; +mod loader; +mod provider; + +pub(crate) use error::{PyError, PyResult}; +pub(crate) use loader::PyProviderLoader; +pub(crate) use provider::{PyDataInput, PyDataOutput, PyProvider}; + +/// Connects to a Python provider by name with the given credentials and parameters. 
+/// +/// # Arguments +/// +/// * `name` - Provider name (e.g., "postgres", "pinecone", "s3") +/// * `credentials` - Serializable credentials +/// * `params` - Serializable connection parameters +pub(crate) async fn connect( + name: &str, + credentials: C, + params: P, +) -> crate::Result +where + C: serde::Serialize, + P: serde::Serialize, +{ + let loader = PyProviderLoader::new().map_err(crate::Error::from)?; + let creds_json = serde_json::to_value(credentials).map_err(crate::Error::from)?; + let params_json = serde_json::to_value(params).map_err(crate::Error::from)?; + loader + .load(name, creds_json, params_json) + .await + .map_err(crate::Error::from) +} diff --git a/crates/nvisy-dal/src/python/provider.rs b/crates/nvisy-dal/src/python/provider.rs new file mode 100644 index 0000000..0a6f67e --- /dev/null +++ b/crates/nvisy-dal/src/python/provider.rs @@ -0,0 +1,202 @@ +//! Python provider wrapper implementing Rust traits. + +use std::marker::PhantomData; + +use async_stream::try_stream; +use futures::Stream; +use pyo3::prelude::*; + +use super::PyError; +use super::loader::pyobject_to_json; +use crate::Result; +use crate::core::{DataInput, DataOutput, InputStream}; + +/// A wrapper around a Python provider instance. +/// +/// Implements the Rust `DataInput` and `DataOutput` traits by delegating +/// to the underlying Python provider's `read` and `write` methods. +pub struct PyProvider { + instance: Py, +} + +impl PyProvider { + /// Creates a new PyProvider from a connected Python provider instance. + pub fn new(instance: Py) -> Self { + Self { instance } + } + + /// Clones the underlying Python object reference. + pub fn clone_py_object(&self) -> Py { + Python::attach(|py| self.instance.clone_ref(py)) + } + + /// Creates a typed `DataInput` wrapper from this provider. + pub fn as_data_input(&self) -> PyDataInput { + PyDataInput::new(Self::new(self.clone_py_object())) + } + + /// Creates a typed `DataOutput` wrapper from this provider. + pub fn as_data_output(&self) -> PyDataOutput { + PyDataOutput::new(Self::new(self.clone_py_object())) + } + + /// Disconnects the provider. + pub async fn disconnect(&self) -> Result<()> { + let coro = Python::attach(|py| { + let coro = self + .instance + .bind(py) + .call_method0("disconnect") + .map_err(|e| PyError::call_failed(format!("Failed to call disconnect: {}", e)))?; + pyo3_async_runtimes::tokio::into_future(coro) + .map_err(|e| PyError::call_failed(format!("Failed to convert to future: {}", e))) + })?; + + coro.await + .map_err(|e| PyError::call_failed(format!("Failed to disconnect: {}", e)))?; + + Ok(()) + } +} + +impl std::fmt::Debug for PyProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PyProvider").finish_non_exhaustive() + } +} + +/// Typed wrapper for Python providers implementing DataInput. +pub struct PyDataInput { + provider: PyProvider, + _marker: PhantomData<(T, Ctx)>, +} + +impl PyDataInput { + /// Creates a new typed input wrapper. 
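A small crate-internal sketch of the round-trip property the `json_to_pydict` / `pyobject_to_json` helpers in `loader.rs` are expected to uphold for plain JSON objects; it assumes an embedded interpreter is available (pyo3 auto-initialize) and uses the same `Python::attach` entry point as the surrounding code.

```rust
fn json_roundtrip_sketch() -> Result<(), PyError> {
    let value = serde_json::json!({
        "bucket": "example-bucket",
        "prefix": "raw/",
        "max_keys": 100,
        "tags": ["a", "b"],
        "dry_run": false
    });

    let back = Python::attach(|py| {
        let dict = json_to_pydict(py, &value)?;
        pyobject_to_json(dict.as_any())
    })?;

    // Strings, integers, booleans, arrays and nested objects survive the trip;
    // non-string dict keys are rejected by pyobject_to_json.
    assert_eq!(back, value);
    Ok(())
}
```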
+ pub fn new(provider: PyProvider) -> Self { + Self { + provider, + _marker: PhantomData, + } + } +} + +#[async_trait::async_trait] +impl DataInput for PyDataInput +where + T: for<'de> serde::Deserialize<'de> + Send + Sync + 'static, + Ctx: serde::Serialize + Send + Sync, +{ + type Context = Ctx; + type Item = T; + + async fn read(&self, ctx: &Self::Context) -> Result> { + let ctx_json = serde_json::to_value(ctx) + .map_err(|e| PyError::conversion(format!("Failed to serialize context: {}", e)))?; + + // Call Python read method which returns an async iterator + let coro = Python::attach(|py| { + let bound = self.provider.instance.bind(py); + let ctx_dict = super::loader::json_to_pydict(py, &ctx_json)?; + let coro = bound + .call_method1("read", (ctx_dict,)) + .map_err(|e| PyError::call_failed(format!("Failed to call read: {}", e)))?; + pyo3_async_runtimes::tokio::into_future(coro) + .map_err(|e| PyError::call_failed(format!("Failed to convert to future: {}", e))) + })?; + + let py_iterator = coro + .await + .map_err(|e| PyError::call_failed(format!("Failed to call read: {}", e)))?; + + // Create a stream that pulls from the Python async iterator + let stream = py_async_iterator_to_stream::(py_iterator); + Ok(InputStream::new(Box::pin(stream))) + } +} + +/// Typed wrapper for Python providers implementing DataOutput. +pub struct PyDataOutput { + provider: PyProvider, + _marker: PhantomData, +} + +impl PyDataOutput { + /// Creates a new typed output wrapper. + pub fn new(provider: PyProvider) -> Self { + Self { + provider, + _marker: PhantomData, + } + } +} + +#[async_trait::async_trait] +impl DataOutput for PyDataOutput +where + T: serde::Serialize + Send + Sync, +{ + type Item = T; + + async fn write(&self, items: Vec) -> Result<()> { + let items_json = serde_json::to_value(&items) + .map_err(|e| PyError::conversion(format!("Failed to serialize items: {}", e)))?; + + let coro = Python::attach(|py| { + let bound = self.provider.instance.bind(py); + let items_list = super::loader::json_to_pyobject(py, &items_json)?; + let coro = bound + .call_method1("write", (items_list,)) + .map_err(|e| PyError::call_failed(format!("Failed to call write: {}", e)))?; + pyo3_async_runtimes::tokio::into_future(coro) + .map_err(|e| PyError::call_failed(format!("Failed to convert to future: {}", e))) + })?; + + coro.await + .map_err(|e| PyError::call_failed(format!("Failed to call write: {}", e)))?; + + Ok(()) + } +} + +/// Converts a Python async iterator to a Rust Stream. +fn py_async_iterator_to_stream(iterator: Py) -> impl Stream> +where + T: for<'de> serde::Deserialize<'de> + Send + 'static, +{ + try_stream! 
{ + loop { + // Get the next coroutine from __anext__ + let next_coro = Python::attach(|py| { + let bound = iterator.bind(py); + match bound.call_method0("__anext__") { + Ok(coro) => { + let future = pyo3_async_runtimes::tokio::into_future(coro)?; + Ok(Some(future)) + } + Err(e) => { + if e.is_instance_of::(py) { + Ok(None) + } else { + Err(PyError::from(e)) + } + } + } + })?; + + let Some(coro) = next_coro else { + break; + }; + + // Await the coroutine + let result = coro.await.map_err(PyError::from)?; + + // Convert result to Rust type + let json_value = Python::attach(|py| pyobject_to_json(result.bind(py)))?; + let item: T = serde_json::from_value(json_value) + .map_err(|e| PyError::conversion(format!("Failed to deserialize item: {}", e)))?; + + yield item; + } + } +} diff --git a/crates/nvisy-nats/Cargo.toml b/crates/nvisy-nats/Cargo.toml index d68c982..5f43ba0 100644 --- a/crates/nvisy-nats/Cargo.toml +++ b/crates/nvisy-nats/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-nats" +description = "Task-focused NATS client for the Nvisy platform with comprehensive JetStream support" +readme = "./README.md" +keywords = ["nats", "messaging", "jetstream", "pubsub", "streaming"] +categories = ["network-programming", "api-bindings"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -22,6 +26,9 @@ rustdoc-args = ["--cfg", "docsrs"] # Default feature set (none for minimal dependencies) default = [] +# JSON Schema support: enables JsonSchema derives on Nats types +# This allows Nats types to be used directly in API documentation without +# manual schema implementations, while keeping the dependency optional schema = ["dep:schemars"] # CLI configuration support: enables clap derives for config types @@ -34,8 +41,8 @@ async-nats = { workspace = true } # Async runtime tokio = { workspace = true, features = ["rt-multi-thread", "macros", "time"] } +pin-project-lite = { workspace = true, features = [] } futures = { workspace = true, features = [] } -async-stream = { workspace = true, features = [] } # Observability tracing = { workspace = true, features = [] } @@ -44,28 +51,21 @@ tracing = { workspace = true, features = [] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } -# Error handling -thiserror = { workspace = true, features = [] } - # Derive macros & utilities +thiserror = { workspace = true, features = [] } derive_more = { workspace = true, features = ["deref", "deref_mut", "from", "into", "display"] } -strum = { workspace = true, features = [] } # Data types uuid = { workspace = true, features = ["serde", "v4", "v7"] } jiff = { workspace = true, features = ["serde"] } -bytes = { workspace = true, features = [] } # Cryptography sha2 = { workspace = true, features = [] } hex = { workspace = true, features = [] } +base64 = { workspace = true, features = [] } -# Utilities -pin-project-lite = { workspace = true, features = [] } - -# Optional: JSON schema generation +# JSON schema generation (Optional) schemars = { workspace = true, features = [], optional = true } -# CLI (optional) +# CLI (Optional) clap = { workspace = true, features = ["derive", "env"], optional = true } -base64.workspace = true diff --git a/crates/nvisy-nats/README.md b/crates/nvisy-nats/README.md index c0103ae..d6966d2 100644 --- a/crates/nvisy-nats/README.md +++ 
b/crates/nvisy-nats/README.md @@ -1,11 +1,10 @@ # nvisy-nats +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Task-focused NATS client for the Nvisy platform with comprehensive JetStream support and unified streaming infrastructure. -[![Rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![NATS](https://img.shields.io/badge/NATS-JetStream-000000?style=flat-square&logo=nats&logoColor=white)](https://nats.io/) - ## Features - **Type-Safe Operations** - Generic KV store with compile-time type safety @@ -33,3 +32,18 @@ The crate provides specialized modules for common NATS use cases: All modules maintain type safety through generic parameters and provide access to the underlying NATS client for extensibility. + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-nats/src/client/mod.rs b/crates/nvisy-nats/src/client/mod.rs index 05526a5..0d2c51b 100644 --- a/crates/nvisy-nats/src/client/mod.rs +++ b/crates/nvisy-nats/src/client/mod.rs @@ -3,5 +3,5 @@ mod nats_client; mod nats_config; -pub use nats_client::{NatsClient, NatsConnection}; +pub use nats_client::NatsClient; pub use nats_config::NatsConfig; diff --git a/crates/nvisy-nats/src/client/nats_client.rs b/crates/nvisy-nats/src/client/nats_client.rs index 5454b35..64f3e58 100644 --- a/crates/nvisy-nats/src/client/nats_client.rs +++ b/crates/nvisy-nats/src/client/nats_client.rs @@ -33,16 +33,19 @@ use std::sync::Arc; use std::time::Duration; use async_nats::{Client, ConnectOptions, jetstream}; -use bytes::Bytes; +use serde::Serialize; +use serde::de::DeserializeOwned; use tokio::time::timeout; use super::nats_config::NatsConfig; -use crate::kv::{ApiTokenStore, CacheStore, ChatHistoryStore}; -use crate::object::{DocumentBucket, DocumentStore}; -use crate::stream::{ - DocumentJobPublisher, DocumentJobSubscriber, Stage, WorkspaceEventPublisher, - WorkspaceEventSubscriber, +use crate::kv::{ + ApiToken, ApiTokensBucket, ChatHistoryBucket, KvBucket, KvKey, KvStore, SessionKey, TokenKey, }; +use crate::object::{ + AccountKey, AvatarsBucket, FileKey, FilesBucket, IntermediatesBucket, ObjectBucket, ObjectKey, + ObjectStore, ThumbnailsBucket, +}; +use crate::stream::{EventPublisher, EventStream, EventSubscriber, FileStream, WebhookStream}; use crate::{Error, Result, TRACING_TARGET_CLIENT, TRACING_TARGET_CONNECTION}; /// NATS client wrapper with connection management. @@ -160,226 +163,155 @@ impl NatsClient { async_nats::connection::State::Connected ) } +} - /// Get or create an ApiTokenStore +// Key-value store getters +impl NatsClient { + /// Get or create a KV store for the specified key, value, and bucket types. 
#[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn api_token_store(&self, ttl: Option) -> Result { - ApiTokenStore::new(&self.inner.jetstream, ttl).await + pub async fn kv_store(&self) -> Result> + where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, + { + KvStore::new(&self.inner.jetstream).await } - /// Get or create a document store for the specified bucket type. + /// Get or create a KV store with custom TTL. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn document_store(&self) -> Result> { - DocumentStore::new(&self.inner.jetstream).await + pub async fn kv_store_with_ttl(&self, ttl: Duration) -> Result> + where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, + { + KvStore::with_ttl(&self.inner.jetstream, ttl).await } - /// Create a document job publisher for a specific stage. + /// Get or create an API token store. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn document_job_publisher(&self) -> Result> { - DocumentJobPublisher::new(&self.inner.jetstream).await + pub async fn api_token_store( + &self, + ttl: Duration, + ) -> Result> { + self.kv_store_with_ttl(ttl).await } - /// Create a document job subscriber for a specific stage. + /// Get or create a chat history store with default TTL. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn document_job_subscriber( - &self, - consumer_name: &str, - ) -> Result> { - DocumentJobSubscriber::new(&self.inner.jetstream, consumer_name).await + pub async fn chat_history_store(&self) -> Result> + where + V: Serialize + DeserializeOwned + Send + Sync + 'static, + { + self.kv_store().await } - /// Create a workspace event publisher. + /// Get or create a chat history store with custom TTL. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn workspace_event_publisher(&self) -> Result { - WorkspaceEventPublisher::new(&self.inner.jetstream).await + pub async fn chat_history_store_with_ttl( + &self, + ttl: Duration, + ) -> Result> + where + V: Serialize + DeserializeOwned + Send + Sync + 'static, + { + self.kv_store_with_ttl(ttl).await } +} - /// Create a workspace event subscriber. +// Object store getters +impl NatsClient { + /// Get or create an object store for the specified bucket and key types. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn workspace_event_subscriber( - &self, - consumer_name: &str, - ) -> Result { - WorkspaceEventSubscriber::new(&self.inner.jetstream, consumer_name).await + pub async fn object_store(&self) -> Result> + where + B: ObjectBucket, + K: ObjectKey, + { + ObjectStore::new(&self.inner.jetstream).await } - /// Create a workspace event subscriber filtered to a specific workspace. + /// Get or create a file store for primary file storage. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn workspace_event_subscriber_for_workspace( - &self, - consumer_name: &str, - workspace_id: uuid::Uuid, - ) -> Result { - WorkspaceEventSubscriber::new_for_workspace( - &self.inner.jetstream, - consumer_name, - workspace_id, - ) - .await + pub async fn file_store(&self) -> Result> { + self.object_store().await } - /// Get or create a CacheStore for a specific namespace + /// Get or create an intermediates store for temporary processing artifacts. 
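A hedged usage sketch of the getters above, assuming a connected client and that the listed paths are public re-exports of `nvisy-nats`; the generic form spells out the same key/value/bucket triple that the convenience getter picks for you.

```rust
use std::time::Duration;

use nvisy_nats::NatsClient; // assumed re-export path
use nvisy_nats::kv::{ApiToken, ApiTokensBucket, TokenKey}; // assumed re-export path

async fn kv_getters_sketch(client: &NatsClient) -> Result<(), Box<dyn std::error::Error>> {
    let ttl = Duration::from_secs(24 * 60 * 60);

    // Convenience getter and explicit generic form should yield the same store type.
    let _tokens = client.api_token_store(ttl).await?;
    let _same_tokens = client
        .kv_store_with_ttl::<TokenKey, ApiToken, ApiTokensBucket>(ttl)
        .await?;

    // Object stores follow the same pattern; file_store() presumably fixes the
    // FilesBucket/FileKey pair on object_store().
    let _files = client.file_store().await?;
    Ok(())
}
```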
#[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn cache_store( - &self, - namespace: &str, - ttl: Option, - ) -> Result> - where - T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone + Send + Sync + 'static, - { - CacheStore::new(&self.inner.jetstream, namespace, ttl).await + pub async fn intermediates_store(&self) -> Result> { + self.object_store().await } - /// Get or create a ChatHistoryStore for ephemeral sessions. + /// Get or create a thumbnail store for document thumbnails. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn chat_history_store(&self, ttl: Option) -> Result> - where - T: serde::Serialize + for<'de> serde::Deserialize<'de> + Send + Sync + 'static, - { - match ttl { - Some(ttl) => ChatHistoryStore::with_ttl(&self.inner.jetstream, ttl).await, - None => ChatHistoryStore::new(&self.inner.jetstream).await, - } + pub async fn thumbnail_store(&self) -> Result> { + self.object_store().await } -} -/// A NATS connection wrapper for basic pub/sub operations -#[derive(Debug, Clone)] -pub struct NatsConnection { - client: Client, - request_timeout: Duration, + /// Get or create an avatar store for account avatars. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn avatar_store(&self) -> Result> { + self.object_store().await + } } -impl NatsConnection { - /// Publish a message to a subject - #[tracing::instrument(skip(self, payload))] - pub async fn publish(&self, subject: &str, payload: impl Into) -> Result<()> { - timeout( - self.request_timeout, - self.client.publish(subject.to_string(), payload.into()), - ) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? - .map_err(|e| Error::delivery_failed(subject, e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - "Published message" - ); - Ok(()) +// Stream getters +impl NatsClient { + /// Create an event publisher for the specified stream type. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn event_publisher(&self) -> Result> + where + T: Serialize + Send + Sync + 'static, + S: EventStream, + { + EventPublisher::new(&self.inner.jetstream).await } - /// Publish a message with a reply subject - #[tracing::instrument(skip(self, payload), target = TRACING_TARGET_CLIENT)] - pub async fn publish_with_reply( - &self, - subject: &str, - reply: &str, - payload: impl Into, - ) -> Result<()> { - timeout( - self.request_timeout, - self.client - .publish_with_reply(subject.to_string(), reply.to_string(), payload.into()), - ) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? - .map_err(|e| Error::delivery_failed(subject, e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - reply = %reply, - "Published message with reply" - ); - Ok(()) + /// Create an event subscriber for the specified stream type. 
+ #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn event_subscriber(&self) -> Result> + where + T: DeserializeOwned + Send + Sync + 'static, + S: EventStream, + { + EventSubscriber::new(&self.inner.jetstream).await } - /// Send a request and wait for a response - #[tracing::instrument(skip(self, payload), target = TRACING_TARGET_CLIENT)] - pub async fn request( - &self, - subject: &str, - payload: impl Into, - ) -> Result { - let response = timeout( - self.request_timeout, - self.client.request(subject.to_string(), payload.into()), - ) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? - .map_err(|e| Error::delivery_failed(subject, e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - payload_size = response.payload.len(), - "Received response for request" - ); - Ok(response) + /// Create a file job publisher. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn file_publisher(&self) -> Result> + where + T: Serialize + Send + Sync + 'static, + { + self.event_publisher().await } - /// Subscribe to a subject + /// Create a file job subscriber. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn subscribe(&self, subject: &str) -> Result { - let subscriber = self - .client - .subscribe(subject.to_string()) - .await - .map_err(|e| Error::Connection(Box::new(e)))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - "Subscribed to subject" - ); - Ok(subscriber) + pub async fn file_subscriber(&self) -> Result> + where + T: DeserializeOwned + Send + Sync + 'static, + { + self.event_subscriber().await } - /// Subscribe to a subject with a queue group + /// Create a webhook publisher. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn queue_subscribe( - &self, - subject: &str, - queue: &str, - ) -> Result { - let subscriber = self - .client - .queue_subscribe(subject.to_string(), queue.to_string()) - .await - .map_err(|e| Error::Connection(Box::new(e)))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - queue = %queue, - "Subscribed to subject with queue group" - ); - Ok(subscriber) + pub async fn webhook_publisher(&self) -> Result> + where + T: Serialize + Send + Sync + 'static, + { + self.event_publisher().await } - /// Flush pending messages + /// Create a webhook subscriber. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn flush(&self) -> Result<()> { - timeout(self.request_timeout, self.client.flush()) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? - .map_err(|e| Error::Connection(Box::new(e)))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - "Flushed pending messages" - ); - Ok(()) + pub async fn webhook_subscriber(&self) -> Result> + where + T: DeserializeOwned + Send + Sync + 'static, + { + self.event_subscriber().await } } diff --git a/crates/nvisy-nats/src/kv/api_token.rs b/crates/nvisy-nats/src/kv/api_token.rs index 6d848dd..6c82dd0 100644 --- a/crates/nvisy-nats/src/kv/api_token.rs +++ b/crates/nvisy-nats/src/kv/api_token.rs @@ -1,4 +1,4 @@ -//! API authentication token data structure. +//! API authentication token type. use std::time::Duration; @@ -20,8 +20,6 @@ pub enum ApiTokenType { } /// API authentication token data structure. -/// -/// Simplified token model for session management. 
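The stream getters above follow the same convenience-over-generic split, sketched below; `FileJob` is a placeholder payload type, the import paths are assumed re-exports, and the pairing of the `file_*` getters with `FileStream` is inferred from the names rather than stated in this hunk.

```rust
use serde::{Deserialize, Serialize};

use nvisy_nats::NatsClient; // assumed re-export path
use nvisy_nats::stream::FileStream; // assumed re-export path

// Placeholder payload; the real job/event types live elsewhere in the workspace.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FileJob {
    file_id: uuid::Uuid,
}

async fn stream_getters_sketch(client: &NatsClient) -> Result<(), Box<dyn std::error::Error>> {
    // Convenience getters fix the stream type and leave the payload generic.
    let _publisher = client.file_publisher::<FileJob>().await?;
    let _subscriber = client.file_subscriber::<FileJob>().await?;

    // Equivalent generic form with the stream type spelled out.
    let _same_publisher = client.event_publisher::<FileJob, FileStream>().await?;
    Ok(())
}
```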
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct ApiToken { /// Unique token identifier used for authentication @@ -166,8 +164,6 @@ mod tests { fn test_token_expiry() { let now = Timestamp::now(); let mut token = create_test_token(); - - // Set token to expire in the past token.expired_at = now .checked_sub(jiff::SignedDuration::from_secs(3600)) .unwrap(); @@ -184,7 +180,6 @@ mod tests { assert!(!token.is_valid()); assert!(token.is_deleted()); - assert!(token.deleted_at.is_some()); } #[test] @@ -197,41 +192,6 @@ mod tests { assert!(!token.can_be_refreshed()); } - #[test] - fn test_token_touch() { - let mut token = create_test_token(); - let original_last_used = token.last_used_at; - - // Small delay to ensure timestamp difference - std::thread::sleep(std::time::Duration::from_millis(10)); - - token.touch(); - assert!(token.last_used_at > original_last_used); - } - - #[test] - fn test_token_short_display() { - let token = create_test_token(); - let short_access = token.access_seq_short(); - - assert_eq!(short_access.len(), 11); // 8 chars + "..." - assert!(short_access.ends_with("...")); - } - - #[test] - fn test_is_expiring_soon() { - let now = Timestamp::now(); - let mut token = create_test_token(); - - // Set expiry to 10 minutes from now - token.expired_at = now - .checked_add(jiff::SignedDuration::from_secs(600)) - .unwrap(); - - assert!(token.is_expiring_soon(15)); // Within 15 minutes - assert!(!token.is_expiring_soon(5)); // Not within 5 minutes - } - #[test] fn test_api_token_type_serialization() { let web = ApiTokenType::Web; @@ -241,9 +201,5 @@ mod tests { let api = ApiTokenType::Api; let serialized = serde_json::to_string(&api).unwrap(); assert_eq!(serialized, "\"api\""); - - let cli = ApiTokenType::Cli; - let serialized = serde_json::to_string(&cli).unwrap(); - assert_eq!(serialized, "\"cli\""); } } diff --git a/crates/nvisy-nats/src/kv/api_token_store.rs b/crates/nvisy-nats/src/kv/api_token_store.rs deleted file mode 100644 index b00e715..0000000 --- a/crates/nvisy-nats/src/kv/api_token_store.rs +++ /dev/null @@ -1,355 +0,0 @@ -//! API token store operations using NATS KV. - -use std::time::Duration; - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; -use jiff::Timestamp; -use uuid::Uuid; - -use crate::kv::KvStore; -use crate::kv::api_token::{ApiToken, ApiTokenType}; -use crate::{Result, TRACING_TARGET_KV}; - -/// API token store for authentication token management. -/// -/// Provides operations for creating, retrieving, updating, and managing -/// API authentication tokens with automatic expiry handling. -#[derive(Deref, DerefMut)] -pub struct ApiTokenStore { - #[deref] - #[deref_mut] - store: KvStore, - default_ttl: Duration, -} - -impl ApiTokenStore { - /// Create a new API token store. - /// - /// # Arguments - /// * `jetstream` - JetStream context for NATS operations - /// * `ttl` - Default time-to-live for tokens (defaults to 24 hours) - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new(jetstream: &jetstream::Context, ttl: Option) -> Result { - let default_ttl = ttl.unwrap_or(Duration::from_secs(86400)); // 24 hours default - - let store = KvStore::new( - jetstream, - "api_tokens", - Some("API authentication tokens"), - Some(default_ttl), - ) - .await?; - - tracing::info!( - target: TRACING_TARGET_KV, - ttl_hours = default_ttl.as_secs() / 3600, - bucket = %store.bucket_name(), - "Created API token store" - ); - - Ok(Self { store, default_ttl }) - } - - /// Create and store a new API token. 
- /// - /// # Arguments - /// * `account_id` - Account UUID this token belongs to - /// * `token_type` - Type of token (web, mobile, api) - /// * `ip_address` - IP address where token originated - /// * `user_agent` - User agent string from client - /// * `ttl` - Token lifetime (uses default if None) - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn create_token( - &self, - account_id: Uuid, - token_type: ApiTokenType, - ip_address: String, - user_agent: String, - ttl: Option, - ) -> Result { - let token_ttl = ttl.unwrap_or(self.default_ttl); - let now = Timestamp::now(); - let expires_at = now - .checked_add(jiff::SignedDuration::from_secs(token_ttl.as_secs() as i64)) - .unwrap_or( - now.checked_add(jiff::SignedDuration::from_secs(86400)) - .unwrap_or(now), - ); - - let token = ApiToken { - access_seq: Uuid::new_v4(), - account_id, - ip_address, - user_agent, - token_type, - is_suspicious: false, - issued_at: now, - expired_at: expires_at, - last_used_at: Some(now), - deleted_at: None, - }; - - let token_key = token.access_seq.to_string(); - self.store.put(&token_key, &token).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - access_seq = %token.access_seq, - account_id = %token.account_id, - token_type = ?token.token_type, - expires_at = %token.expired_at, - ip_address = %token.ip_address, - "Created new API token" - ); - - Ok(token) - } - - /// Retrieve and validate an API token by access sequence. - /// - /// Returns None if token doesn't exist, is expired, or is deleted. - /// Does NOT automatically update last_used_at to avoid write amplification. - /// Use `touch_token()` separately if you need to update the timestamp. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get_token(&self, access_seq: &Uuid) -> Result> { - let token_key = access_seq.to_string(); - - match self.store.get(&token_key).await? { - Some(kv_token) => { - let token = kv_token.value; - - // Check if token is deleted - if token.is_deleted() { - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - deleted_at = ?token.deleted_at, - "Token is soft-deleted" - ); - return Ok(None); - } - - // Check if token is expired - if token.is_expired() { - tracing::warn!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - expired_at = %token.expired_at, - "Token has expired" - ); - - // Soft delete expired token - self.delete_token(access_seq).await?; - return Ok(None); - } - - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - account_id = %token.account_id, - last_used_at = ?token.last_used_at, - "Retrieved API token" - ); - - Ok(Some(token)) - } - None => { - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - "Token not found" - ); - Ok(None) - } - } - } - - /// Update the last_used_at timestamp for a token. - /// - /// Call this periodically (e.g., every 5 minutes) instead of on every access - /// to avoid write amplification while still tracking activity. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn touch_token(&self, access_seq: &Uuid) -> Result { - let token_key = access_seq.to_string(); - - if let Some(kv_token) = self.store.get(&token_key).await? 
{ - let mut token = kv_token.value; - - if token.is_valid() { - token.touch(); - self.store.put(&token_key, &token).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - last_used_at = ?token.last_used_at, - "Updated token last_used_at" - ); - - return Ok(true); - } - } - - Ok(false) - } - - /// Mark a token as deleted (soft delete). - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete_token(&self, access_seq: &Uuid) -> Result<()> { - let token_key = access_seq.to_string(); - - if let Some(kv_token) = self.store.get(&token_key).await? { - let mut token = kv_token.value; - token.mark_deleted(); - - self.store.put(&token_key, &token).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - account_id = %token.account_id, - deleted_at = ?token.deleted_at, - "Soft-deleted API token" - ); - } - - Ok(()) - } - - /// Delete all tokens for a specific account. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete_account_tokens(&self, account_id: &Uuid) -> Result { - let all_keys = self.store.keys().await?; - let mut deleted_count = 0; - - for key in all_keys { - if let Ok(Some(kv_token)) = self.store.get(&key).await - && kv_token.value.account_id == *account_id - && !kv_token.value.is_deleted() - && let Ok(access_seq) = Uuid::parse_str(&key) - { - self.delete_token(&access_seq).await?; - deleted_count += 1; - } - } - - tracing::info!( - target: TRACING_TARGET_KV, - account_id = %account_id, - deleted_count = deleted_count, - "Deleted all account tokens" - ); - - Ok(deleted_count) - } - - /// Get all active tokens for a specific account. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get_account_tokens(&self, account_id: &Uuid) -> Result> { - let all_keys = self.store.keys().await?; - let mut tokens = Vec::new(); - - for key in all_keys { - if let Ok(Some(kv_token)) = self.store.get(&key).await { - let token = kv_token.value; - if token.account_id == *account_id && token.is_valid() { - tokens.push(token); - } - } - } - - // Sort by most recently used - tokens.sort_by(|a, b| { - b.last_used_at - .unwrap_or(b.issued_at) - .cmp(&a.last_used_at.unwrap_or(a.issued_at)) - }); - - tracing::debug!( - target: TRACING_TARGET_KV, - account_id = %account_id, - active_tokens = tokens.len(), - "Retrieved account tokens" - ); - - Ok(tokens) - } - - /// Mark a token as suspicious. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn mark_suspicious(&self, access_seq: &Uuid) -> Result { - let token_key = access_seq.to_string(); - - if let Some(kv_token) = self.store.get(&token_key).await? { - let mut token = kv_token.value; - - if !token.is_suspicious { - token.mark_suspicious(); - self.store.put(&token_key, &token).await?; - - tracing::warn!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - account_id = %token.account_id, - "Marked token as suspicious" - ); - - return Ok(true); - } - } - - Ok(false) - } - - /// Clean up expired and deleted tokens (maintenance operation). 
- #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn cleanup_expired(&self) -> Result { - let all_keys = self.store.keys().await?; - let mut cleaned_count = 0; - let now = Timestamp::now(); - - // Define cleanup threshold (delete tokens that have been soft-deleted for more than 7 days) - let cleanup_threshold = now - .checked_sub(jiff::SignedDuration::from_secs(7 * 24 * 3600)) - .unwrap_or(now); - - for key in all_keys { - if let Ok(Some(kv_token)) = self.store.get(&key).await { - let token = kv_token.value; - - // Hard delete tokens that have been soft-deleted for more than the threshold - if let Some(deleted_at) = token.deleted_at - && deleted_at < cleanup_threshold - { - self.store.delete(&key).await?; - cleaned_count += 1; - continue; - } - - // Hard delete expired tokens that haven't been accessed in the threshold period - if token.is_expired() { - let last_activity = token.last_used_at.unwrap_or(token.issued_at); - if last_activity < cleanup_threshold { - self.store.delete(&key).await?; - cleaned_count += 1; - } - } - } - } - - tracing::info!( - target: TRACING_TARGET_KV, - cleaned_count = cleaned_count, - cleanup_threshold = %cleanup_threshold, - "Cleaned up expired tokens" - ); - - Ok(cleaned_count) - } - - /// Get the default TTL for tokens. - pub fn default_ttl(&self) -> Duration { - self.default_ttl - } -} diff --git a/crates/nvisy-nats/src/kv/cache.rs b/crates/nvisy-nats/src/kv/cache.rs deleted file mode 100644 index 29247fc..0000000 --- a/crates/nvisy-nats/src/kv/cache.rs +++ /dev/null @@ -1,324 +0,0 @@ -//! Type-safe generic caching using NATS KV store. - -use std::marker::PhantomData; -use std::time::Duration; - -use async_nats::jetstream; -use serde::Serialize; -use serde::de::DeserializeOwned; - -use super::KvStore; -use crate::{Result, TRACING_TARGET_KV}; - -/// Type-safe generic cache store wrapper around KvStore. -/// -/// Provides cache-specific semantics and operations while maintaining -/// compile-time type safety for cached values of type T. -#[derive(Clone)] -pub struct CacheStore { - store: KvStore, - namespace: String, - _marker: PhantomData, -} - -impl CacheStore -where - T: Serialize + DeserializeOwned + Clone + Send + Sync + 'static, -{ - /// Create a new type-safe cache store for the given namespace. - /// - /// # Arguments - /// * `jetstream` - JetStream context for NATS operations - /// * `namespace` - Cache namespace (becomes part of bucket name) - /// * `ttl` - Optional time-to-live for cache entries - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new( - jetstream: &jetstream::Context, - namespace: &str, - ttl: Option, - ) -> Result { - let bucket_name = format!("cache_{}", namespace); - let description = format!("Type-safe cache for {}", namespace); - - let store = KvStore::new(jetstream, &bucket_name, Some(&description), ttl).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - namespace = %namespace, - bucket = %bucket_name, - ttl_secs = ttl.map(|d| d.as_secs()), - type_name = std::any::type_name::(), - "Created type-safe cache store" - ); - - Ok(Self { - store, - namespace: namespace.to_string(), - _marker: PhantomData, - }) - } - - /// Set a value in the cache. 
- #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn set(&self, key: &str, value: &T) -> Result<()> { - self.store.set(key, value).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Cached value" - ); - Ok(()) - } - - /// Get a value from the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get(&self, key: &str) -> Result> { - let result = self.store.get_value(key).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - cache_hit = result.is_some(), - "Retrieved cached value" - ); - Ok(result) - } - - /// Delete a value from the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete(&self, key: &str) -> Result<()> { - self.store.delete(key).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Deleted cached value" - ); - Ok(()) - } - - /// Check if a key exists in the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn exists(&self, key: &str) -> Result { - self.store.exists(key).await - } - - /// Get or compute a value using the cache-aside pattern. - /// - /// If the key exists in cache, returns the cached value. - /// If not, computes the value using the provided function, - /// stores it in cache, and returns it. - #[tracing::instrument(skip(self, compute_fn), target = TRACING_TARGET_KV)] - pub async fn get_or_compute(&self, key: &str, compute_fn: F) -> Result - where - F: FnOnce() -> Fut + Send, - Fut: std::future::Future> + Send, - { - // Check cache first - if let Some(cached) = self.get(key).await? { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Cache hit" - ); - return Ok(cached); - } - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Cache miss, computing value" - ); - - // Compute new value - let value = compute_fn().await?; - - // Store in cache - self.set(key, &value).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Computed and cached new value" - ); - - Ok(value) - } - - /// Set multiple values in the cache as a batch operation. - #[tracing::instrument(skip(self, items), target = TRACING_TARGET_KV)] - pub async fn set_batch(&self, items: &[(&str, &T)]) -> Result<()> { - self.store.put_batch(items).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - count = items.len(), - namespace = %self.namespace, - "Batch cached values" - ); - Ok(()) - } - - /// Get multiple values from the cache as a batch operation. - #[tracing::instrument(skip(self, keys), target = TRACING_TARGET_KV)] - pub async fn get_batch(&self, keys: &[&str]) -> Result>> { - let kv_results = self.store.get_batch(keys).await?; - let mut results = Vec::with_capacity(keys.len()); - - for key in keys { - if let Some(kv_value) = kv_results.get(*key) { - results.push(Some(kv_value.value.clone())); - } else { - results.push(None); - } - } - - let hit_count = results.iter().filter(|r| r.is_some()).count(); - tracing::debug!( - target: TRACING_TARGET_KV, - requested = keys.len(), - found = hit_count, - hit_rate = format!("{:.1}%", (hit_count as f64 / keys.len() as f64) * 100.0), - namespace = %self.namespace, - "Batch retrieved cached values" - ); - - Ok(results) - } - - /// Clear all entries from the cache. 
- #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn clear(&self) -> Result<()> { - self.store.purge_all().await?; - tracing::info!( - target: TRACING_TARGET_KV, - namespace = %self.namespace, - bucket = %self.store.bucket_name(), - "Cleared all cache entries" - ); - Ok(()) - } - - /// Get all keys currently in the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn keys(&self) -> Result> { - self.store.keys().await - } - - /// Get cache statistics. - pub async fn stats(&self) -> Result { - let keys = self.store.keys().await?; - - let stats = CacheStats { - entry_count: keys.len(), - bucket_name: self.store.bucket_name().to_string(), - namespace: self.namespace.clone(), - type_name: std::any::type_name::().to_string(), - }; - - tracing::debug!( - target: TRACING_TARGET_KV, - namespace = %self.namespace, - entry_count = stats.entry_count, - type_name = %stats.type_name, - "Retrieved cache statistics" - ); - - Ok(stats) - } - - /// Get the cache namespace. - pub fn namespace(&self) -> &str { - &self.namespace - } - - /// Get the underlying KV store. - pub fn inner(&self) -> &KvStore { - &self.store - } - - /// Get the bucket name used by this cache. - pub fn bucket_name(&self) -> &str { - self.store.bucket_name() - } -} - -/// Cache statistics and metadata. -#[derive(Debug, Clone)] -pub struct CacheStats { - /// Number of entries currently in cache - pub entry_count: usize, - /// NATS KV bucket name - pub bucket_name: String, - /// Cache namespace - pub namespace: String, - /// Rust type name of cached values - pub type_name: String, -} - -impl CacheStats { - /// Check if cache is empty. - pub fn is_empty(&self) -> bool { - self.entry_count == 0 - } - - /// Get a human-readable summary of cache stats. - pub fn summary(&self) -> String { - format!( - "Cache '{}' contains {} {} entries in bucket '{}'", - self.namespace, self.entry_count, self.type_name, self.bucket_name - ) - } -} - -#[cfg(test)] -mod tests { - use serde::{Deserialize, Serialize}; - - use super::*; - - #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] - #[allow(dead_code)] - struct TestData { - id: u64, - name: String, - } - - #[test] - fn test_cache_stats() { - let stats = CacheStats { - entry_count: 5, - bucket_name: "cache_test".to_string(), - namespace: "test".to_string(), - type_name: "TestData".to_string(), - }; - - assert!(!stats.is_empty()); - assert!(stats.summary().contains("5 TestData entries")); - - let empty_stats = CacheStats { - entry_count: 0, - bucket_name: "cache_empty".to_string(), - namespace: "empty".to_string(), - type_name: "TestData".to_string(), - }; - - assert!(empty_stats.is_empty()); - assert!(empty_stats.summary().contains("0 TestData entries")); - } - - #[test] - fn test_cache_namespace_formatting() { - // Test that namespace is correctly formatted into bucket name - let namespace = "user_sessions"; - let expected_bucket = "cache_user_sessions"; - let actual_bucket = format!("cache_{}", namespace); - assert_eq!(actual_bucket, expected_bucket); - } - - // Note: Integration tests requiring NATS server would go in a separate test module - // or be marked with #[ignore] attribute for optional execution -} diff --git a/crates/nvisy-nats/src/kv/chat_history.rs b/crates/nvisy-nats/src/kv/chat_history.rs deleted file mode 100644 index 79ce7f3..0000000 --- a/crates/nvisy-nats/src/kv/chat_history.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! Chat history store for ephemeral sessions with TTL. 
- -use std::time::Duration; - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; -use serde::Serialize; -use serde::de::DeserializeOwned; -use uuid::Uuid; - -use super::KvStore; -use crate::{Result, TRACING_TARGET_KV}; - -/// Default session TTL (30 minutes). -pub const DEFAULT_SESSION_TTL: Duration = Duration::from_secs(30 * 60); - -/// NATS KV bucket name for chat history. -const CHAT_HISTORY_BUCKET: &str = "chat_history"; - -/// Chat history store backed by NATS KV. -/// -/// Provides ephemeral session storage with automatic TTL expiration. -#[derive(Clone, Deref, DerefMut)] -pub struct ChatHistoryStore -where - T: Serialize + DeserializeOwned + Send + Sync + 'static, -{ - #[deref] - #[deref_mut] - store: KvStore, - ttl: Duration, -} - -impl ChatHistoryStore -where - T: Serialize + DeserializeOwned + Send + Sync + 'static, -{ - /// Creates a new chat history store with default TTL (30 minutes). - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new(jetstream: &jetstream::Context) -> Result { - Self::with_ttl(jetstream, DEFAULT_SESSION_TTL).await - } - - /// Creates a new chat history store with custom TTL. - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn with_ttl(jetstream: &jetstream::Context, ttl: Duration) -> Result { - let store = KvStore::new( - jetstream, - CHAT_HISTORY_BUCKET, - Some("Ephemeral chat sessions"), - Some(ttl), - ) - .await?; - - tracing::info!( - target: TRACING_TARGET_KV, - ttl_secs = ttl.as_secs(), - bucket = %store.bucket_name(), - "Created chat history store" - ); - - Ok(Self { store, ttl }) - } - - /// Returns the configured TTL. - pub fn ttl(&self) -> Duration { - self.ttl - } - - /// Creates a new session. - #[tracing::instrument(skip(self, session), target = TRACING_TARGET_KV)] - pub async fn create(&self, session_id: Uuid, session: &T) -> Result<()> { - let key = session_key(session_id); - - if self.store.exists(&key).await? { - return Err(crate::Error::operation( - "chat_history_create", - format!("session already exists: {session_id}"), - )); - } - - self.store.put(&key, session).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Created chat session" - ); - - Ok(()) - } - - /// Gets a session by ID. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get(&self, session_id: Uuid) -> Result> { - let key = session_key(session_id); - self.store.get_value(&key).await - } - - /// Updates an existing session (also resets TTL). - #[tracing::instrument(skip(self, session), target = TRACING_TARGET_KV)] - pub async fn update(&self, session_id: Uuid, session: &T) -> Result<()> { - let key = session_key(session_id); - - if !self.store.exists(&key).await? { - return Err(crate::Error::operation( - "chat_history_update", - format!("session not found: {session_id}"), - )); - } - - self.store.put(&key, session).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Updated chat session" - ); - - Ok(()) - } - - /// Touches a session to reset its TTL. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn touch(&self, session_id: Uuid) -> Result<()> { - let key = session_key(session_id); - self.store.touch(&key).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Touched chat session" - ); - - Ok(()) - } - - /// Deletes a session. 
- #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete(&self, session_id: Uuid) -> Result<()> { - let key = session_key(session_id); - self.store.delete(&key).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Deleted chat session" - ); - - Ok(()) - } -} - -fn session_key(session_id: Uuid) -> String { - format!("session.{session_id}") -} diff --git a/crates/nvisy-nats/src/kv/kv_bucket.rs b/crates/nvisy-nats/src/kv/kv_bucket.rs new file mode 100644 index 0000000..b8b22b1 --- /dev/null +++ b/crates/nvisy-nats/src/kv/kv_bucket.rs @@ -0,0 +1,59 @@ +//! Key-value bucket configuration traits. + +use std::time::Duration; + +/// Marker trait for KV bucket configuration. +/// +/// This trait defines the configuration for a NATS KV bucket, +/// similar to `ObjectBucket` for object stores. +pub trait KvBucket: Clone + Send + Sync + 'static { + /// Bucket name used in NATS KV. + const NAME: &'static str; + + /// Human-readable description for the bucket. + const DESCRIPTION: &'static str; + + /// Default TTL for entries in this bucket. + /// Returns `None` for buckets where entries should not expire. + const TTL: Option; +} + +/// Bucket for API authentication tokens. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct ApiTokensBucket; + +impl KvBucket for ApiTokensBucket { + const DESCRIPTION: &'static str = "API authentication tokens"; + const NAME: &'static str = "api_tokens"; + const TTL: Option = Some(Duration::from_secs(24 * 60 * 60)); // 24 hours +} + +/// Bucket for ephemeral chat history sessions. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct ChatHistoryBucket; + +impl KvBucket for ChatHistoryBucket { + const DESCRIPTION: &'static str = "Ephemeral chat sessions"; + const NAME: &'static str = "chat_history"; + const TTL: Option = Some(Duration::from_secs(30 * 60)); // 30 minutes +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_api_tokens_bucket() { + assert_eq!(ApiTokensBucket::NAME, "api_tokens"); + assert_eq!( + ApiTokensBucket::TTL, + Some(Duration::from_secs(24 * 60 * 60)) + ); + } + + #[test] + fn test_chat_history_bucket() { + assert_eq!(ChatHistoryBucket::NAME, "chat_history"); + assert_eq!(ChatHistoryBucket::TTL, Some(Duration::from_secs(30 * 60))); + } +} diff --git a/crates/nvisy-nats/src/kv/kv_key.rs b/crates/nvisy-nats/src/kv/kv_key.rs new file mode 100644 index 0000000..c063c54 --- /dev/null +++ b/crates/nvisy-nats/src/kv/kv_key.rs @@ -0,0 +1,92 @@ +//! Key-value key types and traits. + +use std::fmt; +use std::str::FromStr; + +use uuid::Uuid; + +use crate::Error; + +/// Marker trait for KV key types. +/// +/// This trait defines how keys are formatted for storage in NATS KV. +pub trait KvKey: fmt::Debug + fmt::Display + FromStr + Clone + Send + Sync + 'static {} + +/// Key for chat history sessions. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SessionKey(pub Uuid); + +impl KvKey for SessionKey {} + +impl fmt::Display for SessionKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for SessionKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let id = + Uuid::parse_str(s).map_err(|e| Error::operation("parse_session_key", e.to_string()))?; + Ok(Self(id)) + } +} + +impl From for SessionKey { + fn from(id: Uuid) -> Self { + Self(id) + } +} + +/// Key for API tokens. 
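To show what the `KvBucket` and `KvKey` traits above buy, here is a hypothetical bucket/key pair (neither type exists in this change) that would plug straight into the typed store; the `FromStr` error type is `uuid::Error` to keep the sketch self-contained, whereas the crate's own keys map parse failures to `Error::operation`.

```rust
use std::fmt;
use std::str::FromStr;
use std::time::Duration;

use uuid::Uuid;

/// Hypothetical bucket for per-account rate limit counters.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct RateLimitsBucket;

impl KvBucket for RateLimitsBucket {
    const DESCRIPTION: &'static str = "Per-account rate limit counters";
    const NAME: &'static str = "rate_limits";
    const TTL: Option<Duration> = Some(Duration::from_secs(60)); // counters expire after a minute
}

/// Hypothetical key addressing one account's counter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RateLimitKey(pub Uuid);

impl KvKey for RateLimitKey {}

impl fmt::Display for RateLimitKey {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl FromStr for RateLimitKey {
    type Err = uuid::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Uuid::parse_str(s).map(Self)
    }
}
```

With those two impls in place, `client.kv_store::<RateLimitKey, u32, RateLimitsBucket>()` would hand back a typed store without defining any new store struct.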
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct TokenKey(pub Uuid); + +impl KvKey for TokenKey {} + +impl fmt::Display for TokenKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for TokenKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let id = + Uuid::parse_str(s).map_err(|e| Error::operation("parse_token_key", e.to_string()))?; + Ok(Self(id)) + } +} + +impl From for TokenKey { + fn from(id: Uuid) -> Self { + Self(id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_session_key_roundtrip() { + let id = Uuid::nil(); + let key = SessionKey(id); + let s = key.to_string(); + let parsed: SessionKey = s.parse().unwrap(); + assert_eq!(key, parsed); + } + + #[test] + fn test_token_key_roundtrip() { + let id = Uuid::nil(); + let key = TokenKey(id); + let s = key.to_string(); + let parsed: TokenKey = s.parse().unwrap(); + assert_eq!(key, parsed); + } +} diff --git a/crates/nvisy-nats/src/kv/kv_store.rs b/crates/nvisy-nats/src/kv/kv_store.rs new file mode 100644 index 0000000..f61dc1b --- /dev/null +++ b/crates/nvisy-nats/src/kv/kv_store.rs @@ -0,0 +1,337 @@ +//! Type-safe NATS KV store wrapper. + +use std::marker::PhantomData; +use std::time::Duration; + +use async_nats::jetstream::{self, kv}; +use futures::StreamExt; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; + +use super::{KvBucket, KvKey}; +use crate::{Error, Result, TRACING_TARGET_KV}; + +/// Type-safe NATS KV store wrapper. +/// +/// This store is generic over: +/// - `K`: The key type (determines prefix) +/// - `V`: The value type to store (must be serializable) +/// - `B`: The bucket configuration (determines name, description, TTL) +#[derive(Clone)] +pub struct KvStore +where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, +{ + store: kv::Store, + _key: PhantomData, + _value: PhantomData, + _bucket: PhantomData, +} + +impl KvStore +where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, +{ + /// Create or get a KV bucket using the bucket configuration. + #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] + pub(crate) async fn new(jetstream: &jetstream::Context) -> Result { + Self::with_ttl(jetstream, B::TTL.unwrap_or_default()).await + } + + /// Create or get a KV bucket with custom TTL. + #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] + pub(crate) async fn with_ttl(jetstream: &jetstream::Context, ttl: Duration) -> Result { + let config = kv::Config { + bucket: B::NAME.to_string(), + description: B::DESCRIPTION.to_string(), + max_age: ttl, + ..Default::default() + }; + + let store = match jetstream.get_key_value(B::NAME).await { + Ok(store) => { + tracing::debug!( + target: TRACING_TARGET_KV, + bucket = %B::NAME, + "Using existing KV bucket" + ); + store + } + Err(_) => { + tracing::debug!( + target: TRACING_TARGET_KV, + bucket = %B::NAME, + ttl_secs = ttl.as_secs(), + "Creating new KV bucket" + ); + jetstream + .create_key_value(config) + .await + .map_err(|e| Error::operation("kv_create", e.to_string()))? + } + }; + + Ok(Self { + store, + _key: PhantomData, + _value: PhantomData, + _bucket: PhantomData, + }) + } + + /// Returns the bucket name. + #[inline] + pub fn bucket_name(&self) -> &'static str { + B::NAME + } + + /// Put a value into the store. 
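// The KvKey trait defined in kv_key.rs above is a pure marker: any type that can
// Display itself and parse back via FromStr can act as a key. A hypothetical
// composite key (not part of this patch) could look like the sketch below; it
// reuses Error::operation the same way the keys in this patch do.
use std::fmt;
use std::str::FromStr;

use uuid::Uuid;

use crate::Error;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct WorkspaceSessionKey {
    pub workspace_id: Uuid,
    pub session_id: Uuid,
}

impl KvKey for WorkspaceSessionKey {}

impl fmt::Display for WorkspaceSessionKey {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // '.' is the NATS KV hierarchy separator, keeping the two IDs splittable on parse.
        write!(f, "{}.{}", self.workspace_id, self.session_id)
    }
}

impl FromStr for WorkspaceSessionKey {
    type Err = Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (ws, sess) = s
            .split_once('.')
            .ok_or_else(|| Error::operation("parse_key", "missing '.' separator".to_string()))?;
        Ok(Self {
            workspace_id: Uuid::parse_str(ws)
                .map_err(|e| Error::operation("parse_key", e.to_string()))?,
            session_id: Uuid::parse_str(sess)
                .map_err(|e| Error::operation("parse_key", e.to_string()))?,
        })
    }
}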
+ #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] + pub async fn put(&self, key: &K, value: &V) -> Result { + let key_str = key.to_string(); + let json = serde_json::to_vec(value)?; + let size = json.len(); + let revision = self + .store + .put(&key_str, json.into()) + .await + .map_err(|e| Error::operation("kv_put", e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + revision = revision, + size_bytes = size, + "Put value to KV store" + ); + + Ok(KvEntry { + key: key_str, + revision, + size: size as u64, + }) + } + + /// Get a value from the store. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn get(&self, key: &K) -> Result>> { + let key_str = key.to_string(); + match self.store.entry(&key_str).await { + Ok(Some(entry)) => { + let size = entry.value.len(); + let deserialized = serde_json::from_slice(&entry.value)?; + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + size_bytes = size, + revision = entry.revision, + "Retrieved value from KV store" + ); + Ok(Some(KvValue { + key: key_str, + value: deserialized, + revision: entry.revision, + size: size as u64, + created: entry.created.into(), + })) + } + Ok(None) => { + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + "Key not found in KV store" + ); + Ok(None) + } + Err(e) => Err(Error::operation("kv_get", e.to_string())), + } + } + + /// Get a value, returning just the data. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn get_value(&self, key: &K) -> Result> { + Ok(self.get(key).await?.map(|kv| kv.value)) + } + + /// Delete a key from the store. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn delete(&self, key: &K) -> Result<()> { + let key_str = key.to_string(); + self.store + .purge(&key_str) + .await + .map_err(|e| Error::operation("kv_delete", e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + "Deleted key from KV store" + ); + Ok(()) + } + + /// Check if a key exists in the store. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn exists(&self, key: &K) -> Result { + let key_str = key.to_string(); + match self.store.get(&key_str).await { + Ok(Some(_)) => Ok(true), + Ok(None) => Ok(false), + Err(e) => Err(Error::operation("kv_exists", e.to_string())), + } + } + + /// Touches a key to reset its TTL by re-putting the same value. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn touch(&self, key: &K) -> Result { + let kv_value = self + .get(key) + .await? + .ok_or_else(|| Error::operation("kv_touch", format!("key not found: {key}")))?; + + self.put(key, &kv_value.value).await + } + + /// Get all keys in the bucket with the expected prefix. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn keys(&self) -> Result> { + let mut keys = Vec::new(); + let mut key_stream = self + .store + .keys() + .await + .map_err(|e| Error::operation("kv_keys", e.to_string()))?; + + while let Some(key_result) = key_stream.next().await { + match key_result { + Ok(key_str) => { + if let Ok(key) = key_str.parse::() { + keys.push(key); + } + } + Err(e) => { + tracing::warn!( + target: TRACING_TARGET_KV, + error = %e, + "Error reading key from bucket" + ); + } + } + } + + tracing::debug!( + target: TRACING_TARGET_KV, + count = keys.len(), + bucket = %B::NAME, + "Retrieved keys from bucket" + ); + Ok(keys) + } + + /// Purge all keys in the bucket. 
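// A minimal usage sketch for the typed store above, assuming a KvStore handle for
// the chat-history bucket was already obtained through the NATS client (the
// constructors here are pub(crate)). ChatSession is a hypothetical stand-in for
// any serde-serializable value type.
async fn touch_or_create(
    store: &KvStore<SessionKey, ChatSession, ChatHistoryBucket>,
    id: uuid::Uuid,
    fresh: &ChatSession,
) -> Result<()> {
    let key = SessionKey::from(id);
    if store.exists(&key).await? {
        // Re-putting the current value resets the bucket TTL for this session.
        store.touch(&key).await?;
    } else {
        store.put(&key, fresh).await?;
    }
    Ok(())
}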
+ #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn purge_all(&self) -> Result<()> { + let keys = self.keys().await?; + let count = keys.len(); + for key in keys { + self.delete(&key).await?; + } + tracing::debug!( + target: TRACING_TARGET_KV, + count = count, + bucket = %B::NAME, + "Purged all keys from bucket" + ); + Ok(()) + } + + /// Update a value only if the revision matches (optimistic concurrency). + #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] + pub async fn update(&self, key: &K, value: &V, revision: u64) -> Result { + let key_str = key.to_string(); + let json = serde_json::to_vec(value)?; + let size = json.len(); + let new_revision = self + .store + .update(&key_str, json.into(), revision) + .await + .map_err(|e| Error::operation("kv_update", e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + old_revision = revision, + new_revision = new_revision, + size_bytes = size, + "Updated value in KV store" + ); + + Ok(KvEntry { + key: key_str, + revision: new_revision, + size: size as u64, + }) + } + + /// Get or compute a value using the cache-aside pattern. + #[tracing::instrument(skip(self, compute_fn), target = TRACING_TARGET_KV)] + pub async fn get_or_compute(&self, key: &K, compute_fn: F) -> Result + where + F: FnOnce() -> Fut + Send, + Fut: std::future::Future> + Send, + V: Clone, + { + if let Some(existing) = self.get_value(key).await? { + return Ok(existing); + } + + let value = compute_fn().await?; + self.put(key, &value).await?; + Ok(value) + } + + /// Get the underlying store reference. + pub fn inner(&self) -> &kv::Store { + &self.store + } +} + +/// KV entry metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KvEntry { + pub key: String, + pub revision: u64, + pub size: u64, +} + +/// KV value with metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KvValue { + pub key: String, + pub value: V, + pub revision: u64, + pub size: u64, + pub created: std::time::SystemTime, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_kv_entry_creation() { + let entry = KvEntry { + key: "test_key".to_string(), + revision: 1, + size: 100, + }; + + assert_eq!(entry.key, "test_key"); + assert_eq!(entry.revision, 1); + assert_eq!(entry.size, 100); + } +} diff --git a/crates/nvisy-nats/src/kv/mod.rs b/crates/nvisy-nats/src/kv/mod.rs index 2f910fa..d742ca1 100644 --- a/crates/nvisy-nats/src/kv/mod.rs +++ b/crates/nvisy-nats/src/kv/mod.rs @@ -1,22 +1,31 @@ -//! NATS Key-Value store operations for caching, API tokens, chat history, and generic KV storage. +//! NATS Key-Value store operations. //! -//! This module provides type-safe abstractions over NATS KV for different use cases: -//! - `KvStore`: Generic type-safe key-value operations -//! - `CacheStore`: Type-safe caching with cache-aside patterns -//! - `ApiTokenStore`: API authentication token management -//! - `ChatHistoryStore`: Ephemeral chat session storage with TTL +//! This module provides type-safe abstractions over NATS KV: +//! - `KvStore`: Generic type-safe key-value operations +//! - `KvKey`: Trait for key types with prefix support +//! - `KvBucket`: Trait for bucket configuration //! -//! All stores provide compile-time type safety through generic parameters and -//! comprehensive observability through structured logging. +//! # Example +//! +//! ```ignore +//! // Create a session store +//! let store: KvStore = +//! nats_client.kv_store().await?; +//! +//! // Put a session +//! 
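// Sketch of the two coordination helpers defined above: cache-aside reads via
// get_or_compute and optimistic writes via update. The load_session_from_db
// function and the ChatSession::title field are illustrative assumptions, not
// part of this patch.
async fn read_through(
    store: &KvStore<SessionKey, ChatSession, ChatHistoryBucket>,
    key: &SessionKey,
) -> Result<ChatSession> {
    // Returns the stored value if present; otherwise computes, stores, and returns it.
    store
        .get_or_compute(key, || async { load_session_from_db(key).await })
        .await
}

async fn rename_session(
    store: &KvStore<SessionKey, ChatSession, ChatHistoryBucket>,
    key: &SessionKey,
    title: String,
) -> Result<()> {
    if let Some(found) = store.get(key).await? {
        let mut session = found.value;
        session.title = title;
        // Rejected by the server if another writer bumped the revision in between.
        store.update(key, &session, found.revision).await?;
    }
    Ok(())
}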
let key = SessionKey::from(Uuid::new_v4()); +//! store.put(&key, &session).await?; +//! +//! // Get the session back +//! let session = store.get_value(&key).await?; +//! ``` mod api_token; -mod api_token_store; -mod cache; -mod chat_history; -mod store; +mod kv_bucket; +mod kv_key; +mod kv_store; pub use api_token::{ApiToken, ApiTokenType}; -pub use api_token_store::ApiTokenStore; -pub use cache::{CacheStats, CacheStore}; -pub use chat_history::{ChatHistoryStore, DEFAULT_SESSION_TTL}; -pub use store::{KvEntry, KvStore, KvValue}; +pub use kv_bucket::{ApiTokensBucket, ChatHistoryBucket, KvBucket}; +pub use kv_key::{KvKey, SessionKey, TokenKey}; +pub use kv_store::{KvEntry, KvStore, KvValue}; diff --git a/crates/nvisy-nats/src/kv/store.rs b/crates/nvisy-nats/src/kv/store.rs deleted file mode 100644 index 0a4f478..0000000 --- a/crates/nvisy-nats/src/kv/store.rs +++ /dev/null @@ -1,433 +0,0 @@ -//! Type-safe NATS KV store wrapper with improved API design. - -use std::collections::HashMap; -use std::marker::PhantomData; -use std::time::Duration; - -use async_nats::jetstream::{self, kv}; -use futures::StreamExt; -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; - -use crate::{Error, Result, TRACING_TARGET_KV}; - -/// Type-safe NATS KV store wrapper with improved API design -/// -/// This store provides a generic interface over NATS KV for a specific -/// serializable data type T, with consistent error handling and -/// comprehensive operations. The type parameter ensures compile-time -/// type safety for all operations. -#[derive(Clone)] -pub struct KvStore { - store: kv::Store, - bucket_name: String, - _marker: PhantomData, -} - -impl KvStore -where - T: Serialize + DeserializeOwned + Send + Sync + 'static, -{ - /// Create or get a KV bucket for the specified type T. - /// - /// # Arguments - /// * `jetstream` - JetStream context for NATS operations - /// * `bucket_name` - Name of the KV bucket to create or access - /// * `description` - Optional description for the bucket - /// * `ttl` - Optional time-to-live for entries in the bucket - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new( - jetstream: &jetstream::Context, - bucket_name: &str, - description: Option<&str>, - ttl: Option, - ) -> Result { - let mut config = kv::Config { - bucket: bucket_name.to_string(), - description: description.unwrap_or("").to_string(), - max_age: ttl.unwrap_or(Duration::from_secs(0)), - ..Default::default() - }; - - if let Some(ttl_duration) = ttl { - config.max_age = ttl_duration; - } - - // Try to get existing bucket first - let store = match jetstream.get_key_value(bucket_name).await { - Ok(store) => { - tracing::debug!( - target: TRACING_TARGET_KV, - bucket = %bucket_name, - "Using existing KV bucket" - ); - store - } - Err(_) => { - // Bucket doesn't exist, create it - tracing::debug!( - target: TRACING_TARGET_KV, - bucket = %bucket_name, - ttl_secs = ttl.map(|d| d.as_secs()), - "Creating new KV bucket" - ); - jetstream - .create_key_value(config) - .await - .map_err(|e| Error::operation("kv_create", e.to_string()))? - } - }; - - Ok(Self { - store, - bucket_name: bucket_name.to_string(), - _marker: PhantomData, - }) - } - - /// Get the bucket name - pub fn bucket_name(&self) -> &str { - &self.bucket_name - } - - /// Put a value into the store (serializes to JSON). 
- #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn put(&self, key: &str, value: &T) -> Result { - let json = serde_json::to_vec(value)?; - let size = json.len(); - let revision = self - .store - .put(key, json.into()) - .await - .map_err(|e| Error::operation("kv_put", e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - revision = revision, - size_bytes = size, - "Put value to KV store" - ); - - Ok(KvEntry { - key: key.to_string(), - revision, - size: size as u64, - }) - } - - /// Get a value from the store (deserializes from JSON). - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get(&self, key: &str) -> Result>> { - match self.store.entry(key).await { - Ok(Some(entry)) => { - let size = entry.value.len(); - let deserialized = serde_json::from_slice(&entry.value)?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - size_bytes = size, - revision = entry.revision, - "Retrieved value from KV store" - ); - Ok(Some(KvValue { - key: key.to_string(), - value: deserialized, - revision: entry.revision, - size: size as u64, - created: entry.created.into(), - })) - } - Ok(None) => { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Key not found in KV store" - ); - Ok(None) - } - Err(e) => Err(Error::operation("kv_get", e.to_string())), - } - } - - /// Delete a key from the store. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete(&self, key: &str) -> Result<()> { - self.store - .purge(key) - .await - .map_err(|e| Error::operation("kv_delete", e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Deleted key from KV store" - ); - Ok(()) - } - - /// Check if a key exists in the store. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn exists(&self, key: &str) -> Result { - match self.store.get(key).await { - Ok(Some(_)) => { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - exists = true, - "Checked key existence" - ); - Ok(true) - } - Ok(None) => { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - exists = false, - "Checked key existence" - ); - Ok(false) - } - Err(e) => Err(Error::operation("kv_exists", e.to_string())), - } - } - - /// Touches a key to reset its TTL by re-putting the same value. - /// - /// Returns an error if the key doesn't exist. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn touch(&self, key: &str) -> Result { - let kv_value = self - .get(key) - .await? - .ok_or_else(|| Error::operation("kv_touch", format!("key not found: {key}")))?; - - let entry = self.put(key, &kv_value.value).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - revision = entry.revision, - "Touched key (TTL reset)" - ); - - Ok(entry) - } - - /// Get all keys in the bucket. 
- #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn keys(&self) -> Result> { - let mut keys = Vec::new(); - let mut key_stream = self - .store - .keys() - .await - .map_err(|e| Error::operation("kv_keys", e.to_string()))?; - - while let Some(key) = key_stream.next().await { - match key { - Ok(k) => keys.push(k), - Err(e) => { - tracing::warn!( - target: TRACING_TARGET_KV, - error = %e, - "Error reading key from bucket" - ); - } - } - } - - tracing::debug!( - target: TRACING_TARGET_KV, - count = keys.len(), - bucket = %self.store.name, - "Retrieved keys from bucket" - ); - Ok(keys) - } - - /// Purge all keys in the bucket. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn purge_all(&self) -> Result<()> { - let keys = self.keys().await?; - let count = keys.len(); - for key in keys { - self.delete(&key).await?; - } - tracing::debug!( - target: TRACING_TARGET_KV, - count = count, - bucket = %self.store.name, - "Purged all keys from bucket" - ); - Ok(()) - } - - /// Get the underlying store reference - pub fn inner(&self) -> &kv::Store { - &self.store - } - - /// Set/update a value (alias for put for consistency with cache interface). - #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn set(&self, key: &str, value: &T) -> Result { - self.put(key, value).await - } - - /// Get a value and extract just the data (convenience method) - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get_value(&self, key: &str) -> Result> { - Ok(self.get(key).await?.map(|kv_value| kv_value.value)) - } - - /// Put multiple values in a batch operation. - #[tracing::instrument(skip(self, items), target = TRACING_TARGET_KV)] - pub async fn put_batch(&self, items: &[(&str, &T)]) -> Result> { - let mut results = Vec::with_capacity(items.len()); - - for (key, value) in items { - let entry = self.put(key, value).await?; - results.push(entry); - } - - tracing::debug!( - target: TRACING_TARGET_KV, - count = items.len(), - "Batch put completed" - ); - - Ok(results) - } - - /// Get multiple values in a batch operation. - #[tracing::instrument(skip(self, keys), target = TRACING_TARGET_KV)] - pub async fn get_batch(&self, keys: &[&str]) -> Result>> { - let mut results = HashMap::with_capacity(keys.len()); - - for key in keys { - if let Some(value) = self.get(key).await? { - results.insert(key.to_string(), value); - } - } - - tracing::debug!( - target: TRACING_TARGET_KV, - requested = keys.len(), - found = results.len(), - "Batch get completed" - ); - - Ok(results) - } - - /// Update a value only if the revision matches (optimistic concurrency). - #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn update(&self, key: &str, value: &T, revision: u64) -> Result { - let json = serde_json::to_vec(value)?; - let size = json.len(); - let new_revision = self - .store - .update(key, json.into(), revision) - .await - .map_err(|e| Error::operation("kv_update", e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - old_revision = revision, - new_revision = new_revision, - size_bytes = size, - "Updated value in KV store" - ); - - Ok(KvEntry { - key: key.to_string(), - revision: new_revision, - size: size as u64, - }) - } - - /// Get or compute a value using the cache-aside pattern. 
- #[tracing::instrument(skip(self, compute_fn), target = TRACING_TARGET_KV)] - pub async fn get_or_compute(&self, key: &str, compute_fn: F) -> Result - where - F: FnOnce() -> Fut + Send, - Fut: std::future::Future> + Send, - T: Clone, - { - // Try to get from store first - if let Some(existing) = self.get_value(key).await? { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Found existing value in store" - ); - return Ok(existing); - } - - // Value not found, compute it - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Value not found, computing new value" - ); - let value = compute_fn().await?; - - // Store the computed value - self.put(key, &value).await?; - - Ok(value) - } -} - -/// KV entry metadata -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct KvEntry { - pub key: String, - pub revision: u64, - pub size: u64, -} - -/// KV value with metadata -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct KvValue { - pub key: String, - pub value: T, - pub revision: u64, - pub size: u64, - pub created: std::time::SystemTime, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - #[allow(dead_code)] - struct TestData { - id: u64, - name: String, - } - - // Note: These tests would require a running NATS server with JetStream enabled - // They're marked as ignored for now - - #[test] - #[ignore] - fn test_kv_operations() { - // Would test put/get/delete operations - } - - #[test] - fn test_kv_entry_creation() { - let entry = KvEntry { - key: "test_key".to_string(), - revision: 1, - size: 100, - }; - - assert_eq!(entry.key, "test_key"); - assert_eq!(entry.revision, 1); - assert_eq!(entry.size, 100); - } -} diff --git a/crates/nvisy-nats/src/lib.rs b/crates/nvisy-nats/src/lib.rs index 90b15cc..add120c 100644 --- a/crates/nvisy-nats/src/lib.rs +++ b/crates/nvisy-nats/src/lib.rs @@ -35,5 +35,5 @@ pub mod stream; // Re-export async_nats types needed by consumers pub use async_nats::jetstream; -pub use client::{NatsClient, NatsConfig, NatsConnection}; +pub use client::{NatsClient, NatsConfig}; pub use error::{Error, Result}; diff --git a/crates/nvisy-nats/src/object/avatar_bucket.rs b/crates/nvisy-nats/src/object/avatar_bucket.rs deleted file mode 100644 index 02e0df9..0000000 --- a/crates/nvisy-nats/src/object/avatar_bucket.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Avatar bucket constants for NATS object storage. - -use std::time::Duration; - -/// Bucket name for account avatars. -pub const AVATAR_BUCKET: &str = "ACCOUNT_AVATARS"; - -/// Maximum age for avatars (none - retained indefinitely). -pub const AVATAR_MAX_AGE: Option = None; diff --git a/crates/nvisy-nats/src/object/avatar_key.rs b/crates/nvisy-nats/src/object/avatar_key.rs deleted file mode 100644 index 5d08f55..0000000 --- a/crates/nvisy-nats/src/object/avatar_key.rs +++ /dev/null @@ -1,91 +0,0 @@ -//! Avatar key for NATS object storage. - -use std::fmt; -use std::str::FromStr; - -use uuid::Uuid; - -use crate::{Error, Result}; - -/// A validated key for avatar objects in NATS object storage. -/// -/// The key format is simply the account ID as a string, since avatars -/// are uniquely identified by their owning account. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct AvatarKey { - account_id: Uuid, -} - -impl AvatarKey { - /// Creates a new avatar key for an account. - pub fn new(account_id: Uuid) -> Self { - Self { account_id } - } - - /// Returns the account ID. 
- pub fn account_id(&self) -> Uuid { - self.account_id - } -} - -impl fmt::Display for AvatarKey { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.account_id) - } -} - -impl FromStr for AvatarKey { - type Err = Error; - - fn from_str(s: &str) -> Result { - let account_id = Uuid::parse_str(s) - .map_err(|e| Error::operation("parse_key", format!("Invalid account UUID: {}", e)))?; - Ok(Self::new(account_id)) - } -} - -impl From for AvatarKey { - fn from(account_id: Uuid) -> Self { - Self::new(account_id) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_avatar_key_new() { - let account_id = Uuid::new_v4(); - let key = AvatarKey::new(account_id); - assert_eq!(key.account_id(), account_id); - } - - #[test] - fn test_avatar_key_display() { - let account_id = Uuid::new_v4(); - let key = AvatarKey::new(account_id); - assert_eq!(key.to_string(), account_id.to_string()); - } - - #[test] - fn test_avatar_key_roundtrip() { - let account_id = Uuid::new_v4(); - let key = AvatarKey::new(account_id); - let encoded = key.to_string(); - let decoded: AvatarKey = encoded.parse().unwrap(); - assert_eq!(decoded.account_id(), account_id); - } - - #[test] - fn test_avatar_key_from_uuid() { - let account_id = Uuid::new_v4(); - let key: AvatarKey = account_id.into(); - assert_eq!(key.account_id(), account_id); - } - - #[test] - fn test_avatar_key_from_str_invalid() { - assert!(AvatarKey::from_str("not-a-uuid").is_err()); - } -} diff --git a/crates/nvisy-nats/src/object/avatar_store.rs b/crates/nvisy-nats/src/object/avatar_store.rs deleted file mode 100644 index 61b5529..0000000 --- a/crates/nvisy-nats/src/object/avatar_store.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Avatar store for NATS object storage. - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; - -use super::avatar_bucket::{AVATAR_BUCKET, AVATAR_MAX_AGE}; -use super::avatar_key::AvatarKey; -use super::object_data::{GetResult, PutResult}; -use super::object_store::ObjectStore; -use crate::Result; - -/// An avatar store that manages profile images in NATS object storage. -/// -/// Uses [`AvatarKey`] for addressing (account ID based). -#[derive(Clone, Deref, DerefMut)] -pub struct AvatarStore { - #[deref] - #[deref_mut] - inner: ObjectStore, -} - -impl AvatarStore { - /// Creates a new avatar store. - pub async fn new(jetstream: &jetstream::Context) -> Result { - let inner = ObjectStore::new(jetstream, AVATAR_BUCKET, AVATAR_MAX_AGE).await?; - Ok(Self { inner }) - } - - /// Streams avatar data to the store while computing SHA-256 hash on-the-fly. - pub async fn put(&self, key: &AvatarKey, reader: R) -> Result - where - R: tokio::io::AsyncRead + Unpin, - { - self.inner.put(&key.to_string(), reader).await - } - - /// Gets an avatar from the store as a stream. - /// - /// Returns `None` if the avatar doesn't exist. - pub async fn get(&self, key: &AvatarKey) -> Result> { - self.inner.get(&key.to_string()).await - } - - /// Deletes an avatar from the store. - pub async fn delete(&self, key: &AvatarKey) -> Result<()> { - self.inner.delete(&key.to_string()).await - } - - /// Checks if an avatar exists. - pub async fn exists(&self, key: &AvatarKey) -> Result { - self.inner.exists(&key.to_string()).await - } - - /// Returns the bucket name. 
- #[inline] - pub fn bucket(&self) -> &'static str { - AVATAR_BUCKET - } -} diff --git a/crates/nvisy-nats/src/object/document_bucket.rs b/crates/nvisy-nats/src/object/document_bucket.rs deleted file mode 100644 index e459a0e..0000000 --- a/crates/nvisy-nats/src/object/document_bucket.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Document bucket configuration for NATS object storage. - -use std::time::Duration; - -/// Marker trait for document storage buckets. -/// -/// This trait defines the configuration for a NATS object storage bucket, -/// including its name and optional TTL for objects. -pub trait DocumentBucket: Clone + Send + Sync + 'static { - /// Bucket name used in NATS object storage. - const NAME: &'static str; - - /// Maximum age for objects in this bucket. - /// Returns `None` for buckets where objects should not expire. - const MAX_AGE: Option; -} - -/// Primary document storage for uploaded and processed files. -/// -/// No expiration, files are retained indefinitely. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] -pub struct Files; - -impl DocumentBucket for Files { - const MAX_AGE: Option = None; - const NAME: &'static str = "DOCUMENT_FILES"; -} - -/// Temporary storage for intermediate processing artifacts. -/// -/// Files expire after 7 days. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] -pub struct Intermediates; - -impl DocumentBucket for Intermediates { - const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); - const NAME: &'static str = "DOCUMENT_INTERMEDIATES"; // 7 days -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_bucket_names() { - assert_eq!(Files::NAME, "DOCUMENT_FILES"); - assert_eq!(Intermediates::NAME, "DOCUMENT_INTERMEDIATES"); - } - - #[test] - fn test_bucket_max_age() { - assert_eq!(Files::MAX_AGE, None); - assert_eq!( - Intermediates::MAX_AGE, - Some(Duration::from_secs(7 * 24 * 60 * 60)) - ); - } -} diff --git a/crates/nvisy-nats/src/object/document_key.rs b/crates/nvisy-nats/src/object/document_key.rs deleted file mode 100644 index 0861e95..0000000 --- a/crates/nvisy-nats/src/object/document_key.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Document key for NATS object storage. - -use std::fmt; -use std::str::FromStr; - -use base64::prelude::*; -use uuid::Uuid; - -use crate::{Error, Result}; - -/// A validated key for document objects in NATS object storage. -/// -/// The key is encoded as URL-safe base64 of the concatenated workspace ID and object ID. -/// This produces a compact 43-character key from two UUIDs (32 bytes → base64). -/// -/// The `object_id` is a UUID v7 generated at upload time, providing: -/// - Time-ordered keys for efficient storage and retrieval -/// - Guaranteed uniqueness within the workspace -/// - No collision with database-generated IDs -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DocumentKey { - workspace_id: Uuid, - object_id: Uuid, -} - -impl DocumentKey { - /// Generates a new document key with a fresh UUID v7 object ID. - /// - /// Uses UUID v7 which is time-ordered and contains randomness, - /// making keys both sortable and collision-resistant. - pub fn generate(workspace_id: Uuid) -> Self { - Self { - workspace_id, - object_id: Uuid::now_v7(), - } - } - - /// Creates a document key from existing IDs (for parsing stored keys). - pub fn from_parts(workspace_id: Uuid, object_id: Uuid) -> Self { - Self { - workspace_id, - object_id, - } - } - - /// Returns the workspace ID. 
- pub fn workspace_id(&self) -> Uuid { - self.workspace_id - } - - /// Returns the object ID (the UUID used for NATS storage). - pub fn object_id(&self) -> Uuid { - self.object_id - } - - /// Encodes the key as URL-safe base64. - fn encode(&self) -> String { - let mut bytes = [0u8; 32]; - bytes[..16].copy_from_slice(self.workspace_id.as_bytes()); - bytes[16..].copy_from_slice(self.object_id.as_bytes()); - BASE64_URL_SAFE_NO_PAD.encode(bytes) - } - - /// Decodes a key from URL-safe base64. - fn decode(s: &str) -> Result { - let bytes = BASE64_URL_SAFE_NO_PAD.decode(s).map_err(|e| { - Error::operation("parse_key", format!("Invalid base64 encoding: {}", e)) - })?; - - if bytes.len() != 32 { - return Err(Error::operation( - "parse_key", - format!("Invalid key length: expected 32 bytes, got {}", bytes.len()), - )); - } - - let workspace_id = Uuid::from_slice(&bytes[..16]) - .map_err(|e| Error::operation("parse_key", format!("Invalid workspace UUID: {}", e)))?; - - let object_id = Uuid::from_slice(&bytes[16..]) - .map_err(|e| Error::operation("parse_key", format!("Invalid object UUID: {}", e)))?; - - Ok(Self::from_parts(workspace_id, object_id)) - } -} - -impl fmt::Display for DocumentKey { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.encode()) - } -} - -impl FromStr for DocumentKey { - type Err = Error; - - fn from_str(s: &str) -> Result { - Self::decode(s) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_document_key_generate() { - let workspace_id = Uuid::new_v4(); - - let key = DocumentKey::generate(workspace_id); - - assert_eq!(key.workspace_id(), workspace_id); - // object_id should be a valid UUID v7 (starts with version nibble 7) - assert_eq!(key.object_id().get_version_num(), 7); - } - - #[test] - fn test_document_key_from_parts() { - let workspace_id = Uuid::new_v4(); - let object_id = Uuid::new_v4(); - - let key = DocumentKey::from_parts(workspace_id, object_id); - - assert_eq!(key.workspace_id(), workspace_id); - assert_eq!(key.object_id(), object_id); - } - - #[test] - fn test_document_key_display_is_base64() { - let workspace_id = Uuid::new_v4(); - - let key = DocumentKey::generate(workspace_id); - let encoded = key.to_string(); - - // URL-safe base64 without padding: 32 bytes → 43 chars - assert_eq!(encoded.len(), 43); - // Should only contain URL-safe base64 characters - assert!( - encoded - .chars() - .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') - ); - } - - #[test] - fn test_document_key_roundtrip() { - let workspace_id = Uuid::new_v4(); - let object_id = Uuid::new_v4(); - - let key = DocumentKey::from_parts(workspace_id, object_id); - let encoded = key.to_string(); - let decoded: DocumentKey = encoded.parse().unwrap(); - - assert_eq!(decoded.workspace_id(), workspace_id); - assert_eq!(decoded.object_id(), object_id); - assert_eq!(key, decoded); - } - - #[test] - fn test_document_key_uniqueness() { - let workspace_id = Uuid::new_v4(); - - // Generate multiple keys for the same workspace - let key1 = DocumentKey::generate(workspace_id); - let key2 = DocumentKey::generate(workspace_id); - - // Each should have a unique object_id - assert_ne!(key1.object_id(), key2.object_id()); - assert_ne!(key1.to_string(), key2.to_string()); - } - - #[test] - fn test_document_key_from_str_invalid() { - // Invalid base64 - assert!(DocumentKey::from_str("not-valid-base64!!!").is_err()); - - // Too short - assert!(DocumentKey::from_str("abc").is_err()); - - // Valid base64 but wrong length - 
assert!(DocumentKey::from_str("YWJjZGVm").is_err()); - } -} diff --git a/crates/nvisy-nats/src/object/document_store.rs b/crates/nvisy-nats/src/object/document_store.rs deleted file mode 100644 index a47b54d..0000000 --- a/crates/nvisy-nats/src/object/document_store.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Document file store for NATS object storage. - -use std::marker::PhantomData; - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; - -use super::document_bucket::DocumentBucket; -use super::document_key::DocumentKey; -use super::object_data::{GetResult, PutResult}; -use super::object_store::ObjectStore; -use crate::Result; - -/// A document file store that manages files in NATS object storage. -/// -/// This is a specialized wrapper around [`ObjectStore`] that uses -/// [`DocumentKey`] for addressing and provides document-specific operations. -/// -/// The store is generic over the bucket type, providing compile-time -/// type safety for bucket operations. -#[derive(Clone, Deref, DerefMut)] -pub struct DocumentStore { - #[deref] - #[deref_mut] - inner: ObjectStore, - _marker: PhantomData, -} - -impl DocumentStore { - /// Creates a new document store for the specified bucket type. - pub async fn new(jetstream: &jetstream::Context) -> Result { - let inner = ObjectStore::new(jetstream, B::NAME, B::MAX_AGE).await?; - Ok(Self { - inner, - _marker: PhantomData, - }) - } - - /// Streams data to the store while computing SHA-256 hash on-the-fly. - /// - /// This method does not buffer the entire content in memory, making it - /// suitable for large file uploads. - pub async fn put(&self, key: &DocumentKey, reader: R) -> Result - where - R: tokio::io::AsyncRead + Unpin, - { - self.inner.put(&key.to_string(), reader).await - } - - /// Gets an object from the store as a stream. - /// - /// Returns `None` if the object doesn't exist. - /// The returned reader implements `AsyncRead` for streaming the content. - pub async fn get(&self, key: &DocumentKey) -> Result> { - self.inner.get(&key.to_string()).await - } - - /// Deletes an object from the store using a document key. - pub async fn delete(&self, key: &DocumentKey) -> Result<()> { - self.inner.delete(&key.to_string()).await - } - - /// Checks if an object exists using a document key. - pub async fn exists(&self, key: &DocumentKey) -> Result { - self.inner.exists(&key.to_string()).await - } - - /// Returns the bucket name for this store. - #[inline] - pub fn bucket(&self) -> &'static str { - B::NAME - } -} diff --git a/crates/nvisy-nats/src/object/mod.rs b/crates/nvisy-nats/src/object/mod.rs index 371bd08..d880736 100644 --- a/crates/nvisy-nats/src/object/mod.rs +++ b/crates/nvisy-nats/src/object/mod.rs @@ -6,42 +6,32 @@ //! //! # Architecture //! -//! ## Generic Store -//! - [`ObjectStore`] - Generic object store wrapper with streaming support +//! ## Store +//! - [`ObjectStore`] - Type-safe object store with bucket and key configuration //! -//! ## Document Storage -//! - [`DocumentStore`] - Specialized store for document files -//! - [`DocumentKey`] - Unique key for documents (workspace + object ID) +//! ## Key Types +//! - [`FileKey`] - Unique key for files (workspace + object ID) +//! - [`AccountKey`] - Key for account-scoped objects (account ID) //! -//! ## Avatar Storage -//! - [`AvatarStore`] - Specialized store for account avatars -//! - [`AvatarKey`] - Key for avatars (account ID) -//! -//! ## Thumbnail Storage -//! - [`ThumbnailStore`] - Specialized store for document thumbnails -//! 
- Uses [`DocumentKey`] for addressing +//! ## Bucket Types +//! - [`FilesBucket`] - Primary file storage (no expiration) +//! - [`IntermediatesBucket`] - Temporary processing artifacts (7 day TTL) +//! - [`ThumbnailsBucket`] - Document thumbnails (no expiration) +//! - [`AvatarsBucket`] - Account avatars (no expiration) //! //! ## Common Types //! - [`PutResult`] - Result of upload operations with size and SHA-256 hash //! - [`GetResult`] - Result of download operations with streaming reader -mod avatar_bucket; -mod avatar_key; -mod avatar_store; -mod document_bucket; -mod document_key; -mod document_store; mod hashing_reader; +mod object_bucket; mod object_data; +mod object_key; mod object_store; -mod thumbnail_bucket; -mod thumbnail_store; -pub use avatar_key::AvatarKey; -pub use avatar_store::AvatarStore; -pub use document_bucket::{DocumentBucket, Files, Intermediates}; -pub use document_key::DocumentKey; -pub use document_store::DocumentStore; +pub use object_bucket::{ + AvatarsBucket, FilesBucket, IntermediatesBucket, ObjectBucket, ThumbnailsBucket, +}; pub use object_data::{GetResult, PutResult}; +pub use object_key::{AccountKey, FileKey, ObjectKey}; pub use object_store::ObjectStore; -pub use thumbnail_store::ThumbnailStore; diff --git a/crates/nvisy-nats/src/object/object_bucket.rs b/crates/nvisy-nats/src/object/object_bucket.rs new file mode 100644 index 0000000..c4acc3f --- /dev/null +++ b/crates/nvisy-nats/src/object/object_bucket.rs @@ -0,0 +1,84 @@ +//! Object bucket configuration for NATS object storage. + +use std::time::Duration; + +/// Marker trait for object storage buckets. +/// +/// This trait defines the configuration for a NATS object storage bucket, +/// including its name and optional TTL for objects. +pub trait ObjectBucket: Clone + Send + Sync + 'static { + /// Bucket name used in NATS object storage. + const NAME: &'static str; + + /// Maximum age for objects in this bucket. + /// Returns `None` for buckets where objects should not expire. + const MAX_AGE: Option; +} + +/// Primary file storage for uploaded and processed files. +/// +/// No expiration, files are retained indefinitely. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct FilesBucket; + +impl ObjectBucket for FilesBucket { + const MAX_AGE: Option = None; + const NAME: &'static str = "DOCUMENT_FILES"; +} + +/// Temporary storage for intermediate processing artifacts. +/// +/// Files expire after 7 days. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct IntermediatesBucket; + +impl ObjectBucket for IntermediatesBucket { + const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); + const NAME: &'static str = "DOCUMENT_INTERMEDIATES"; +} + +/// Storage for document thumbnails. +/// +/// No expiration, thumbnails are retained indefinitely. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct ThumbnailsBucket; + +impl ObjectBucket for ThumbnailsBucket { + const MAX_AGE: Option = None; + const NAME: &'static str = "DOCUMENT_THUMBNAILS"; +} + +/// Storage for account avatars. +/// +/// No expiration, avatars are retained indefinitely. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct AvatarsBucket; + +impl ObjectBucket for AvatarsBucket { + const MAX_AGE: Option = None; + const NAME: &'static str = "ACCOUNT_AVATARS"; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bucket_names() { + assert_eq!(FilesBucket::NAME, "DOCUMENT_FILES"); + assert_eq!(IntermediatesBucket::NAME, "DOCUMENT_INTERMEDIATES"); + assert_eq!(ThumbnailsBucket::NAME, "DOCUMENT_THUMBNAILS"); + assert_eq!(AvatarsBucket::NAME, "ACCOUNT_AVATARS"); + } + + #[test] + fn test_bucket_max_age() { + assert_eq!(FilesBucket::MAX_AGE, None); + assert_eq!( + IntermediatesBucket::MAX_AGE, + Some(Duration::from_secs(7 * 24 * 60 * 60)) + ); + assert_eq!(ThumbnailsBucket::MAX_AGE, None); + assert_eq!(AvatarsBucket::MAX_AGE, None); + } +} diff --git a/crates/nvisy-nats/src/object/object_data.rs b/crates/nvisy-nats/src/object/object_data.rs index a176be9..26fef58 100644 --- a/crates/nvisy-nats/src/object/object_data.rs +++ b/crates/nvisy-nats/src/object/object_data.rs @@ -11,21 +11,14 @@ pub struct PutResult { size: u64, /// SHA-256 hash computed during streaming. sha256: Vec, - /// SHA-256 hash as hex string. - sha256_hex: String, /// NATS object unique identifier. nuid: String, } impl PutResult { /// Creates a new put result. - pub(crate) fn new(size: u64, sha256: Vec, sha256_hex: String, nuid: String) -> Self { - Self { - size, - sha256, - sha256_hex, - nuid, - } + pub(crate) fn new(size: u64, sha256: Vec, nuid: String) -> Self { + Self { size, sha256, nuid } } /// Returns the size in bytes. @@ -42,8 +35,8 @@ impl PutResult { /// Returns the SHA-256 hash as a hex string. #[inline] - pub fn sha256_hex(&self) -> &str { - &self.sha256_hex + pub fn sha256_hex(&self) -> String { + hex::encode(&self.sha256) } /// Returns the NATS object unique identifier. @@ -109,11 +102,12 @@ mod tests { #[test] fn test_put_result_getters() { - let result = PutResult::new(1024, vec![0u8; 32], "0".repeat(64), "test-nuid".to_string()); + let result = PutResult::new(1024, vec![0u8; 32], "test-nuid".to_string()); assert_eq!(result.size(), 1024); assert_eq!(result.sha256().len(), 32); assert_eq!(result.sha256_hex().len(), 64); + assert_eq!(result.sha256_hex(), "0".repeat(64)); assert_eq!(result.nuid(), "test-nuid"); } } diff --git a/crates/nvisy-nats/src/object/object_key.rs b/crates/nvisy-nats/src/object/object_key.rs new file mode 100644 index 0000000..16fe2af --- /dev/null +++ b/crates/nvisy-nats/src/object/object_key.rs @@ -0,0 +1,282 @@ +//! Object key types for NATS object storage. + +use std::fmt; +use std::str::FromStr; + +use base64::prelude::*; +use uuid::Uuid; + +use crate::{Error, Result}; + +/// Trait for object storage keys. +/// +/// Keys must be convertible to/from strings for storage addressing. +/// Each key type has a prefix that organizes objects by type in the bucket. +pub trait ObjectKey: fmt::Display + FromStr + Clone + Send + Sync + 'static { + /// The prefix for this key type (e.g., "file_", "account_"). + const PREFIX: &'static str; +} + +/// A validated key for file objects in NATS object storage. +/// +/// The key is encoded as `file_` prefix followed by URL-safe base64 of the +/// concatenated workspace ID and object ID. This produces a key like +/// `file_ABC123...` from two UUIDs (32 bytes → base64). 
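// Like the KV side, the ObjectBucket trait above lets new buckets be added as
// plain unit types. The ExportsBucket below, its DOCUMENT_EXPORTS name, and the
// 24-hour retention are hypothetical examples, not buckets defined by this patch.
use std::time::Duration;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct ExportsBucket;

impl ObjectBucket for ExportsBucket {
    const MAX_AGE: Option<Duration> = Some(Duration::from_secs(24 * 60 * 60)); // purge after one day
    const NAME: &'static str = "DOCUMENT_EXPORTS";
}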
+/// +/// The `object_id` is a UUID v7 generated at upload time, providing: +/// - Time-ordered keys for efficient storage and retrieval +/// - Guaranteed uniqueness within the workspace +/// - No collision with database-generated IDs +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FileKey { + pub workspace_id: Uuid, + pub object_id: Uuid, +} + +impl ObjectKey for FileKey { + const PREFIX: &'static str = "file_"; +} + +impl FileKey { + /// Generates a new file key with a fresh UUID v7 object ID. + /// + /// Uses UUID v7 which is time-ordered and contains randomness, + /// making keys both sortable and collision-resistant. + pub fn generate(workspace_id: Uuid) -> Self { + Self { + workspace_id, + object_id: Uuid::now_v7(), + } + } + + /// Creates a file key from existing IDs (for parsing stored keys). + pub fn from_parts(workspace_id: Uuid, object_id: Uuid) -> Self { + Self { + workspace_id, + object_id, + } + } + + /// Regenerates the object ID with a fresh UUID v7. + /// + /// This is useful when creating a new version of a file + /// while keeping the same workspace association. + pub fn regenerate(&mut self) { + self.object_id = Uuid::now_v7(); + } + + /// Encodes the key payload as URL-safe base64. + fn encode_payload(&self) -> String { + let mut bytes = [0u8; 32]; + bytes[..16].copy_from_slice(self.workspace_id.as_bytes()); + bytes[16..].copy_from_slice(self.object_id.as_bytes()); + BASE64_URL_SAFE_NO_PAD.encode(bytes) + } + + /// Decodes a key payload from URL-safe base64. + fn decode_payload(s: &str) -> Result { + let bytes = BASE64_URL_SAFE_NO_PAD.decode(s).map_err(|e| { + Error::operation("parse_key", format!("Invalid base64 encoding: {}", e)) + })?; + + if bytes.len() != 32 { + return Err(Error::operation( + "parse_key", + format!("Invalid key length: expected 32 bytes, got {}", bytes.len()), + )); + } + + let workspace_id = Uuid::from_slice(&bytes[..16]) + .map_err(|e| Error::operation("parse_key", format!("Invalid workspace UUID: {}", e)))?; + + let object_id = Uuid::from_slice(&bytes[16..]) + .map_err(|e| Error::operation("parse_key", format!("Invalid object UUID: {}", e)))?; + + Ok(Self::from_parts(workspace_id, object_id)) + } +} + +impl fmt::Display for FileKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}{}", Self::PREFIX, self.encode_payload()) + } +} + +impl FromStr for FileKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let payload = s.strip_prefix(Self::PREFIX).ok_or_else(|| { + Error::operation( + "parse_key", + format!("Invalid key prefix: expected '{}'", Self::PREFIX), + ) + })?; + Self::decode_payload(payload) + } +} + +/// A validated key for account-scoped objects in NATS object storage. +/// +/// The key format is `account_` prefix followed by the account ID, +/// since these objects are uniquely identified by their owning account (e.g., avatars). +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct AccountKey { + pub account_id: Uuid, +} + +impl ObjectKey for AccountKey { + const PREFIX: &'static str = "account_"; +} + +impl AccountKey { + /// Creates a new account key. 
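// Short sketch of the FileKey lifecycle introduced above: generate a key at
// upload time, persist its string form, and parse it back later. Only the
// function and variable names are invented here; every call is part of this patch.
fn file_key_roundtrip() -> Result<()> {
    let workspace_id = uuid::Uuid::new_v4();
    let mut key = FileKey::generate(workspace_id); // "file_" + base64(workspace_id ++ object_id)
    let stored = key.to_string();                  // 48 chars: 5-char prefix + 43-char payload
    let restored: FileKey = stored.parse()?;
    assert_eq!(restored, key);
    key.regenerate();                              // fresh UUID v7 object_id, same workspace
    Ok(())
}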
+ pub fn new(account_id: Uuid) -> Self { + Self { account_id } + } +} + +impl fmt::Display for AccountKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}{}", Self::PREFIX, self.account_id) + } +} + +impl FromStr for AccountKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let payload = s.strip_prefix(Self::PREFIX).ok_or_else(|| { + Error::operation( + "parse_key", + format!("Invalid key prefix: expected '{}'", Self::PREFIX), + ) + })?; + let account_id = Uuid::parse_str(payload) + .map_err(|e| Error::operation("parse_key", format!("Invalid account UUID: {}", e)))?; + Ok(Self::new(account_id)) + } +} + +impl From for AccountKey { + fn from(account_id: Uuid) -> Self { + Self::new(account_id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + mod file_key { + use super::*; + + #[test] + fn test_prefix() { + assert_eq!(FileKey::PREFIX, "file_"); + } + + #[test] + fn test_generate() { + let workspace_id = Uuid::new_v4(); + let key = FileKey::generate(workspace_id); + + assert_eq!(key.workspace_id, workspace_id); + assert_eq!(key.object_id.get_version_num(), 7); + } + + #[test] + fn test_from_parts() { + let workspace_id = Uuid::new_v4(); + let object_id = Uuid::now_v7(); + let key = FileKey::from_parts(workspace_id, object_id); + + assert_eq!(key.workspace_id, workspace_id); + assert_eq!(key.object_id, object_id); + } + + #[test] + fn test_display_has_prefix() { + let workspace_id = Uuid::new_v4(); + let key = FileKey::generate(workspace_id); + let encoded = key.to_string(); + + assert!(encoded.starts_with("file_")); + // prefix (5) + base64 (43) = 48 + assert_eq!(encoded.len(), 48); + } + + #[test] + fn test_roundtrip() { + let workspace_id = Uuid::new_v4(); + let object_id = Uuid::new_v4(); + + let key = FileKey::from_parts(workspace_id, object_id); + let encoded = key.to_string(); + let decoded: FileKey = encoded.parse().unwrap(); + + assert_eq!(decoded.workspace_id, workspace_id); + assert_eq!(decoded.object_id, object_id); + assert_eq!(key, decoded); + } + + #[test] + fn test_from_str_invalid_prefix() { + assert!(FileKey::from_str("account_abc").is_err()); + assert!(FileKey::from_str("abc").is_err()); + } + } + + mod account_key { + use super::*; + + #[test] + fn test_prefix() { + assert_eq!(AccountKey::PREFIX, "account_"); + } + + #[test] + fn test_new() { + let account_id = Uuid::new_v4(); + let key = AccountKey::new(account_id); + assert_eq!(key.account_id, account_id); + } + + #[test] + fn test_display_has_prefix() { + let account_id = Uuid::new_v4(); + let key = AccountKey::new(account_id); + let encoded = key.to_string(); + + assert!(encoded.starts_with("account_")); + assert_eq!(encoded, format!("account_{}", account_id)); + } + + #[test] + fn test_roundtrip() { + let account_id = Uuid::new_v4(); + let key = AccountKey::new(account_id); + let encoded = key.to_string(); + let decoded: AccountKey = encoded.parse().unwrap(); + assert_eq!(decoded.account_id, account_id); + } + + #[test] + fn test_from_uuid() { + let account_id = Uuid::new_v4(); + let key: AccountKey = account_id.into(); + assert_eq!(key.account_id, account_id); + } + + #[test] + fn test_from_str_invalid_prefix() { + assert!(AccountKey::from_str("file_abc").is_err()); + assert!(AccountKey::from_str("abc").is_err()); + } + + #[test] + fn test_from_str_invalid_uuid() { + assert!(AccountKey::from_str("account_not-a-uuid").is_err()); + } + } +} diff --git a/crates/nvisy-nats/src/object/object_store.rs b/crates/nvisy-nats/src/object/object_store.rs index 5e00784..7b23336 
100644 --- a/crates/nvisy-nats/src/object/object_store.rs +++ b/crates/nvisy-nats/src/object/object_store.rs @@ -1,7 +1,7 @@ -//! Generic object store wrapper for NATS JetStream. +//! Generic object store for NATS JetStream. +use std::marker::PhantomData; use std::sync::Arc; -use std::time::Duration; use async_nats::jetstream; use async_nats::jetstream::context::ObjectStoreErrorKind; @@ -9,65 +9,71 @@ use async_nats::jetstream::object_store::{self, ObjectInfo}; use tokio::io::AsyncRead; use super::hashing_reader::HashingReader; +use super::object_bucket::ObjectBucket; use super::object_data::{GetResult, PutResult}; +use super::object_key::ObjectKey; use crate::{Error, Result}; /// Tracing target for object store operations. const TRACING_TARGET: &str = "nvisy_nats::object_store"; -/// A generic object store that manages files in NATS object storage. +/// A type-safe object store that manages objects in NATS object storage. /// /// This store provides streaming upload capabilities with on-the-fly /// SHA-256 hash computation. +/// +/// The store is generic over: +/// - `B`: The bucket type (determines storage location and TTL) +/// - `K`: The key type (determines how objects are addressed) #[derive(Clone)] -pub struct ObjectStore { +pub struct ObjectStore +where + B: ObjectBucket, + K: ObjectKey, +{ inner: Arc, - bucket: Arc, + _marker: PhantomData<(B, K)>, } -impl ObjectStore { - /// Creates a new object store for the specified bucket. - /// - /// If `max_age` is `None`, objects will not expire. - pub async fn new( - jetstream: &jetstream::Context, - bucket: impl Into, - max_age: Option, - ) -> Result { - let bucket = bucket.into(); - +impl ObjectStore +where + B: ObjectBucket, + K: ObjectKey, +{ + /// Creates a new object store for the specified bucket type. + pub(crate) async fn new(jetstream: &jetstream::Context) -> Result { tracing::debug!( target: TRACING_TARGET, - bucket = %bucket, + bucket = %B::NAME, "Initializing object store" ); - let store = match jetstream.get_object_store(&bucket).await { + let store = match jetstream.get_object_store(B::NAME).await { Ok(store) => { tracing::debug!( target: TRACING_TARGET, - bucket = %bucket, + bucket = %B::NAME, "Retrieved existing object store" ); store } Err(e) if matches!(e.kind(), ObjectStoreErrorKind::GetStore) => { let config = object_store::Config { - bucket: bucket.clone(), - max_age: max_age.unwrap_or_default(), + bucket: B::NAME.to_string(), + max_age: B::MAX_AGE.unwrap_or_default(), ..Default::default() }; tracing::info!( target: TRACING_TARGET, - bucket = %bucket, + bucket = %B::NAME, "Creating new object store" ); jetstream.create_object_store(config).await.map_err(|e| { tracing::error!( target: TRACING_TARGET, - bucket = %bucket, + bucket = %B::NAME, error = %e, "Failed to create object store" ); @@ -77,7 +83,7 @@ impl ObjectStore { Err(e) => { tracing::error!( target: TRACING_TARGET, - bucket = %bucket, + bucket = %B::NAME, error = %e, "Failed to get object store" ); @@ -87,32 +93,35 @@ impl ObjectStore { Ok(Self { inner: Arc::new(store), - bucket: Arc::new(bucket), + _marker: PhantomData, }) } /// Returns the bucket name. - pub fn bucket(&self) -> &str { - &self.bucket + #[inline] + pub fn bucket(&self) -> &'static str { + B::NAME } /// Streams data to the store while computing SHA-256 hash on-the-fly. /// /// This method does not buffer the entire content in memory, making it /// suitable for large file uploads. 
- pub async fn put(&self, key: &str, reader: R) -> Result + pub async fn put(&self, key: &K, reader: R) -> Result where R: AsyncRead + Unpin, { + let key_str = key.to_string(); + tracing::debug!( target: TRACING_TARGET, - key = %key, - bucket = %self.bucket, + key = %key_str, + bucket = %B::NAME, "Starting streaming upload" ); let meta = object_store::ObjectMetadata { - name: key.to_string(), + name: key_str.clone(), ..Default::default() }; @@ -125,7 +134,7 @@ impl ObjectStore { .map_err(|e| { tracing::error!( target: TRACING_TARGET, - key = %key, + key = %key_str, error = %e, "Failed to upload object" ); @@ -133,34 +142,29 @@ impl ObjectStore { })?; let sha256 = hashing_reader.finalize(); - let sha256_hex = hex::encode(sha256); tracing::info!( target: TRACING_TARGET, - key = %key, + key = %key_str, size = info.size, - sha256 = %sha256_hex, nuid = %info.nuid, "Streaming upload complete" ); - Ok(PutResult::new( - info.size as u64, - sha256.to_vec(), - sha256_hex, - info.nuid, - )) + Ok(PutResult::new(info.size as u64, sha256.to_vec(), info.nuid)) } /// Gets an object from the store as a stream. /// /// Returns `None` if the object doesn't exist. /// The returned reader implements `AsyncRead` for streaming the content. - pub async fn get(&self, key: &str) -> Result> { + pub async fn get(&self, key: &K) -> Result> { + let key_str = key.to_string(); + tracing::debug!( target: TRACING_TARGET, - key = %key, - bucket = %self.bucket, + key = %key_str, + bucket = %B::NAME, "Getting object" ); @@ -170,11 +174,11 @@ impl ObjectStore { None => return Ok(None), }; - match self.inner.get(key).await { + match self.inner.get(&key_str).await { Ok(reader) => { tracing::debug!( target: TRACING_TARGET, - key = %key, + key = %key_str, size = info.size, "Object stream opened" ); @@ -186,14 +190,14 @@ impl ObjectStore { if error_str.contains("not found") || error_str.contains("no message found") { tracing::debug!( target: TRACING_TARGET, - key = %key, + key = %key_str, "Object not found" ); Ok(None) } else { tracing::error!( target: TRACING_TARGET, - key = %key, + key = %key_str, error = %e, "Failed to get object" ); @@ -204,8 +208,10 @@ impl ObjectStore { } /// Gets object info without downloading the content. - pub async fn info(&self, key: &str) -> Result> { - match self.inner.info(key).await { + pub async fn info(&self, key: &K) -> Result> { + let key_str = key.to_string(); + + match self.inner.info(&key_str).await { Ok(info) => Ok(Some(info)), Err(e) => { let error_str = e.to_string(); @@ -219,18 +225,20 @@ impl ObjectStore { } /// Deletes an object from the store. - pub async fn delete(&self, key: &str) -> Result<()> { + pub async fn delete(&self, key: &K) -> Result<()> { + let key_str = key.to_string(); + tracing::debug!( target: TRACING_TARGET, - key = %key, - bucket = %self.bucket, + key = %key_str, + bucket = %B::NAME, "Deleting object" ); - self.inner.delete(key).await.map_err(|e| { + self.inner.delete(&key_str).await.map_err(|e| { tracing::error!( target: TRACING_TARGET, - key = %key, + key = %key_str, error = %e, "Failed to delete object" ); @@ -239,7 +247,7 @@ impl ObjectStore { tracing::info!( target: TRACING_TARGET, - key = %key, + key = %key_str, "Object deleted" ); @@ -247,7 +255,7 @@ impl ObjectStore { } /// Checks if an object exists. 
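// Usage sketch for the typed ObjectStore: stream a local file into the files
// bucket and report its size and hash. Obtaining the store handle (its
// constructor is pub(crate)), the on-disk path, and the helper name are
// assumptions; put, size, and sha256_hex are the APIs added in this patch.
async fn upload_file(
    store: &ObjectStore<FilesBucket, FileKey>,
    workspace_id: uuid::Uuid,
    path: &std::path::Path,
) -> Result<FileKey> {
    let key = FileKey::generate(workspace_id);
    let file = tokio::fs::File::open(path)
        .await
        .map_err(|e| Error::operation("open_file", e.to_string()))?;
    // Streams the file; SHA-256 is computed on the fly by the hashing reader.
    let result = store.put(&key, file).await?;
    tracing::info!(size = result.size(), sha256 = %result.sha256_hex(), "uploaded file");
    Ok(key)
}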
- pub async fn exists(&self, key: &str) -> Result { + pub async fn exists(&self, key: &K) -> Result { Ok(self.info(key).await?.is_some()) } } diff --git a/crates/nvisy-nats/src/object/thumbnail_bucket.rs b/crates/nvisy-nats/src/object/thumbnail_bucket.rs deleted file mode 100644 index 5f4b9d2..0000000 --- a/crates/nvisy-nats/src/object/thumbnail_bucket.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Thumbnail bucket constants for NATS object storage. - -use std::time::Duration; - -/// Bucket name for document thumbnails. -pub const THUMBNAIL_BUCKET: &str = "DOCUMENT_THUMBNAILS"; - -/// Maximum age for thumbnails (none - retained indefinitely). -pub const THUMBNAIL_MAX_AGE: Option = None; diff --git a/crates/nvisy-nats/src/object/thumbnail_store.rs b/crates/nvisy-nats/src/object/thumbnail_store.rs deleted file mode 100644 index 464afc1..0000000 --- a/crates/nvisy-nats/src/object/thumbnail_store.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Thumbnail store for NATS object storage. - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; - -use super::document_key::DocumentKey; -use super::object_data::{GetResult, PutResult}; -use super::object_store::ObjectStore; -use super::thumbnail_bucket::{THUMBNAIL_BUCKET, THUMBNAIL_MAX_AGE}; -use crate::Result; - -/// A thumbnail store that manages document thumbnails in NATS object storage. -/// -/// Uses [`DocumentKey`] for addressing (same key format as document files). -#[derive(Clone, Deref, DerefMut)] -pub struct ThumbnailStore { - #[deref] - #[deref_mut] - inner: ObjectStore, -} - -impl ThumbnailStore { - /// Creates a new thumbnail store. - pub async fn new(jetstream: &jetstream::Context) -> Result { - let inner = ObjectStore::new(jetstream, THUMBNAIL_BUCKET, THUMBNAIL_MAX_AGE).await?; - Ok(Self { inner }) - } - - /// Streams thumbnail data to the store while computing SHA-256 hash on-the-fly. - pub async fn put(&self, key: &DocumentKey, reader: R) -> Result - where - R: tokio::io::AsyncRead + Unpin, - { - self.inner.put(&key.to_string(), reader).await - } - - /// Gets a thumbnail from the store as a stream. - /// - /// Returns `None` if the thumbnail doesn't exist. - pub async fn get(&self, key: &DocumentKey) -> Result> { - self.inner.get(&key.to_string()).await - } - - /// Deletes a thumbnail from the store. - pub async fn delete(&self, key: &DocumentKey) -> Result<()> { - self.inner.delete(&key.to_string()).await - } - - /// Checks if a thumbnail exists. - pub async fn exists(&self, key: &DocumentKey) -> Result { - self.inner.exists(&key.to_string()).await - } - - /// Returns the bucket name. - #[inline] - pub fn bucket(&self) -> &'static str { - THUMBNAIL_BUCKET - } -} diff --git a/crates/nvisy-nats/src/stream/document_job.rs b/crates/nvisy-nats/src/stream/document_job.rs deleted file mode 100644 index a351305..0000000 --- a/crates/nvisy-nats/src/stream/document_job.rs +++ /dev/null @@ -1,470 +0,0 @@ -//! Document job types for file processing pipeline. - -use jiff::Timestamp; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::document_task::PredefinedTask; -use super::event::EventPriority; - -/// Stream name for document jobs. -pub const STREAM_NAME: &str = "DOCUMENT_JOBS"; - -/// Marker trait for document processing stages. -/// -/// Each stage represents a distinct phase in the document processing pipeline, -/// with its own stream subject for NATS routing. 
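// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): how the deleted ThumbnailStore
// maps onto the new generic ObjectStore<B, K>. The ObjectBucket and ObjectKey
// trait definitions live in object_bucket.rs / object_key.rs, which are not
// shown in this hunk; the associated items below (NAME, MAX_AGE) are inferred
// from the usages above (B::NAME, B::MAX_AGE, key.to_string()) and are
// assumptions, as is the claim that the existing DocumentKey implements
// ObjectKey. Written as crate-internal code; import paths are assumed.
// ---------------------------------------------------------------------------
use std::time::Duration;

use crate::object::{DocumentKey, ObjectBucket, ObjectStore}; // re-export paths assumed
use crate::Result;

// Assumed bucket marker carrying the constants the deleted thumbnail_bucket.rs held.
#[derive(Debug, Clone, Copy, Default)]
struct ThumbnailBucket;

impl ObjectBucket for ThumbnailBucket {
    const NAME: &'static str = "DOCUMENT_THUMBNAILS";
    const MAX_AGE: Option<Duration> = None; // thumbnails retained indefinitely
}

// Usage; ObjectStore::new is pub(crate), so a handle would be obtained through
// the crate's client rather than constructed directly here.
async fn store_thumbnail(
    store: &ObjectStore<ThumbnailBucket, DocumentKey>,
    key: &DocumentKey,
    file: tokio::fs::File,
) -> Result<()> {
    // Streams the file and computes SHA-256 on the fly; PutResult now carries
    // the raw digest bytes, so callers hex-encode themselves if they need to.
    let _result = store.put(key, file).await?;

    // Typed keys throughout: no manual key.to_string() at call sites anymore.
    if store.exists(key).await? {
        tracing::debug!(bucket = store.bucket(), "thumbnail stored");
    }
    Ok(())
}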
-pub trait Stage: Serialize + DeserializeOwned + Clone + Send + Sync + 'static { - /// Stage name for logging and debugging. - const NAME: &'static str; - /// NATS stream subject suffix for this stage. - const SUBJECT: &'static str; -} - -/// Preprocessing stage data. -/// -/// Runs when a user uploads a file. Prepares the file for future processing: -/// - Format detection and validation -/// - File integrity checks -/// - Metadata extraction and fixes -/// - Thumbnail generation -/// - OCR for scanned documents -/// - Embedding generation for knowledge base / semantic search -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct PreprocessingData { - /// Whether to validate and fix file metadata. Defaults to true. - #[serde(default = "default_true", skip_serializing_if = "is_true")] - pub validate_metadata: bool, - /// Whether to run OCR on the document. Defaults to true. - #[serde(default = "default_true", skip_serializing_if = "is_true")] - pub run_ocr: bool, - /// Whether to generate embeddings for semantic search. Defaults to true. - #[serde(default = "default_true", skip_serializing_if = "is_true")] - pub generate_embeddings: bool, - /// Whether to generate thumbnails for UI previews. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub generate_thumbnails: Option, -} - -impl Default for PreprocessingData { - fn default() -> Self { - Self { - validate_metadata: true, - run_ocr: true, - generate_embeddings: true, - generate_thumbnails: None, - } - } -} - -impl Stage for PreprocessingData { - const NAME: &'static str = "preprocessing"; - const SUBJECT: &'static str = "preprocessing"; -} - -/// Processing stage data. -/// -/// Runs when a user requests changes to the document. Changes are typically -/// a collection of annotations (notes, highlights, comments) that need to be -/// applied using VLM pipelines. -#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct ProcessingData { - /// The main VLM prompt/instruction for processing. - #[serde(default, skip_serializing_if = "String::is_empty")] - pub prompt: String, - /// Additional context for the VLM. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub context: Option, - /// Annotation IDs to process. None means process all annotations. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub annotation_ids: Option>, - /// Other files to use as context (e.g., "make this look like that"). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub reference_file_ids: Option>, - /// Predefined processing tasks to apply. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub tasks: Vec, - /// Processing quality level. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub quality: Option, - /// Whether to process in chunks for large files. Defaults to false. - #[serde(default, skip_serializing_if = "is_false")] - pub chunk_processing: bool, - /// Custom processing parameters. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub custom_params: Option, -} - -impl Stage for ProcessingData { - const NAME: &'static str = "processing"; - const SUBJECT: &'static str = "processing"; -} - -/// Postprocessing stage data. -/// -/// Runs when a user downloads the file. 
Prepares the final output: -/// - Format conversion to requested format -/// - Compression settings -/// - Cleanup of temporary artifacts -#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct PostprocessingData { - /// Target format for the output file. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub target_format: Option, - /// Compression level for output file. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub compression_level: Option, - /// Whether to burn annotations into the document vs keeping as metadata. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub flatten_annotations: Option, - /// Cleanup tasks to perform. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub cleanup_tasks: Option>, -} - -impl Stage for PostprocessingData { - const NAME: &'static str = "postprocessing"; - const SUBJECT: &'static str = "postprocessing"; -} - -/// Processing quality level. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum ProcessingQuality { - /// Fast processing with lower quality. - Fast, - /// Balanced speed and quality. - Balanced, - /// High quality, slower processing. - High, -} - -/// Compression level for output files. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum CompressionLevel { - /// No compression. - None, - /// Medium compression, balanced. - Normal, - /// High compression, slower but smaller files. - High, -} - -/// Document processing job. -/// -/// Represents a unit of work in the document processing pipeline. -/// Each job targets a specific file and is typed by its processing stage. -/// -/// The generic parameter `S` determines the stage (preprocessing, processing, -/// or postprocessing), enabling compile-time type safety and stage-specific -/// stream routing. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(bound = "")] -pub struct DocumentJob { - /// Unique job identifier (UUID v7 for time-ordering). - pub id: Uuid, - /// Database file ID to process. - pub file_id: Uuid, - /// Storage path in NATS object store (DocumentKey encoded). - pub object_key: String, - /// File extension for format detection. - pub file_extension: String, - /// Stage-specific data. - pub data: S, - /// Job priority. - pub priority: EventPriority, - /// When the job was created. - pub created_at: Timestamp, - /// NATS subject to publish result to (for internal job chaining). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub callback_subject: Option, - /// Idempotency key to prevent duplicate job processing. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub idempotency_key: Option, -} - -impl DocumentJob { - /// Creates a new document job with the given stage data. - pub fn new(file_id: Uuid, storage_path: String, file_extension: String, data: S) -> Self { - Self { - id: Uuid::now_v7(), - file_id, - object_key: storage_path, - file_extension, - data, - priority: EventPriority::Normal, - created_at: Timestamp::now(), - callback_subject: None, - idempotency_key: None, - } - } - - /// Sets the job priority. 
- pub fn with_priority(mut self, priority: EventPriority) -> Self { - self.priority = priority; - self - } - - /// Sets a callback subject for job chaining. - pub fn with_callback(mut self, subject: impl Into) -> Self { - self.callback_subject = Some(subject.into()); - self - } - - /// Sets an idempotency key. - pub fn with_idempotency_key(mut self, key: impl Into) -> Self { - self.idempotency_key = Some(key.into()); - self - } - - /// Returns the file ID. - #[inline] - pub fn file_id(&self) -> Uuid { - self.file_id - } - - /// Returns the storage path. - #[inline] - pub fn storage_path(&self) -> &str { - &self.object_key - } - - /// Returns the file extension. - #[inline] - pub fn file_extension(&self) -> &str { - &self.file_extension - } - - /// Returns a reference to the stage data. - #[inline] - pub fn data(&self) -> &S { - &self.data - } - - /// Returns the stage name. - #[inline] - pub fn stage_name(&self) -> &'static str { - S::NAME - } - - /// Returns the stream subject for this job's stage. - #[inline] - pub fn subject(&self) -> &'static str { - S::SUBJECT - } - - /// Returns job age since creation. - pub fn age(&self) -> std::time::Duration { - let now = Timestamp::now(); - let signed_dur = now.duration_since(self.created_at); - std::time::Duration::from_secs(signed_dur.as_secs().max(0) as u64) - } -} - -fn default_true() -> bool { - true -} - -fn is_true(value: &bool) -> bool { - *value -} - -fn is_false(value: &bool) -> bool { - !*value -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_preprocessing_job_new() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PreprocessingData::default(), - ); - - assert_eq!(job.file_id(), file_id); - assert_eq!(job.storage_path(), "storage/path"); - assert_eq!(job.file_extension(), "pdf"); - assert_eq!(job.stage_name(), "preprocessing"); - assert_eq!(job.subject(), "preprocessing"); - } - - #[test] - fn test_preprocessing_defaults() { - let data = PreprocessingData::default(); - assert!(data.validate_metadata); - assert!(data.run_ocr); - assert!(data.generate_embeddings); - assert!(data.generate_thumbnails.is_none()); - } - - #[test] - fn test_preprocessing_serialization_skips_defaults() { - let data = PreprocessingData::default(); - let json = serde_json::to_string(&data).unwrap(); - // Should be minimal since defaults are skipped - assert_eq!(json, "{}"); - - // Parsing empty object should give defaults - let parsed: PreprocessingData = serde_json::from_str("{}").unwrap(); - assert!(parsed.validate_metadata); - assert!(parsed.run_ocr); - assert!(parsed.generate_embeddings); - } - - #[test] - fn test_processing_job_with_prompt() { - let file_id = Uuid::now_v7(); - - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - ProcessingData { - prompt: "Apply the highlighted changes".to_string(), - context: Some("This is a legal document".to_string()), - annotation_ids: None, // Process all annotations - tasks: vec![PredefinedTask::Proofread], - ..Default::default() - }, - ); - - assert_eq!(job.stage_name(), "processing"); - assert_eq!(job.data().prompt, "Apply the highlighted changes"); - assert_eq!( - job.data().context, - Some("This is a legal document".to_string()) - ); - assert!(job.data().annotation_ids.is_none()); - assert_eq!(job.data().tasks.len(), 1); - } - - #[test] - fn test_predefined_task_redact() { - let task = PredefinedTask::Redact { - patterns: vec!["email".to_string(), "phone".to_string()], - }; - 
- let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_translate() { - let task = PredefinedTask::Translate { - target_language: "es".to_string(), - }; - - let json = serde_json::to_string(&task).unwrap(); - assert!(json.contains("translate")); - assert!(json.contains("es")); - } - - #[test] - fn test_postprocessing_job() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PostprocessingData { - target_format: Some("docx".to_string()), - compression_level: Some(CompressionLevel::Normal), - ..Default::default() - }, - ); - - assert_eq!(job.stage_name(), "postprocessing"); - assert_eq!(job.data().target_format, Some("docx".to_string())); - assert_eq!(job.data().compression_level, Some(CompressionLevel::Normal)); - } - - #[test] - fn test_job_with_callback_and_idempotency() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PreprocessingData::default(), - ) - .with_callback("results.preprocessing") - .with_idempotency_key("upload-123"); - - assert_eq!( - job.callback_subject, - Some("results.preprocessing".to_string()) - ); - assert_eq!(job.idempotency_key, Some("upload-123".to_string())); - } - - #[test] - fn test_job_serialization_roundtrip() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PreprocessingData { - validate_metadata: true, - run_ocr: true, - generate_embeddings: true, - generate_thumbnails: Some(true), - }, - ); - - let json = serde_json::to_string(&job).unwrap(); - let parsed: DocumentJob = serde_json::from_str(&json).unwrap(); - - assert_eq!(job.file_id, parsed.file_id); - assert_eq!(job.data, parsed.data); - } - - #[test] - fn test_compression_level_serialization() { - let level = CompressionLevel::High; - let json = serde_json::to_string(&level).unwrap(); - assert_eq!(json, "\"high\""); - - let parsed: CompressionLevel = serde_json::from_str(&json).unwrap(); - assert_eq!(parsed, CompressionLevel::High); - } - - #[test] - fn test_processing_quality_serialization() { - let quality = ProcessingQuality::Fast; - let json = serde_json::to_string(&quality).unwrap(); - assert_eq!(json, "\"fast\""); - - let parsed: ProcessingQuality = serde_json::from_str(&json).unwrap(); - assert_eq!(parsed, ProcessingQuality::Fast); - } - - #[test] - fn test_stage_constants() { - assert_eq!(PreprocessingData::NAME, "preprocessing"); - assert_eq!(PreprocessingData::SUBJECT, "preprocessing"); - - assert_eq!(ProcessingData::NAME, "processing"); - assert_eq!(ProcessingData::SUBJECT, "processing"); - - assert_eq!(PostprocessingData::NAME, "postprocessing"); - assert_eq!(PostprocessingData::SUBJECT, "postprocessing"); - } -} diff --git a/crates/nvisy-nats/src/stream/document_job_pub.rs b/crates/nvisy-nats/src/stream/document_job_pub.rs deleted file mode 100644 index 0195427..0000000 --- a/crates/nvisy-nats/src/stream/document_job_pub.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! Document job stream publisher. - -use std::marker::PhantomData; - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; - -use super::document_job::{DocumentJob, STREAM_NAME, Stage}; -use super::publisher::StreamPublisher; -use crate::Result; - -/// Generic document job publisher for a specific processing stage. 
-/// -/// This publisher routes jobs to stage-specific subjects within the -/// `DOCUMENT_JOBS` stream, enabling separate consumers per stage. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct DocumentJobPublisher { - #[deref] - #[deref_mut] - publisher: StreamPublisher>, - _marker: PhantomData, -} - -impl DocumentJobPublisher { - /// Create a new document job publisher for the specified stage. - pub async fn new(jetstream: &Context) -> Result { - let publisher = StreamPublisher::new(jetstream, STREAM_NAME).await?; - Ok(Self { - publisher, - _marker: PhantomData, - }) - } - - /// Publish a job to the stage-specific subject. - /// - /// Jobs are published to `DOCUMENT_JOBS.{stage}.{file_id}`. - pub async fn publish_job(&self, job: &DocumentJob) -> Result<()> { - let subject = format!("{}.{}", S::SUBJECT, job.file_id); - self.publisher.publish(&subject, job).await - } - - /// Publish a job with a custom subject suffix. - /// - /// Jobs are published to `DOCUMENT_JOBS.{stage}.{suffix}`. - pub async fn publish_job_with_subject(&self, job: &DocumentJob, suffix: &str) -> Result<()> { - let subject = format!("{}.{}", S::SUBJECT, suffix); - self.publisher.publish(&subject, job).await - } - - /// Publish multiple jobs in batch. - pub async fn publish_batch(&self, jobs: &[DocumentJob]) -> Result<()> { - // Group by file_id isn't needed since we use the stage subject - self.publisher - .publish_batch_parallel(S::SUBJECT, jobs, 10) - .await - } -} diff --git a/crates/nvisy-nats/src/stream/document_job_sub.rs b/crates/nvisy-nats/src/stream/document_job_sub.rs deleted file mode 100644 index edbbcc5..0000000 --- a/crates/nvisy-nats/src/stream/document_job_sub.rs +++ /dev/null @@ -1,66 +0,0 @@ -//! Document job stream subscriber. - -use std::marker::PhantomData; - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; - -use super::document_job::{DocumentJob, STREAM_NAME, Stage}; -use super::subscriber::StreamSubscriber; -use crate::Result; - -/// Generic document job subscriber for a specific processing stage. -/// -/// This subscriber filters jobs by stage-specific subjects within the -/// `DOCUMENT_JOBS` stream, enabling dedicated consumers per stage. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct DocumentJobSubscriber { - #[deref] - #[deref_mut] - subscriber: StreamSubscriber>, - _marker: PhantomData, -} - -impl DocumentJobSubscriber { - /// Create a new document job subscriber for the specified stage. - /// - /// The subscriber automatically filters to the stage-specific subject pattern. - pub async fn new(jetstream: &Context, consumer_name: &str) -> Result { - let filter_subject = format!("{}.{}.>", STREAM_NAME, S::SUBJECT); - let subscriber = StreamSubscriber::new(jetstream, STREAM_NAME, consumer_name) - .await? - .with_filter_subject(filter_subject); - Ok(Self { - subscriber, - _marker: PhantomData, - }) - } - - /// Create a subscriber without stage filtering (receives all stages). - /// - /// Note: This requires the job type to match at deserialization time, - /// so it's primarily useful for monitoring or debugging. - pub async fn new_unfiltered(jetstream: &Context, consumer_name: &str) -> Result { - let subscriber = StreamSubscriber::new(jetstream, STREAM_NAME, consumer_name).await?; - Ok(Self { - subscriber, - _marker: PhantomData, - }) - } - - /// Create a subscriber filtered to a specific file. 
- pub async fn new_for_file( - jetstream: &Context, - consumer_name: &str, - file_id: uuid::Uuid, - ) -> Result { - let filter_subject = format!("{}.{}.{}", STREAM_NAME, S::SUBJECT, file_id); - let subscriber = StreamSubscriber::new(jetstream, STREAM_NAME, consumer_name) - .await? - .with_filter_subject(filter_subject); - Ok(Self { - subscriber, - _marker: PhantomData, - }) - } -} diff --git a/crates/nvisy-nats/src/stream/document_task.rs b/crates/nvisy-nats/src/stream/document_task.rs deleted file mode 100644 index fc9c5b4..0000000 --- a/crates/nvisy-nats/src/stream/document_task.rs +++ /dev/null @@ -1,261 +0,0 @@ -//! Predefined document processing tasks. - -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Predefined processing tasks that can be applied to documents. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(tag = "task", rename_all = "camelCase")] -pub enum PredefinedTask { - /// Redact sensitive information matching patterns. - Redact { - /// Patterns to redact (emails, phone numbers, SSNs, etc.). - patterns: Vec, - }, - - /// Summarize document content. - Summarize { - /// Maximum length of summary. - #[serde(default, skip_serializing_if = "Option::is_none")] - max_length: Option, - }, - - /// Translate document to target language. - Translate { - /// Target language code (e.g., "es", "fr", "de"). - target_language: String, - }, - - /// Extract key information from document. - ExtractInfo { - /// Fields to extract (e.g., "dates", "names", "amounts"). - #[serde(default, skip_serializing_if = "Vec::is_empty")] - fields: Vec, - }, - - /// Insert information into document at specified locations. - InsertInfo { - /// Key-value pairs to insert. - values: Vec, - }, - - /// Generate information based on document content. - GenerateInfo { - /// Type of information to generate. - info_type: GenerateInfoType, - }, - - /// Reformat document structure. - Reformat { - /// Target format style. - #[serde(default, skip_serializing_if = "Option::is_none")] - style: Option, - }, - - /// Proofread and fix grammar/spelling. - Proofread, - - /// Generate table of contents. - GenerateToc, - - /// Split document into multiple files. - Split { - /// How to split the document. - strategy: SplitStrategy, - }, - - /// Merge multiple files into one document. - Merge { - /// File IDs to merge with this document. - file_ids: Vec, - /// Order of files in the merged document. - #[serde(default, skip_serializing_if = "Option::is_none")] - order: Option, - }, -} - -/// Value to insert into a document. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct InsertValue { - /// Field or placeholder name. - pub field: String, - /// Value to insert. - pub value: String, - /// Location hint (e.g., "header", "footer", "after:section1"). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub location: Option, -} - -/// Types of information that can be generated. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum GenerateInfoType { - /// Generate an executive summary. - ExecutiveSummary, - /// Generate keywords/tags. - Keywords, - /// Generate document metadata. - Metadata, - /// Generate abstract. - Abstract, - /// Generate key takeaways. - KeyTakeaways, - /// Generate action items. 
- ActionItems, - /// Generate FAQ from content. - Faq, -} - -/// Strategy for splitting documents. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(tag = "by", rename_all = "camelCase")] -pub enum SplitStrategy { - /// Split by page count. - Pages { - /// Number of pages per split. - pages_per_file: u32, - }, - /// Split by sections/chapters. - Sections, - /// Split by heading level. - Headings { - /// Heading level to split on (1-6). - level: u8, - }, - /// Split by file size. - Size { - /// Maximum size per file in bytes. - max_bytes: u64, - }, - /// Split at specific page numbers. - AtPages { - /// Page numbers to split at. - page_numbers: Vec, - }, -} - -/// Order for merging documents. -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum MergeOrder { - /// Use the order provided in file_ids. - #[default] - AsProvided, - /// Sort by filename alphabetically. - Alphabetical, - /// Sort by creation date. - ByDate, - /// Sort by file size. - BySize, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_predefined_task_redact() { - let task = PredefinedTask::Redact { - patterns: vec!["email".to_string(), "phone".to_string()], - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_translate() { - let task = PredefinedTask::Translate { - target_language: "es".to_string(), - }; - - let json = serde_json::to_string(&task).unwrap(); - assert!(json.contains("translate")); - assert!(json.contains("es")); - } - - #[test] - fn test_predefined_task_split() { - let task = PredefinedTask::Split { - strategy: SplitStrategy::Pages { pages_per_file: 10 }, - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_merge() { - let task = PredefinedTask::Merge { - file_ids: vec![Uuid::now_v7(), Uuid::now_v7()], - order: Some(MergeOrder::Alphabetical), - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_insert_info() { - let task = PredefinedTask::InsertInfo { - values: vec![ - InsertValue { - field: "company_name".to_string(), - value: "Acme Corp".to_string(), - location: Some("header".to_string()), - }, - InsertValue { - field: "date".to_string(), - value: "2024-01-15".to_string(), - location: None, - }, - ], - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_generate_info() { - let task = PredefinedTask::GenerateInfo { - info_type: GenerateInfoType::ExecutiveSummary, - }; - - let json = serde_json::to_string(&task).unwrap(); - assert!(json.contains("generateInfo")); - assert!(json.contains("executiveSummary")); - } - - #[test] - fn test_split_strategy_serialization() { - let strategies = vec![ - SplitStrategy::Pages { pages_per_file: 5 }, - SplitStrategy::Sections, - SplitStrategy::Headings { level: 2 }, - SplitStrategy::Size { - max_bytes: 1024 * 1024, - }, - SplitStrategy::AtPages { - page_numbers: vec![5, 10, 15], - }, - ]; - - for 
strategy in strategies { - let json = serde_json::to_string(&strategy).unwrap(); - let parsed: SplitStrategy = serde_json::from_str(&json).unwrap(); - assert_eq!(strategy, parsed); - } - } -} diff --git a/crates/nvisy-nats/src/stream/event.rs b/crates/nvisy-nats/src/stream/event.rs index 66af202..164e3d7 100644 --- a/crates/nvisy-nats/src/stream/event.rs +++ b/crates/nvisy-nats/src/stream/event.rs @@ -1,42 +1,79 @@ //! Event types for stream processing. //! -//! This module contains priority levels used across all event streams. +//! This module contains common event types and the file job type +//! used in processing pipelines. +use jiff::Timestamp; #[cfg(feature = "schema")] use schemars::JsonSchema; +use serde::de::DeserializeOwned; use serde::{Deserialize, Serialize}; +use uuid::Uuid; -/// Event execution priority levels. +/// File processing job. /// -/// Priority determines the order in which events are processed when multiple -/// events are queued. Higher priority events are processed before lower priority ones. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[derive(Serialize, Deserialize)] +/// Represents a unit of work in a file processing pipeline. +/// Each job targets a specific file and carries a generic payload +/// that defines the processing parameters. +/// +/// The generic parameter `T` is the job-specific data payload. +/// Callers define their own payload types for different pipeline stages. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "lowercase")] -pub enum EventPriority { - /// Low priority - processed when system resources are available. - Low = 0, +#[serde(bound = "T: Serialize + DeserializeOwned")] +pub struct FileJob { + /// Unique job identifier (UUID v7 for time-ordering). + pub id: Uuid, + /// Database file ID to process. + pub file_id: Uuid, + /// Storage path in NATS object store (DocumentKey encoded). + pub object_key: String, + /// File extension for format detection. + pub file_extension: String, + /// Job-specific data payload. + pub data: T, + /// When the job was created. + pub created_at: Timestamp, + /// NATS subject to publish result to (for internal job chaining). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub callback_subject: Option, + /// Idempotency key to prevent duplicate job processing. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub idempotency_key: Option, +} - /// Normal priority - default for most events. - #[default] - Normal = 1, +impl FileJob { + /// Creates a new file job with the given data payload. + pub fn new(file_id: Uuid, object_key: String, file_extension: String, data: T) -> Self { + Self { + id: Uuid::now_v7(), + file_id, + object_key, + file_extension, + data, + created_at: Timestamp::now(), + callback_subject: None, + idempotency_key: None, + } + } - /// High priority - processed ahead of normal events. - High = 2, -} + /// Sets a callback subject for job chaining. + pub fn with_callback(mut self, subject: impl Into) -> Self { + self.callback_subject = Some(subject.into()); + self + } -impl EventPriority { - /// Returns the numeric value of the priority level. - #[inline] - pub const fn as_u8(self) -> u8 { - self as u8 + /// Sets an idempotency key. + pub fn with_idempotency_key(mut self, key: impl Into) -> Self { + self.idempotency_key = Some(key.into()); + self } - /// Returns true if this is a high priority event. 
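// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): FileJob is now generic over a
// caller-defined payload instead of the staged DocumentJob types deleted
// above. PreprocessParams and the callback subject below are assumptions for
// illustration only; FileJob and its builder methods are defined in the hunk
// above, and the import path follows the `pub use` in stream/mod.rs.
// ---------------------------------------------------------------------------
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use crate::stream::FileJob;

// Hypothetical payload describing what a worker should do with the file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct PreprocessParams {
    run_ocr: bool,
    generate_embeddings: bool,
}

fn build_job(file_id: Uuid, object_key: String) -> FileJob<PreprocessParams> {
    FileJob::new(
        file_id,
        object_key,
        "pdf".to_string(),
        PreprocessParams {
            run_ocr: true,
            generate_embeddings: true,
        },
    )
    // Builder-style extras defined in the impl above.
    .with_callback("file.jobs.results")
    .with_idempotency_key(format!("upload-{file_id}"))
}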
- #[inline] - pub const fn is_high(self) -> bool { - matches!(self, Self::High) + /// Returns job age since creation. + pub fn age(&self) -> std::time::Duration { + let now = Timestamp::now(); + let signed_dur = now.duration_since(self.created_at); + std::time::Duration::from_secs(signed_dur.as_secs().max(0) as u64) } } @@ -44,29 +81,38 @@ impl EventPriority { mod tests { use super::*; - #[test] - fn test_priority_ordering() { - assert!(EventPriority::Low < EventPriority::Normal); - assert!(EventPriority::Normal < EventPriority::High); + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] + struct TestPayload { + message: String, } #[test] - fn test_priority_numeric_values() { - assert_eq!(EventPriority::Low.as_u8(), 0); - assert_eq!(EventPriority::Normal.as_u8(), 1); - assert_eq!(EventPriority::High.as_u8(), 2); - } + fn test_serialization_roundtrip() { + let file_id = Uuid::now_v7(); + let job = FileJob::new( + file_id, + "path".to_string(), + "pdf".to_string(), + TestPayload { + message: "hello".to_string(), + }, + ); - #[test] - fn test_priority_default() { - assert_eq!(EventPriority::default(), EventPriority::Normal); + let json = serde_json::to_string(&job).unwrap(); + let parsed: FileJob = serde_json::from_str(&json).unwrap(); + + assert_eq!(job.file_id, parsed.file_id); + assert_eq!(job.data, parsed.data); } #[test] - fn test_priority_serialization() { - let priority = EventPriority::High; - let serialized = serde_json::to_string(&priority).unwrap(); - let deserialized: EventPriority = serde_json::from_str(&serialized).unwrap(); - assert_eq!(priority, deserialized); + fn test_with_unit_payload() { + let file_id = Uuid::now_v7(); + let job: FileJob<()> = FileJob::new(file_id, "path".to_string(), "pdf".to_string(), ()); + + let json = serde_json::to_string(&job).unwrap(); + let parsed: FileJob<()> = serde_json::from_str(&json).unwrap(); + + assert_eq!(job.file_id, parsed.file_id); } } diff --git a/crates/nvisy-nats/src/stream/event_pub.rs b/crates/nvisy-nats/src/stream/event_pub.rs new file mode 100644 index 0000000..ee826b7 --- /dev/null +++ b/crates/nvisy-nats/src/stream/event_pub.rs @@ -0,0 +1,76 @@ +//! Generic event stream publisher. + +use std::marker::PhantomData; + +use async_nats::jetstream::Context; +use derive_more::{Deref, DerefMut}; +use serde::Serialize; + +use super::event_stream::EventStream; +use super::stream_pub::StreamPublisher; +use crate::Result; + +/// Generic event publisher for delivering typed events to workers. +/// +/// This publisher is generic over: +/// - `T`: The event/message type to publish +/// - `S`: The stream configuration (determines stream name, subject, etc.) +#[derive(Debug, Clone, Deref, DerefMut)] +pub struct EventPublisher +where + T: Serialize + Send + Sync + 'static, + S: EventStream, +{ + #[deref] + #[deref_mut] + publisher: StreamPublisher, + _stream: PhantomData, +} + +impl EventPublisher +where + T: Serialize + Send + Sync + 'static, + S: EventStream, +{ + /// Create a new event publisher for the stream type. + pub(crate) async fn new(jetstream: &Context) -> Result { + let publisher = StreamPublisher::new(jetstream, S::NAME).await?; + Ok(Self { + publisher, + _stream: PhantomData, + }) + } + + /// Publish an event to the stream's configured subject. + pub async fn publish(&self, event: &T) -> Result<()> { + self.publisher.publish(S::SUBJECT, event).await + } + + /// Publish an event with a sub-subject appended to the stream subject. + /// + /// Events are published to `{stream_subject}.{sub_subject}`. 
+ pub async fn publish_to(&self, sub_subject: &str, event: &T) -> Result<()> { + let subject = format!("{}.{}", S::SUBJECT, sub_subject); + self.publisher.publish(&subject, event).await + } + + /// Publish multiple events to the stream's configured subject. + pub async fn publish_batch(&self, events: &[T]) -> Result<()> + where + T: Clone, + { + self.publisher.publish_batch(S::SUBJECT, events).await + } + + /// Returns the stream name. + #[inline] + pub fn stream_name(&self) -> &'static str { + S::NAME + } + + /// Returns the subject. + #[inline] + pub fn subject(&self) -> &'static str { + S::SUBJECT + } +} diff --git a/crates/nvisy-nats/src/stream/event_stream.rs b/crates/nvisy-nats/src/stream/event_stream.rs new file mode 100644 index 0000000..3fb0efb --- /dev/null +++ b/crates/nvisy-nats/src/stream/event_stream.rs @@ -0,0 +1,74 @@ +//! Event stream configuration for NATS JetStream. + +use std::time::Duration; + +/// Marker trait for event streams. +/// +/// This trait defines the configuration for a NATS JetStream stream. +pub trait EventStream: Clone + Send + Sync + 'static { + /// Stream name used in NATS JetStream. + const NAME: &'static str; + + /// Subject pattern for publishing/subscribing to this stream. + const SUBJECT: &'static str; + + /// Maximum age for messages in this stream. + /// Returns `None` for streams where messages should not expire. + const MAX_AGE: Option; + + /// Default consumer name for this stream. + const CONSUMER_NAME: &'static str; +} + +/// Stream for file processing jobs. +/// +/// Messages expire after 7 days. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct FileStream; + +impl EventStream for FileStream { + const CONSUMER_NAME: &'static str = "file-worker"; + const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); + const NAME: &'static str = "FILE_JOBS"; + const SUBJECT: &'static str = "file.jobs"; +} + +/// Stream for webhook delivery. +/// +/// Messages expire after 1 day. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct WebhookStream; + +impl EventStream for WebhookStream { + const CONSUMER_NAME: &'static str = "webhook-worker"; + const MAX_AGE: Option = Some(Duration::from_secs(24 * 60 * 60)); + const NAME: &'static str = "WEBHOOKS"; + const SUBJECT: &'static str = "webhooks"; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_file_stream() { + assert_eq!(FileStream::NAME, "FILE_JOBS"); + assert_eq!(FileStream::SUBJECT, "file.jobs"); + assert_eq!( + FileStream::MAX_AGE, + Some(Duration::from_secs(7 * 24 * 60 * 60)) + ); + assert_eq!(FileStream::CONSUMER_NAME, "file-worker"); + } + + #[test] + fn test_webhook_stream() { + assert_eq!(WebhookStream::NAME, "WEBHOOKS"); + assert_eq!(WebhookStream::SUBJECT, "webhooks"); + assert_eq!( + WebhookStream::MAX_AGE, + Some(Duration::from_secs(24 * 60 * 60)) + ); + assert_eq!(WebhookStream::CONSUMER_NAME, "webhook-worker"); + } +} diff --git a/crates/nvisy-nats/src/stream/event_sub.rs b/crates/nvisy-nats/src/stream/event_sub.rs new file mode 100644 index 0000000..974fd59 --- /dev/null +++ b/crates/nvisy-nats/src/stream/event_sub.rs @@ -0,0 +1,63 @@ +//! Generic event stream subscriber. + +use std::marker::PhantomData; + +use async_nats::jetstream::Context; +use derive_more::{Deref, DerefMut}; +use serde::de::DeserializeOwned; + +use super::event_stream::EventStream; +use super::stream_sub::StreamSubscriber; +use crate::Result; + +/// Generic event subscriber for consuming typed events. 
+/// +/// This subscriber is generic over: +/// - `T`: The event/message type to consume +/// - `S`: The stream configuration (determines stream name, subject, consumer name) +#[derive(Debug, Deref, DerefMut)] +pub struct EventSubscriber +where + T: DeserializeOwned + Send + Sync + 'static, + S: EventStream, +{ + #[deref] + #[deref_mut] + subscriber: StreamSubscriber, + _stream: PhantomData, +} + +impl EventSubscriber +where + T: DeserializeOwned + Send + Sync + 'static, + S: EventStream, +{ + /// Create a new event subscriber using the stream's default consumer name. + pub(crate) async fn new(jetstream: &Context) -> Result { + let subscriber = StreamSubscriber::new(jetstream, S::NAME, S::CONSUMER_NAME) + .await? + .with_filter_subject(format!("{}.>", S::NAME)); + Ok(Self { + subscriber, + _stream: PhantomData, + }) + } + + /// Returns the stream name. + #[inline] + pub fn stream_name(&self) -> &'static str { + S::NAME + } + + /// Returns the subject. + #[inline] + pub fn subject(&self) -> &'static str { + S::SUBJECT + } + + /// Returns the consumer name. + #[inline] + pub fn consumer_name(&self) -> &'static str { + S::CONSUMER_NAME + } +} diff --git a/crates/nvisy-nats/src/stream/mod.rs b/crates/nvisy-nats/src/stream/mod.rs index 5115c07..1a939c2 100644 --- a/crates/nvisy-nats/src/stream/mod.rs +++ b/crates/nvisy-nats/src/stream/mod.rs @@ -2,44 +2,19 @@ //! //! This module provides type-safe streaming capabilities for: //! -//! - Document processing jobs -//! - Workspace event jobs +//! - File processing jobs via [`FileJob`], [`EventPublisher`], [`EventSubscriber`] +//! - Generic event publishing and subscribing with stream configuration via [`EventStream`] -// Base types mod event; -mod publisher; -mod subscriber; +mod event_pub; +mod event_stream; +mod event_sub; +mod stream_pub; +mod stream_sub; -// Document job -mod document_job; -mod document_job_pub; -mod document_job_sub; -mod document_task; - -// Workspace event -mod workspace_event; -mod workspace_event_pub; -mod workspace_event_sub; - -pub use document_job::{ - CompressionLevel, DocumentJob, PostprocessingData, PreprocessingData, ProcessingData, - ProcessingQuality, STREAM_NAME as DOCUMENT_JOB_STREAM, Stage, -}; -pub use document_job_pub::DocumentJobPublisher; -pub use document_job_sub::DocumentJobSubscriber; -pub use document_task::{GenerateInfoType, InsertValue, MergeOrder, PredefinedTask, SplitStrategy}; -pub use event::EventPriority; -pub use publisher::StreamPublisher; -pub use subscriber::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; -pub use workspace_event::{ - DocumentCreatedEvent, DocumentDeletedEvent, DocumentUpdateEvent, ErrorEvent, - FilePostprocessedEvent, FilePreprocessedEvent, FileTransformedEvent, JobCompletedEvent, - JobFailedEvent, JobProgressEvent, JobStage, JoinEvent, LeaveEvent, MemberAddedEvent, - MemberPresenceEvent, MemberRemovedEvent, PostprocessingType, PreprocessingType, - TransformationType, TypingEvent, WorkspaceEvent, WorkspaceUpdatedEvent, WorkspaceWsMessage, -}; -pub use workspace_event_pub::WorkspaceEventPublisher; -pub use workspace_event_sub::{ - WorkspaceEventBatchStream, WorkspaceEventMessage, WorkspaceEventStream, - WorkspaceEventSubscriber, -}; +pub use event::FileJob; +pub use event_pub::EventPublisher; +pub use event_stream::{EventStream, FileStream, WebhookStream}; +pub use event_sub::EventSubscriber; +pub use stream_pub::StreamPublisher; +pub use stream_sub::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; diff --git 
a/crates/nvisy-nats/src/stream/publisher.rs b/crates/nvisy-nats/src/stream/stream_pub.rs similarity index 98% rename from crates/nvisy-nats/src/stream/publisher.rs rename to crates/nvisy-nats/src/stream/stream_pub.rs index 4e143cc..4dde9e0 100644 --- a/crates/nvisy-nats/src/stream/publisher.rs +++ b/crates/nvisy-nats/src/stream/stream_pub.rs @@ -34,7 +34,7 @@ where { /// Create a new type-safe stream publisher #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub async fn new(jetstream: &Context, stream_name: &str) -> Result { + pub(crate) async fn new(jetstream: &Context, stream_name: &str) -> Result { let stream_config = stream::Config { name: stream_name.to_string(), description: Some(format!("Type-safe stream: {}", stream_name)), diff --git a/crates/nvisy-nats/src/stream/subscriber.rs b/crates/nvisy-nats/src/stream/stream_sub.rs similarity index 93% rename from crates/nvisy-nats/src/stream/subscriber.rs rename to crates/nvisy-nats/src/stream/stream_sub.rs index e3dc33c..458448d 100644 --- a/crates/nvisy-nats/src/stream/subscriber.rs +++ b/crates/nvisy-nats/src/stream/stream_sub.rs @@ -37,7 +37,11 @@ where { /// Create a new type-safe stream subscriber. #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub async fn new(jetstream: &Context, stream_name: &str, consumer_name: &str) -> Result { + pub(crate) async fn new( + jetstream: &Context, + stream_name: &str, + consumer_name: &str, + ) -> Result { // Verify stream exists jetstream .get_stream(stream_name) @@ -255,38 +259,6 @@ where .map_err(|e| Error::operation("consumer_info", e.to_string())) .map(|info| (*info).clone()) } - - /// Create a new subscriber with exponential backoff retry logic. - #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub async fn new_with_retry( - jetstream: &Context, - stream_name: &str, - consumer_name: &str, - max_retries: u32, - ) -> Result { - let mut attempts = 0; - let mut delay = std::time::Duration::from_millis(100); - - loop { - match Self::new(jetstream, stream_name, consumer_name).await { - Ok(subscriber) => return Ok(subscriber), - Err(e) if attempts < max_retries => { - attempts += 1; - debug!( - target: TRACING_TARGET_STREAM, - attempt = attempts, - max_retries = max_retries, - delay_ms = delay.as_millis(), - error = %e, - "Retrying subscriber creation" - ); - tokio::time::sleep(delay).await; - delay = std::cmp::min(delay * 2, std::time::Duration::from_secs(30)); - } - Err(e) => return Err(e), - } - } - } } /// Type-safe message stream wrapper. diff --git a/crates/nvisy-nats/src/stream/workspace_event.rs b/crates/nvisy-nats/src/stream/workspace_event.rs deleted file mode 100644 index 75a81b2..0000000 --- a/crates/nvisy-nats/src/stream/workspace_event.rs +++ /dev/null @@ -1,657 +0,0 @@ -//! Workspace event stream for real-time WebSocket communication. -//! -//! This module provides NATS-based pub/sub for workspace WebSocket messages, -//! enabling distributed real-time communication across multiple server instances. - -use jiff::Timestamp; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Member joined the workspace event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JoinEvent { - pub account_id: Uuid, - pub display_name: String, - pub timestamp: Timestamp, -} - -/// Member left the workspace event. 
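// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this diff): wiring the new generic event
// pub/sub together. The EventPublisher, EventSubscriber, EventStream, FileJob
// and FileStream items are introduced above; ReportStream and the subjects
// used here are assumptions for illustration. Constructors are pub(crate), so
// this is written as crate-internal code.
// ---------------------------------------------------------------------------
use std::time::Duration;

use async_nats::jetstream::Context;

use crate::stream::{EventPublisher, EventStream, EventSubscriber, FileJob, FileStream};
use crate::Result;

// A hypothetical additional stream, defined the same way as FileStream/WebhookStream.
#[derive(Debug, Clone, Copy, Default)]
struct ReportStream;

impl EventStream for ReportStream {
    const NAME: &'static str = "REPORTS";
    const SUBJECT: &'static str = "reports";
    const MAX_AGE: Option<Duration> = Some(Duration::from_secs(3 * 24 * 60 * 60));
    const CONSUMER_NAME: &'static str = "report-worker";
}

async fn wire_up(jetstream: &Context) -> Result<()> {
    // Publisher and subscriber for file jobs carrying a unit payload.
    let publisher = EventPublisher::<FileJob<()>, FileStream>::new(jetstream).await?;
    let subscriber = EventSubscriber::<FileJob<()>, FileStream>::new(jetstream).await?;

    let job = FileJob::new(uuid::Uuid::now_v7(), "path/to/object".into(), "pdf".into(), ());

    // Publish to the stream's configured subject, or to a per-file sub-subject.
    publisher.publish(&job).await?;
    publisher.publish_to(&job.file_id.to_string(), &job).await?;

    // The subscriber is pre-filtered to "<stream name>.>" and exposes the
    // wrapped StreamSubscriber via Deref for message consumption.
    tracing::debug!(consumer = subscriber.consumer_name(), "subscriber ready");
    Ok(())
}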
-#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct LeaveEvent { - pub account_id: Uuid, - pub timestamp: Timestamp, -} - -/// Document content update event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct DocumentUpdateEvent { - pub document_id: Uuid, - pub version: u32, - pub updated_by: Uuid, - pub timestamp: Timestamp, -} - -/// Document created event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct DocumentCreatedEvent { - pub document_id: Uuid, - pub display_name: String, - pub created_by: Uuid, - pub timestamp: Timestamp, -} - -/// Document deleted event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct DocumentDeletedEvent { - pub document_id: Uuid, - pub deleted_by: Uuid, - pub timestamp: Timestamp, -} - -/// Type of preprocessing operation completed. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum PreprocessingType { - /// File metadata validation completed. - Validation, - /// OCR text extraction completed. - Ocr, - /// Embeddings generation completed. - Embeddings, - /// Thumbnail generation completed. - Thumbnails, - /// All preprocessing steps completed. - Complete, -} - -/// File preprocessing completed event. -/// -/// Emitted when a preprocessing step (validation, OCR, embeddings) completes. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct FilePreprocessedEvent { - pub file_id: Uuid, - pub document_id: Uuid, - pub preprocessing_type: PreprocessingType, - #[serde(skip_serializing_if = "Option::is_none")] - pub details: Option, - pub timestamp: Timestamp, -} - -/// Type of transformation applied to the file. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum TransformationType { - /// Content was redacted. - Redaction, - /// Content was summarized. - Summary, - /// Content was translated. - Translation, - /// Information was extracted. - Extraction, - /// Information was inserted. - Insertion, - /// Content was reformatted. - Reformat, - /// Content was proofread. - Proofread, - /// Table of contents was generated. - TableOfContents, - /// File was split into multiple files. - Split, - /// Multiple files were merged. - Merge, - /// Custom VLM-based transformation. - Custom, -} - -/// File transformed event. -/// -/// Emitted when a document processing transformation completes. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct FileTransformedEvent { - pub file_id: Uuid, - pub document_id: Uuid, - pub transformation_type: TransformationType, - /// For split operations, the resulting file IDs. - #[serde(skip_serializing_if = "Option::is_none")] - pub result_file_ids: Option>, - /// Human-readable summary of the transformation. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub summary: Option, - pub transformed_by: Uuid, - pub timestamp: Timestamp, -} - -/// Type of postprocessing operation completed. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum PostprocessingType { - /// Format conversion completed. - Conversion, - /// Compression completed. - Compression, - /// Annotations flattened into document. - FlattenAnnotations, - /// All postprocessing steps completed. - Complete, -} - -/// File postprocessed event. -/// -/// Emitted when a postprocessing step (conversion, compression) completes. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct FilePostprocessedEvent { - pub file_id: Uuid, - pub document_id: Uuid, - pub postprocessing_type: PostprocessingType, - /// The output format if conversion was performed. - #[serde(skip_serializing_if = "Option::is_none")] - pub output_format: Option, - pub timestamp: Timestamp, -} - -/// Job processing stage for progress tracking. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum JobStage { - Preprocessing, - Processing, - Postprocessing, -} - -/// Job progress event. -/// -/// Emitted periodically during long-running jobs to indicate progress. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JobProgressEvent { - pub job_id: Uuid, - pub file_id: Uuid, - pub document_id: Uuid, - pub stage: JobStage, - /// Progress percentage (0-100). - pub progress: u8, - /// Current operation being performed. - #[serde(skip_serializing_if = "Option::is_none")] - pub current_operation: Option, - pub timestamp: Timestamp, -} - -/// Job completed event. -/// -/// Emitted when an entire document processing job completes successfully. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JobCompletedEvent { - pub job_id: Uuid, - pub file_id: Uuid, - pub document_id: Uuid, - /// The final output file ID (may differ from input if transformations created new files). - #[serde(skip_serializing_if = "Option::is_none")] - pub output_file_id: Option, - pub timestamp: Timestamp, -} - -/// Job failed event. -/// -/// Emitted when a document processing job fails. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JobFailedEvent { - pub job_id: Uuid, - pub file_id: Uuid, - pub document_id: Uuid, - pub stage: JobStage, - pub error_code: String, - pub error_message: String, - pub timestamp: Timestamp, -} - -/// Member presence update event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct MemberPresenceEvent { - pub account_id: Uuid, - pub is_online: bool, - pub timestamp: Timestamp, -} - -/// Member added to workspace event. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct MemberAddedEvent { - pub account_id: Uuid, - pub display_name: String, - pub member_role: String, - pub added_by: Uuid, - pub timestamp: Timestamp, -} - -/// Member removed from workspace event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct MemberRemovedEvent { - pub account_id: Uuid, - pub removed_by: Uuid, - pub timestamp: Timestamp, -} - -/// Workspace settings updated event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct WorkspaceUpdatedEvent { - pub display_name: Option, - pub updated_by: Uuid, - pub timestamp: Timestamp, -} - -/// Typing indicator event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct TypingEvent { - pub account_id: Uuid, - pub document_id: Option, - pub timestamp: Timestamp, -} - -/// Error event from server. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct ErrorEvent { - pub code: String, - pub message: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub details: Option, -} - -/// WebSocket message types for workspace communication. -/// -/// All messages are serialized as JSON with a `type` field that identifies -/// the message variant. This enables type-safe message handling on both -/// client and server. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(tag = "type", rename_all = "camelCase")] -pub enum WorkspaceWsMessage { - /// Client announces presence in the workspace. - Join(JoinEvent), - - /// Client leaves the workspace. - Leave(LeaveEvent), - - /// Document content update notification. - DocumentUpdate(DocumentUpdateEvent), - - /// Document creation notification. - DocumentCreated(DocumentCreatedEvent), - - /// Document deletion notification. - DocumentDeleted(DocumentDeletedEvent), - - /// File preprocessing step completed (validation, OCR, embeddings). - FilePreprocessed(FilePreprocessedEvent), - - /// File transformation completed (redaction, translation, etc.). - FileTransformed(FileTransformedEvent), - - /// File postprocessing step completed (conversion, compression). - FilePostprocessed(FilePostprocessedEvent), - - /// Job progress update. - JobProgress(JobProgressEvent), - - /// Job completed successfully. - JobCompleted(JobCompletedEvent), - - /// Job failed. - JobFailed(JobFailedEvent), - - /// Member presence update. - MemberPresence(MemberPresenceEvent), - - /// Member added to workspace. - MemberAdded(MemberAddedEvent), - - /// Member removed from workspace. - MemberRemoved(MemberRemovedEvent), - - /// Workspace settings updated. - WorkspaceUpdated(WorkspaceUpdatedEvent), - - /// Typing indicator. - Typing(TypingEvent), - - /// Error message from server. - Error(ErrorEvent), -} - -impl WorkspaceWsMessage { - /// Creates an error message with the given code and message. - #[inline] - pub fn error(code: impl Into, message: impl Into) -> Self { - Self::Error(ErrorEvent { - code: code.into(), - message: message.into(), - details: None, - }) - } - - /// Creates an error message with additional details. 
- #[inline] - pub fn error_with_details( - code: impl Into, - message: impl Into, - details: impl Into, - ) -> Self { - Self::Error(ErrorEvent { - code: code.into(), - message: message.into(), - details: Some(details.into()), - }) - } - - /// Get the account ID associated with this message, if any. - pub fn account_id(&self) -> Option { - match self { - Self::Join(e) => Some(e.account_id), - Self::Leave(e) => Some(e.account_id), - Self::DocumentUpdate(e) => Some(e.updated_by), - Self::DocumentCreated(e) => Some(e.created_by), - Self::DocumentDeleted(e) => Some(e.deleted_by), - Self::FilePreprocessed(_) => None, - Self::FileTransformed(e) => Some(e.transformed_by), - Self::FilePostprocessed(_) => None, - Self::JobProgress(_) => None, - Self::JobCompleted(_) => None, - Self::JobFailed(_) => None, - Self::MemberPresence(e) => Some(e.account_id), - Self::MemberAdded(e) => Some(e.account_id), - Self::MemberRemoved(e) => Some(e.account_id), - Self::WorkspaceUpdated(e) => Some(e.updated_by), - Self::Typing(e) => Some(e.account_id), - Self::Error(_) => None, - } - } - - /// Get the timestamp of this message. - pub fn timestamp(&self) -> Option { - match self { - Self::Join(e) => Some(e.timestamp), - Self::Leave(e) => Some(e.timestamp), - Self::DocumentUpdate(e) => Some(e.timestamp), - Self::DocumentCreated(e) => Some(e.timestamp), - Self::DocumentDeleted(e) => Some(e.timestamp), - Self::FilePreprocessed(e) => Some(e.timestamp), - Self::FileTransformed(e) => Some(e.timestamp), - Self::FilePostprocessed(e) => Some(e.timestamp), - Self::JobProgress(e) => Some(e.timestamp), - Self::JobCompleted(e) => Some(e.timestamp), - Self::JobFailed(e) => Some(e.timestamp), - Self::MemberPresence(e) => Some(e.timestamp), - Self::MemberAdded(e) => Some(e.timestamp), - Self::MemberRemoved(e) => Some(e.timestamp), - Self::WorkspaceUpdated(e) => Some(e.timestamp), - Self::Typing(e) => Some(e.timestamp), - Self::Error(_) => None, - } - } - - /// Create a join event. - pub fn join(account_id: Uuid, display_name: impl Into) -> Self { - Self::Join(JoinEvent { - account_id, - display_name: display_name.into(), - timestamp: Timestamp::now(), - }) - } - - /// Create a leave event. - pub fn leave(account_id: Uuid) -> Self { - Self::Leave(LeaveEvent { - account_id, - timestamp: Timestamp::now(), - }) - } - - /// Create a typing event. - pub fn typing(account_id: Uuid, document_id: Option) -> Self { - Self::Typing(TypingEvent { - account_id, - document_id, - timestamp: Timestamp::now(), - }) - } - - /// Create a document update event. - pub fn document_update(document_id: Uuid, version: u32, updated_by: Uuid) -> Self { - Self::DocumentUpdate(DocumentUpdateEvent { - document_id, - version, - updated_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a document created event. - pub fn document_created( - document_id: Uuid, - display_name: impl Into, - created_by: Uuid, - ) -> Self { - Self::DocumentCreated(DocumentCreatedEvent { - document_id, - display_name: display_name.into(), - created_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a document deleted event. - pub fn document_deleted(document_id: Uuid, deleted_by: Uuid) -> Self { - Self::DocumentDeleted(DocumentDeletedEvent { - document_id, - deleted_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a file preprocessed event. 
- pub fn file_preprocessed( - file_id: Uuid, - document_id: Uuid, - preprocessing_type: PreprocessingType, - ) -> Self { - Self::FilePreprocessed(FilePreprocessedEvent { - file_id, - document_id, - preprocessing_type, - details: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a file transformed event. - pub fn file_transformed( - file_id: Uuid, - document_id: Uuid, - transformation_type: TransformationType, - transformed_by: Uuid, - ) -> Self { - Self::FileTransformed(FileTransformedEvent { - file_id, - document_id, - transformation_type, - result_file_ids: None, - summary: None, - transformed_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a file postprocessed event. - pub fn file_postprocessed( - file_id: Uuid, - document_id: Uuid, - postprocessing_type: PostprocessingType, - ) -> Self { - Self::FilePostprocessed(FilePostprocessedEvent { - file_id, - document_id, - postprocessing_type, - output_format: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a job progress event. - pub fn job_progress( - job_id: Uuid, - file_id: Uuid, - document_id: Uuid, - stage: JobStage, - progress: u8, - ) -> Self { - Self::JobProgress(JobProgressEvent { - job_id, - file_id, - document_id, - stage, - progress: progress.min(100), - current_operation: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a job completed event. - pub fn job_completed(job_id: Uuid, file_id: Uuid, document_id: Uuid) -> Self { - Self::JobCompleted(JobCompletedEvent { - job_id, - file_id, - document_id, - output_file_id: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a job failed event. - pub fn job_failed( - job_id: Uuid, - file_id: Uuid, - document_id: Uuid, - stage: JobStage, - error_code: impl Into, - error_message: impl Into, - ) -> Self { - Self::JobFailed(JobFailedEvent { - job_id, - file_id, - document_id, - stage, - error_code: error_code.into(), - error_message: error_message.into(), - timestamp: Timestamp::now(), - }) - } - - /// Create a member presence event. - pub fn member_presence(account_id: Uuid, is_online: bool) -> Self { - Self::MemberPresence(MemberPresenceEvent { - account_id, - is_online, - timestamp: Timestamp::now(), - }) - } - - /// Create a member added event. - pub fn member_added( - account_id: Uuid, - display_name: impl Into, - member_role: impl Into, - added_by: Uuid, - ) -> Self { - Self::MemberAdded(MemberAddedEvent { - account_id, - display_name: display_name.into(), - member_role: member_role.into(), - added_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a member removed event. - pub fn member_removed(account_id: Uuid, removed_by: Uuid) -> Self { - Self::MemberRemoved(MemberRemovedEvent { - account_id, - removed_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a workspace updated event. - pub fn workspace_updated(display_name: Option, updated_by: Uuid) -> Self { - Self::WorkspaceUpdated(WorkspaceUpdatedEvent { - display_name, - updated_by, - timestamp: Timestamp::now(), - }) - } -} - -/// Workspace event envelope for NATS publishing. -/// -/// Wraps the WebSocket message with metadata for routing and filtering. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct WorkspaceEvent { - /// The workspace this event belongs to. - pub workspace_id: Uuid, - - /// The WebSocket message payload. - pub message: WorkspaceWsMessage, - - /// When this event was created. - pub created_at: Timestamp, -} - -impl WorkspaceEvent { - /// Create a new workspace event. 
- pub fn new(workspace_id: Uuid, message: WorkspaceWsMessage) -> Self { - Self { - workspace_id, - message, - created_at: Timestamp::now(), - } - } -} diff --git a/crates/nvisy-nats/src/stream/workspace_event_pub.rs b/crates/nvisy-nats/src/stream/workspace_event_pub.rs deleted file mode 100644 index 3152eeb..0000000 --- a/crates/nvisy-nats/src/stream/workspace_event_pub.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! Workspace event stream publisher for real-time WebSocket communication. - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; -use uuid::Uuid; - -use super::publisher::StreamPublisher; -use super::workspace_event::{WorkspaceEvent, WorkspaceWsMessage}; -use crate::Result; - -/// Workspace event publisher for broadcasting WebSocket messages. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct WorkspaceEventPublisher { - #[deref] - #[deref_mut] - publisher: StreamPublisher, -} - -impl WorkspaceEventPublisher { - /// Create a new workspace event publisher. - pub async fn new(jetstream: &Context) -> Result { - let publisher = StreamPublisher::new(jetstream, "PROJECT_EVENTS").await?; - Ok(Self { publisher }) - } - - /// Publish a WebSocket message to a specific workspace. - /// - /// Messages are published to the subject `PROJECT_EVENTS.{workspace_id}`. - pub async fn publish_message( - &self, - workspace_id: Uuid, - message: WorkspaceWsMessage, - ) -> Result<()> { - let event = WorkspaceEvent::new(workspace_id, message); - let subject = workspace_id.to_string(); - self.publisher.publish(&subject, &event).await - } - - /// Publish multiple messages to a workspace in batch. - pub async fn publish_batch( - &self, - workspace_id: Uuid, - messages: Vec, - ) -> Result<()> { - let events: Vec = messages - .into_iter() - .map(|msg| WorkspaceEvent::new(workspace_id, msg)) - .collect(); - - let subject = workspace_id.to_string(); - self.publisher - .publish_batch_parallel(&subject, &events, 10) - .await - } -} diff --git a/crates/nvisy-nats/src/stream/workspace_event_sub.rs b/crates/nvisy-nats/src/stream/workspace_event_sub.rs deleted file mode 100644 index e05fce9..0000000 --- a/crates/nvisy-nats/src/stream/workspace_event_sub.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! Workspace event stream subscriber for real-time WebSocket communication. - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; -use uuid::Uuid; - -use super::subscriber::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; -use super::workspace_event::WorkspaceEvent; -use crate::Result; - -/// Workspace event subscriber for receiving WebSocket messages. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct WorkspaceEventSubscriber { - #[deref] - #[deref_mut] - subscriber: StreamSubscriber, -} - -impl WorkspaceEventSubscriber { - /// Create a new workspace event subscriber. - /// - /// # Arguments - /// - /// * `jetstream` - JetStream context - /// * `consumer_name` - Unique name for this consumer (e.g., "server-instance-1") - pub async fn new(jetstream: &Context, consumer_name: &str) -> Result { - let subscriber = StreamSubscriber::new(jetstream, "PROJECT_EVENTS", consumer_name).await?; - Ok(Self { subscriber }) - } - - /// Create a subscriber filtered to a specific workspace. - /// - /// Only receives events for the specified workspace ID. - pub async fn new_for_workspace( - jetstream: &Context, - consumer_name: &str, - workspace_id: Uuid, - ) -> Result { - let subscriber = StreamSubscriber::new(jetstream, "PROJECT_EVENTS", consumer_name) - .await? 
- .with_filter_subject(format!("PROJECT_EVENTS.{}", workspace_id)); - Ok(Self { subscriber }) - } -} - -/// Type alias for workspace event batch stream. -pub type WorkspaceEventBatchStream = TypedBatchStream; - -/// Type alias for workspace event message. -pub type WorkspaceEventMessage = TypedMessage; - -/// Type alias for workspace event message stream. -pub type WorkspaceEventStream = TypedMessageStream; diff --git a/crates/nvisy-postgres/Cargo.toml b/crates/nvisy-postgres/Cargo.toml index 8b9194e..b15d4c8 100644 --- a/crates/nvisy-postgres/Cargo.toml +++ b/crates/nvisy-postgres/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-postgres" +description = "Type-safe PostgreSQL database layer for the Nvisy platform with async connection pooling" +readme = "./README.md" +keywords = ["postgres", "database", "orm", "diesel", "async"] +categories = ["database", "development-tools"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-postgres/README.md b/crates/nvisy-postgres/README.md index ddaf54a..98c7851 100644 --- a/crates/nvisy-postgres/README.md +++ b/crates/nvisy-postgres/README.md @@ -1,12 +1,10 @@ # nvisy-postgres +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Type-safe PostgreSQL database layer for the Nvisy platform with async connection pooling and embedded migrations. -[![Rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![Diesel](https://img.shields.io/badge/Diesel-2.3+-000000?style=flat-square&logo=rust&logoColor=white)](https://diesel.rs/) -[![PostgreSQL](https://img.shields.io/badge/PostgreSQL-17+-000000?style=flat-square&logo=postgresql&logoColor=white)](https://www.postgresql.org/) - ## Features - **Async Connection Pooling** - High-performance connection management with @@ -32,3 +30,18 @@ make generate-migrations The generated schema is located at `src/schema.rs` and provides type-safe table definitions for Diesel queries. + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-postgres/src/model/account_api_token.rs b/crates/nvisy-postgres/src/model/account_api_token.rs index 67ede27..ac01344 100644 --- a/crates/nvisy-postgres/src/model/account_api_token.rs +++ b/crates/nvisy-postgres/src/model/account_api_token.rs @@ -6,8 +6,10 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::account_api_tokens; -use crate::types::constants::token; -use crate::types::{ApiTokenType, HasCreatedAt, HasExpiresAt, HasSecurityContext}; +use crate::types::{ + ApiTokenType, EXPIRY_WARNING_MINUTES, HasCreatedAt, HasExpiresAt, HasSecurityContext, + LONG_LIVED_THRESHOLD_HOURS, +}; /// Account API token model representing an authentication token. 
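The deleted publisher/subscriber pair routes everything through a single JetStream stream, `PROJECT_EVENTS`, with one subject per workspace. A small sketch of that subject convention; the helper names and the reverse-parsing direction are assumptions, only the `PROJECT_EVENTS.{workspace_id}` shape comes from the code above:

```rust
// Sketch of the subject convention implied above (assumed helpers, not the
// crate's API). Needs uuid with the "v4" feature.
use uuid::Uuid;

const STREAM: &str = "PROJECT_EVENTS";

fn workspace_subject(workspace_id: Uuid) -> String {
    format!("{}.{}", STREAM, workspace_id)
}

fn workspace_id_from_subject(subject: &str) -> Option<Uuid> {
    subject.strip_prefix(STREAM)?.strip_prefix('.')?.parse().ok()
}

fn main() {
    let id = Uuid::new_v4();
    let subject = workspace_subject(id);
    assert_eq!(workspace_id_from_subject(&subject), Some(id));
    println!("{subject}");
}
```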
#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] @@ -139,7 +141,7 @@ impl AccountApiToken { /// Returns whether the token is about to expire (within warning threshold). pub fn is_expiring_soon_default(&self) -> bool { - self.is_expiring_soon(token::EXPIRY_WARNING_MINUTES) + self.is_expiring_soon(EXPIRY_WARNING_MINUTES) } /// Returns whether this is a web token. @@ -159,7 +161,7 @@ impl AccountApiToken { /// Returns whether the token is long-lived (active for more than 24 hours). pub fn is_long_lived(&self) -> bool { - i64::from(self.token_duration().get_hours()) > token::LONG_LIVED_THRESHOLD_HOURS + i64::from(self.token_duration().get_hours()) > LONG_LIVED_THRESHOLD_HOURS } /// Returns a shortened version of the token ID for logging/display. diff --git a/crates/nvisy-postgres/src/model/account_notification.rs b/crates/nvisy-postgres/src/model/account_notification.rs index 8521db8..4a1dbde 100644 --- a/crates/nvisy-postgres/src/model/account_notification.rs +++ b/crates/nvisy-postgres/src/model/account_notification.rs @@ -5,8 +5,7 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::account_notifications; -use crate::types::constants::notification; -use crate::types::{HasCreatedAt, HasExpiresAt, NotificationEvent}; +use crate::types::{DEFAULT_RETENTION_DAYS, HasCreatedAt, HasExpiresAt, NotificationEvent}; /// Account notification model representing a notification sent to a user. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] @@ -186,9 +185,7 @@ impl HasExpiresAt for AccountNotification { Some( self.expires_at.map(Into::into).unwrap_or( jiff::Timestamp::now() - .checked_add( - jiff::Span::new().hours(notification::DEFAULT_RETENTION_DAYS as i64 * 24), - ) + .checked_add(jiff::Span::new().hours(DEFAULT_RETENTION_DAYS as i64 * 24)) .expect("valid notification expiry"), ), ) diff --git a/crates/nvisy-postgres/src/model/document.rs b/crates/nvisy-postgres/src/model/document.rs deleted file mode 100644 index ff92e47..0000000 --- a/crates/nvisy-postgres/src/model/document.rs +++ /dev/null @@ -1,143 +0,0 @@ -//! Main document model for PostgreSQL database operations. - -use diesel::prelude::*; -use jiff_diesel::Timestamp; -use uuid::Uuid; - -use crate::schema::documents; -use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt, Tags}; - -/// Main document model representing a document within a workspace. -#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = documents)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct Document { - /// Unique document identifier. - pub id: Uuid, - /// Reference to the workspace this document belongs to. - pub workspace_id: Uuid, - /// Reference to the account that owns this document. - pub account_id: Uuid, - /// Human-readable document name. - pub display_name: String, - /// Detailed description of the document. - pub description: Option, - /// Tags for document classification and search. - pub tags: Vec>, - /// Additional document metadata. - pub metadata: serde_json::Value, - /// Timestamp when the document was created. - pub created_at: Timestamp, - /// Timestamp when the document was last updated. - pub updated_at: Timestamp, - /// Timestamp when the document was soft-deleted. - pub deleted_at: Option, -} - -/// Data for creating a new document. -#[derive(Debug, Default, Clone, Insertable)] -#[diesel(table_name = documents)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocument { - /// Workspace ID. - pub workspace_id: Uuid, - /// Account ID. 
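`is_expiring_soon_default` above compares the token's expiry against a warning window defined by `EXPIRY_WARNING_MINUTES`. A standalone sketch of such a check using jiff; the function shape and the treatment of already-expired tokens are assumptions, not the crate's implementation:

```rust
// Standalone sketch of an "expiring soon" check (assumed shape).
use jiff::{Span, Timestamp};

fn is_expiring_soon(expires_at: Timestamp, warning_minutes: i64) -> bool {
    let now = Timestamp::now();
    if expires_at <= now {
        return false; // already expired; assumed to be handled separately
    }
    let cutoff = now
        .checked_add(Span::new().minutes(warning_minutes))
        .expect("warning window fits in the timestamp range");
    expires_at <= cutoff
}

fn main() {
    let in_ten_minutes = Timestamp::now()
        .checked_add(Span::new().minutes(10))
        .unwrap();
    // With a 15-minute warning window, a token expiring in 10 minutes is flagged.
    assert!(is_expiring_soon(in_ten_minutes, 15));
}
```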
- pub account_id: Uuid, - /// Document name. - pub display_name: Option, - /// Document description. - pub description: Option, - /// Document tags. - pub tags: Option>>, - /// Metadata. - pub metadata: Option, -} - -/// Data for updating a document. -#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = documents)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocument { - /// Document name. - pub display_name: Option, - /// Document description. - pub description: Option>, - /// Document tags. - pub tags: Option>>, - /// Metadata. - pub metadata: Option, -} - -impl Document { - /// Returns the flattened tags (removing None values). - pub fn tags(&self) -> Vec { - let tags = self.tags.clone(); - tags.into_iter().flatten().collect() - } - - /// Returns whether the document is deleted. - pub fn is_deleted(&self) -> bool { - self.deleted_at.is_some() - } - - /// Returns whether the document has tags. - pub fn has_tags(&self) -> bool { - !self.tags.is_empty() - } - - /// Returns whether the document contains a specific tag. - pub fn has_tag(&self, tag: &str) -> bool { - self.tags - .iter() - .any(|t| t.as_ref() == Some(&tag.to_string())) - } - - /// Returns the flattened tags (removing None values). - pub fn get_tags(&self) -> Vec { - self.tags.iter().filter_map(|tag| tag.clone()).collect() - } - - /// Returns the tags as a Tags helper. - pub fn tags_helper(&self) -> Tags { - Tags::from_optional_strings(self.tags.clone()) - } - - /// Returns whether the document has a description. - pub fn has_description(&self) -> bool { - self.description - .as_deref() - .is_some_and(|desc| !desc.is_empty()) - } - - /// Returns whether the document has custom metadata. - pub fn has_metadata(&self) -> bool { - !self.metadata.as_object().is_none_or(|obj| obj.is_empty()) - } - - /// Returns the document's display name or a default. - pub fn display_name_or_default(&self) -> &str { - if self.display_name.is_empty() { - "Untitled Document" - } else { - &self.display_name - } - } -} - -impl HasCreatedAt for Document { - fn created_at(&self) -> jiff::Timestamp { - self.created_at.into() - } -} - -impl HasUpdatedAt for Document { - fn updated_at(&self) -> jiff::Timestamp { - self.updated_at.into() - } -} - -impl HasDeletedAt for Document { - fn deleted_at(&self) -> Option { - self.deleted_at.map(Into::into) - } -} diff --git a/crates/nvisy-postgres/src/model/document_comment.rs b/crates/nvisy-postgres/src/model/document_comment.rs deleted file mode 100644 index f67e27e..0000000 --- a/crates/nvisy-postgres/src/model/document_comment.rs +++ /dev/null @@ -1,152 +0,0 @@ -//! Document comment model for PostgreSQL database operations. - -use diesel::prelude::*; -use jiff_diesel::Timestamp; -use uuid::Uuid; - -use crate::schema::document_comments; -use crate::types::constants::comment; -use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt}; - -/// Document comment model representing user discussions on files. -#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = document_comments)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentComment { - /// Unique comment identifier. - pub id: Uuid, - /// Reference to the parent file. - pub file_id: Uuid, - /// Reference to the account that authored this comment. - pub account_id: Uuid, - /// Parent comment for threaded replies (NULL for top-level comments). - pub parent_comment_id: Option, - /// Account being replied to (@mention). - pub reply_to_account_id: Option, - /// Comment text content. 
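The tag helpers in the removed `Document` model flatten a vector of optional strings, which is how Diesel represents a Postgres `text[]` whose elements are declared nullable. A plain-Rust sketch of the same flattening and membership check, as free functions rather than methods:

```rust
// Plain-Rust sketch of the tag handling above: a nullable-element text[]
// column round-trips as Vec<Option<String>>, so reads drop the None slots.
fn flatten_tags(tags: &[Option<String>]) -> Vec<String> {
    tags.iter().filter_map(|t| t.clone()).collect()
}

fn has_tag(tags: &[Option<String>], needle: &str) -> bool {
    tags.iter().any(|t| t.as_deref() == Some(needle))
}

fn main() {
    let raw = vec![Some("invoice".to_string()), None, Some("2024".to_string())];
    assert_eq!(flatten_tags(&raw), vec!["invoice", "2024"]);
    assert!(has_tag(&raw, "invoice"));
    assert!(!has_tag(&raw, "draft"));
}
```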
- pub content: String, - /// Additional comment metadata. - pub metadata: serde_json::Value, - /// Timestamp when the comment was created. - pub created_at: Timestamp, - /// Timestamp when the comment was last updated. - pub updated_at: Timestamp, - /// Timestamp when the comment was soft-deleted. - pub deleted_at: Option, -} - -/// Data for creating a new document comment. -#[derive(Debug, Default, Clone, Insertable)] -#[diesel(table_name = document_comments)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentComment { - /// File ID. - pub file_id: Uuid, - /// Account ID. - pub account_id: Uuid, - /// Parent comment ID for replies. - pub parent_comment_id: Option, - /// Reply to account ID (@mention). - pub reply_to_account_id: Option, - /// Comment content. - pub content: String, - /// Metadata. - pub metadata: Option, -} - -/// Data for updating a document comment. -#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_comments)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentComment { - /// Comment content. - pub content: Option, - /// Metadata. - pub metadata: Option, -} - -impl DocumentComment { - /// Returns the comment content, or `None` if the comment is deleted. - pub fn get_content(&self) -> Option { - if self.is_deleted() { - None - } else { - Some(self.content.clone()) - } - } - - /// Returns whether this is a top-level comment (not a reply). - pub fn is_top_level(&self) -> bool { - self.parent_comment_id.is_none() - } - - /// Returns whether this is a reply to another comment. - pub fn is_reply(&self) -> bool { - self.parent_comment_id.is_some() - } - - /// Returns whether this comment mentions another account. - pub fn has_mention(&self) -> bool { - self.reply_to_account_id.is_some() - } - - /// Returns whether this comment is deleted. - pub fn is_deleted(&self) -> bool { - self.deleted_at.is_some() - } - - /// Returns whether this comment has been edited. - pub fn is_edited(&self) -> bool { - let duration = - jiff::Timestamp::from(self.updated_at) - jiff::Timestamp::from(self.created_at); - duration.get_seconds() > comment::EDIT_GRACE_PERIOD_SECONDS - } -} - -impl NewDocumentComment { - /// Creates a new comment on a file. - pub fn for_file(file_id: Uuid, account_id: Uuid, content: String) -> Self { - Self { - file_id, - account_id, - content, - ..Default::default() - } - } - - /// Sets the parent comment ID for threaded replies. - pub fn with_parent(mut self, parent_comment_id: Uuid) -> Self { - self.parent_comment_id = Some(parent_comment_id); - self - } - - /// Sets the reply-to account ID for @mentions. - pub fn with_reply_to(mut self, reply_to_account_id: Uuid) -> Self { - self.reply_to_account_id = Some(reply_to_account_id); - self - } - - /// Sets custom metadata for the comment. 
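`is_edited` above only flags a comment when `updated_at` trails `created_at` by more than a grace period, so the initial insert (where the two timestamps coincide) is not reported as an edit. A standalone sketch of that check; the 30-second constant is illustrative, the real value lived in the removed `comment` constants module:

```rust
// Sketch of the is_edited heuristic above (constant value assumed).
use jiff::{Span, Timestamp};

const EDIT_GRACE_PERIOD_SECONDS: i64 = 30; // assumed value

fn is_edited(created_at: Timestamp, updated_at: Timestamp) -> bool {
    (updated_at - created_at).get_seconds() > EDIT_GRACE_PERIOD_SECONDS
}

fn main() {
    let created = Timestamp::now();
    let touched_up = created.checked_add(Span::new().seconds(5)).unwrap();
    let real_edit = created.checked_add(Span::new().minutes(5)).unwrap();
    assert!(!is_edited(created, touched_up)); // within the grace period
    assert!(is_edited(created, real_edit)); // a genuine later edit
}
```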
- pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self { - self.metadata = Some(metadata); - self - } -} - -impl HasCreatedAt for DocumentComment { - fn created_at(&self) -> jiff::Timestamp { - self.created_at.into() - } -} - -impl HasUpdatedAt for DocumentComment { - fn updated_at(&self) -> jiff::Timestamp { - self.updated_at.into() - } -} - -impl HasDeletedAt for DocumentComment { - fn deleted_at(&self) -> Option { - self.deleted_at.map(Into::into) - } -} diff --git a/crates/nvisy-postgres/src/model/document_file.rs b/crates/nvisy-postgres/src/model/file.rs similarity index 53% rename from crates/nvisy-postgres/src/model/document_file.rs rename to crates/nvisy-postgres/src/model/file.rs index c4fa53e..dc22d7f 100644 --- a/crates/nvisy-postgres/src/model/document_file.rs +++ b/crates/nvisy-postgres/src/model/file.rs @@ -1,50 +1,39 @@ -//! Document file model for PostgreSQL database operations. +//! File model for PostgreSQL database operations. use diesel::prelude::*; use jiff_diesel::Timestamp; use uuid::Uuid; -use crate::schema::document_files; -use crate::types::constants::file; -use crate::types::{ - ContentSegmentation, HasCreatedAt, HasDeletedAt, HasUpdatedAt, ProcessingStatus, RequireMode, -}; +use crate::schema::files; +use crate::types::{FileSource, HasCreatedAt, HasDeletedAt, HasUpdatedAt, RECENTLY_UPLOADED_HOURS}; -/// Document file model representing a file attached to a document. +/// File model representing a file stored in the system. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = document_files)] +#[diesel(table_name = files)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentFile { +pub struct File { /// Unique file identifier. pub id: Uuid, - /// Reference to the workspace this file belongs to (required). + /// Reference to the workspace this file belongs to. pub workspace_id: Uuid, - /// Reference to the document this file belongs to (optional). - pub document_id: Option, /// Reference to the account that owns this file. pub account_id: Uuid, - /// Parent file reference for hierarchical relationships. + /// Parent file reference for version chains. pub parent_id: Option, + /// Version number (1 for original, increments for new versions). + pub version_number: i32, /// Human-readable file name for display. pub display_name: String, /// Original filename when uploaded. pub original_filename: String, /// File extension (without the dot). pub file_extension: String, + /// MIME type of the file. + pub mime_type: Option, /// Classification tags. pub tags: Vec>, - /// Processing mode requirements. - pub require_mode: RequireMode, - /// Processing priority (higher numbers = higher priority). - pub processing_priority: i32, - /// Current processing status. - pub processing_status: ProcessingStatus, - /// Whether file content has been indexed for search. - pub is_indexed: bool, - /// Content segmentation strategy. - pub content_segmentation: ContentSegmentation, - /// Whether to enable visual content processing. - pub visual_support: bool, + /// How the file was created (uploaded, imported, generated). + pub source: FileSource, /// File size in bytes. pub file_size_bytes: i64, /// SHA-256 hash of the file. @@ -63,18 +52,16 @@ pub struct DocumentFile { pub deleted_at: Option, } -/// Data for creating a new document file. +/// Data for creating a new file. 
#[derive(Debug, Default, Clone, Insertable)] -#[diesel(table_name = document_files)] +#[diesel(table_name = files)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentFile { +pub struct NewFile { /// Workspace ID (required). pub workspace_id: Uuid, - /// Document ID (optional). - pub document_id: Option, /// Account ID. pub account_id: Uuid, - /// Parent file ID. + /// Parent file ID (for version chains). pub parent_id: Option, /// Display name. pub display_name: Option, @@ -82,74 +69,49 @@ pub struct NewDocumentFile { pub original_filename: Option, /// File extension. pub file_extension: Option, - /// Tags + /// MIME type. + pub mime_type: Option, + /// Tags. pub tags: Option>>, - /// Require mode - pub require_mode: Option, - /// Processing priority - pub processing_priority: Option, - /// Processing status - pub processing_status: Option, - /// Is indexed flag. - pub is_indexed: Option, - /// Content segmentation - pub content_segmentation: Option, - /// Visual support - pub visual_support: Option, - /// File size in bytes + /// How the file was created. + pub source: Option, + /// File size in bytes. pub file_size_bytes: i64, - /// SHA-256 hash + /// SHA-256 hash. pub file_hash_sha256: Vec, - /// Storage path + /// Storage path. pub storage_path: String, - /// Storage bucket + /// Storage bucket. pub storage_bucket: String, - /// Metadata + /// Metadata. pub metadata: Option, } -/// Data for updating a document file. +/// Data for updating a file. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_files)] +#[diesel(table_name = files)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentFile { - // Note: workspace_id is required and should not be updated after creation - /// Document ID - pub document_id: Option>, - /// Storage path (for moving files between buckets) - pub storage_path: Option, - /// Display name +pub struct UpdateFile { + /// Display name. pub display_name: Option, - /// Parent file ID + /// Parent file ID. pub parent_id: Option>, - /// Tags + /// Tags. pub tags: Option>>, - /// Require mode - pub require_mode: Option, - /// Processing priority - pub processing_priority: Option, - /// Processing status - pub processing_status: Option, - /// Is indexed flag - pub is_indexed: Option, - /// Content segmentation - pub content_segmentation: Option, - /// Visual support - pub visual_support: Option, - /// File size in bytes - pub file_size_bytes: Option, - /// SHA-256 hash - pub file_hash_sha256: Option>, - /// Metadata + /// How the file was created. + pub source: Option, + /// MIME type. + pub mime_type: Option>, + /// Metadata. pub metadata: Option, - /// Soft delete timestamp + /// Soft delete timestamp. pub deleted_at: Option>, } -impl DocumentFile { +impl File { /// Returns whether the file was uploaded recently. pub fn is_recently_uploaded(&self) -> bool { - self.was_created_within(jiff::Span::new().hours(file::RECENTLY_UPLOADED_HOURS)) + self.was_created_within(jiff::Span::new().hours(RECENTLY_UPLOADED_HOURS)) } /// Returns whether the file is deleted. @@ -157,26 +119,6 @@ impl DocumentFile { self.deleted_at.is_some() } - /// Returns whether the file is ready for use. - pub fn is_ready(&self) -> bool { - self.processing_status.is_ready() - } - - /// Returns whether the file is currently being processed. - pub fn is_processing(&self) -> bool { - self.processing_status.is_processing() - } - - /// Returns whether the file has completed processing. 
- pub fn is_processed(&self) -> bool { - self.processing_status.is_final() - } - - /// Returns whether the file processing was canceled. - pub fn is_canceled(&self) -> bool { - self.processing_status.is_canceled() - } - /// Returns the file size in a human-readable format. pub fn file_size_human(&self) -> String { let bytes = self.file_size_bytes as f64; @@ -240,31 +182,30 @@ impl DocumentFile { .collect() } - /// Returns the processing priority level description. - pub fn priority_description(&self) -> &'static str { - match self.processing_priority { - p if p >= 9 => "Critical", - p if p >= 7 => "High", - p if p >= 5 => "Medium", - p if p >= 3 => "Low", - _ => "Minimal", - } + /// Returns whether this is the original version (version 1). + pub fn is_original_version(&self) -> bool { + self.version_number == 1 + } + + /// Returns whether this file is a newer version of another file. + pub fn is_version_of(&self, other: &File) -> bool { + self.parent_id == Some(other.id) && self.version_number > other.version_number } } -impl HasCreatedAt for DocumentFile { +impl HasCreatedAt for File { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } } -impl HasUpdatedAt for DocumentFile { +impl HasUpdatedAt for File { fn updated_at(&self) -> jiff::Timestamp { self.updated_at.into() } } -impl HasDeletedAt for DocumentFile { +impl HasDeletedAt for File { fn deleted_at(&self) -> Option { self.deleted_at.map(Into::into) } diff --git a/crates/nvisy-postgres/src/model/document_annotation.rs b/crates/nvisy-postgres/src/model/file_annotation.rs similarity index 76% rename from crates/nvisy-postgres/src/model/document_annotation.rs rename to crates/nvisy-postgres/src/model/file_annotation.rs index 3017bd0..ab7bae2 100644 --- a/crates/nvisy-postgres/src/model/document_annotation.rs +++ b/crates/nvisy-postgres/src/model/file_annotation.rs @@ -1,26 +1,26 @@ -//! Document annotation model for PostgreSQL database operations. +//! File annotation model for PostgreSQL database operations. use diesel::prelude::*; use jiff_diesel::Timestamp; use uuid::Uuid; -use crate::schema::document_annotations; +use crate::schema::file_annotations; use crate::types::{AnnotationType, HasCreatedAt, HasDeletedAt, HasUpdatedAt}; -/// Document annotation model representing user annotations on document content. +/// File annotation model representing user annotations on file content. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = document_annotations)] +#[diesel(table_name = file_annotations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentAnnotation { +pub struct FileAnnotation { /// Unique annotation identifier. pub id: Uuid, - /// Reference to the document file this annotation belongs to. - pub document_file_id: Uuid, + /// Reference to the file this annotation belongs to. + pub file_id: Uuid, /// Reference to the account that created this annotation. pub account_id: Uuid, /// Annotation text content. pub content: String, - /// Type of annotation (note, highlight). + /// Type of annotation (annotation, highlight). pub annotation_type: AnnotationType, /// Extended metadata including position/location. pub metadata: serde_json::Value, @@ -32,13 +32,13 @@ pub struct DocumentAnnotation { pub deleted_at: Option, } -/// Data for creating a new document annotation. +/// Data for creating a new file annotation. 
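`file_size_human` above converts `file_size_bytes` into a readable string, but its body is elided in this hunk. The following is only a sketch of the usual approach; the 1024 base, unit labels, and rounding are assumptions:

```rust
// Sketch only: unit labels and rounding below are assumptions, not the
// crate's implementation.
fn file_size_human(file_size_bytes: i64) -> String {
    const UNITS: [&str; 5] = ["B", "KiB", "MiB", "GiB", "TiB"];
    let mut size = file_size_bytes.max(0) as f64;
    let mut unit = 0;
    while size >= 1024.0 && unit < UNITS.len() - 1 {
        size /= 1024.0;
        unit += 1;
    }
    if unit == 0 {
        format!("{} {}", file_size_bytes.max(0), UNITS[0])
    } else {
        format!("{size:.1} {}", UNITS[unit])
    }
}

fn main() {
    assert_eq!(file_size_human(512), "512 B");
    assert_eq!(file_size_human(10 * 1024 * 1024), "10.0 MiB");
}
```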
#[derive(Debug, Clone, Insertable)] -#[diesel(table_name = document_annotations)] +#[diesel(table_name = file_annotations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentAnnotation { - /// Document file ID. - pub document_file_id: Uuid, +pub struct NewFileAnnotation { + /// File ID. + pub file_id: Uuid, /// Account ID. pub account_id: Uuid, /// Annotation content. @@ -49,20 +49,22 @@ pub struct NewDocumentAnnotation { pub metadata: Option, } -/// Data for updating a document annotation. +/// Data for updating a file annotation. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_annotations)] +#[diesel(table_name = file_annotations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentAnnotation { +pub struct UpdateFileAnnotation { /// Annotation content. pub content: Option, /// Annotation type. pub annotation_type: Option, /// Metadata. pub metadata: Option, + /// Soft delete timestamp. + pub deleted_at: Option>, } -impl DocumentAnnotation { +impl FileAnnotation { /// Returns whether the annotation was created recently. pub fn is_recent(&self) -> bool { self.was_created_within(jiff::Span::new().hours(24)) @@ -104,19 +106,19 @@ impl DocumentAnnotation { } } -impl HasCreatedAt for DocumentAnnotation { +impl HasCreatedAt for FileAnnotation { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } } -impl HasUpdatedAt for DocumentAnnotation { +impl HasUpdatedAt for FileAnnotation { fn updated_at(&self) -> jiff::Timestamp { self.updated_at.into() } } -impl HasDeletedAt for DocumentAnnotation { +impl HasDeletedAt for FileAnnotation { fn deleted_at(&self) -> Option { self.deleted_at.map(Into::into) } diff --git a/crates/nvisy-postgres/src/model/document_chunk.rs b/crates/nvisy-postgres/src/model/file_chunk.rs similarity index 74% rename from crates/nvisy-postgres/src/model/document_chunk.rs rename to crates/nvisy-postgres/src/model/file_chunk.rs index 86b53eb..c26ce84 100644 --- a/crates/nvisy-postgres/src/model/document_chunk.rs +++ b/crates/nvisy-postgres/src/model/file_chunk.rs @@ -1,22 +1,22 @@ -//! Document chunk model for PostgreSQL database operations. +//! File chunk model for PostgreSQL database operations. use diesel::prelude::*; use jiff_diesel::Timestamp; use pgvector::Vector; use uuid::Uuid; -use crate::schema::document_chunks; +use crate::schema::file_chunks; use crate::types::{HasCreatedAt, HasUpdatedAt}; -/// Document chunk model representing a text segment from a document file. +/// File chunk model representing a text segment from a file. /// /// Chunks are used for semantic search via vector embeddings. Each chunk -/// represents a portion of a document with its embedding vector for +/// represents a portion of a file with its embedding vector for /// similarity search. #[derive(Debug, Clone, Queryable, Selectable)] -#[diesel(table_name = document_chunks)] +#[diesel(table_name = file_chunks)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentChunk { +pub struct FileChunk { /// Unique chunk identifier. pub id: Uuid, /// Reference to the file this chunk belongs to. @@ -41,11 +41,11 @@ pub struct DocumentChunk { pub updated_at: Timestamp, } -/// Data for creating a new document chunk. +/// Data for creating a new file chunk. #[derive(Debug, Clone, Insertable)] -#[diesel(table_name = document_chunks)] +#[diesel(table_name = file_chunks)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentChunk { +pub struct NewFileChunk { /// File ID (required). 
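`UpdateFileAnnotation` (like `UpdateFile` earlier in this diff) gains a doubly wrapped optional `deleted_at`. With Diesel's `AsChangeset`, the outer `Option` controls whether the column is touched at all and the inner one carries the nullable value, which is what makes soft-delete and restore expressible in one changeset. A plain-Rust illustration of the three states:

```rust
// Plain-Rust illustration of Diesel's documented AsChangeset behavior for a
// doubly wrapped Option on a nullable column such as deleted_at.
fn describe(update: Option<Option<&str>>) -> &'static str {
    match update {
        None => "field omitted: column left untouched",
        Some(None) => "explicit NULL: e.g. clearing deleted_at on restore",
        Some(Some(_)) => "column set to the new value",
    }
}

fn main() {
    assert_eq!(describe(None), "field omitted: column left untouched");
    assert_eq!(
        describe(Some(None)),
        "explicit NULL: e.g. clearing deleted_at on restore"
    );
    assert_eq!(
        describe(Some(Some("2024-01-01T00:00:00Z"))),
        "column set to the new value"
    );
}
```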
pub file_id: Uuid, /// Chunk index within the file. @@ -56,19 +56,19 @@ pub struct NewDocumentChunk { pub content_size: Option, /// Token count. pub token_count: Option, - /// Vector embedding. + /// Vector embedding (required). pub embedding: Vector, - /// Embedding model name. - pub embedding_model: Option, + /// Embedding model name (required). + pub embedding_model: String, /// Metadata. pub metadata: Option, } -/// Data for updating a document chunk. +/// Data for updating a file chunk. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_chunks)] +#[diesel(table_name = file_chunks)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentChunk { +pub struct UpdateFileChunk { /// Token count. pub token_count: Option, /// Vector embedding. @@ -79,7 +79,7 @@ pub struct UpdateDocumentChunk { pub metadata: Option, } -impl DocumentChunk { +impl FileChunk { /// Returns whether the chunk has custom metadata. pub fn has_metadata(&self) -> bool { !self.metadata.as_object().is_none_or(|obj| obj.is_empty()) @@ -91,32 +91,32 @@ impl DocumentChunk { } } -impl HasCreatedAt for DocumentChunk { +impl HasCreatedAt for FileChunk { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } } -impl HasUpdatedAt for DocumentChunk { +impl HasUpdatedAt for FileChunk { fn updated_at(&self) -> jiff::Timestamp { self.updated_at.into() } } -/// A document chunk with its similarity score. +/// A file chunk with its similarity score. /// /// Returned from similarity search queries. #[derive(Debug, Clone)] -pub struct ScoredDocumentChunk { - /// The document chunk. - pub chunk: DocumentChunk, +pub struct ScoredFileChunk { + /// The file chunk. + pub chunk: FileChunk, /// Similarity score (0.0 to 1.0, higher is more similar). pub score: f64, } -impl ScoredDocumentChunk { +impl ScoredFileChunk { /// Returns a reference to the chunk. - pub fn chunk(&self) -> &DocumentChunk { + pub fn chunk(&self) -> &FileChunk { &self.chunk } @@ -126,7 +126,7 @@ impl ScoredDocumentChunk { } /// Consumes self and returns the inner chunk. - pub fn into_chunk(self) -> DocumentChunk { + pub fn into_chunk(self) -> FileChunk { self.chunk } } diff --git a/crates/nvisy-postgres/src/model/mod.rs b/crates/nvisy-postgres/src/model/mod.rs index 8e4d87d..8c381c7 100644 --- a/crates/nvisy-postgres/src/model/mod.rs +++ b/crates/nvisy-postgres/src/model/mod.rs @@ -3,23 +3,23 @@ //! This module contains Diesel model definitions for all database tables, //! including structs for querying, inserting, and updating records. 
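`ScoredFileChunk` documents its score as 0.0 to 1.0 with higher meaning more similar, while the repository queries later in this diff order by pgvector's cosine *distance*. Assuming the common convention of score = 1 − distance, a standalone sketch of the conversion; the clamp and the distance function are illustrative, not the crate's code:

```rust
// Sketch of the distance-to-score conversion (assumed convention:
// score = 1 - cosine distance, clamped to the documented 0.0..=1.0 range).
fn cosine_distance(a: &[f32], b: &[f32]) -> f64 {
    let dot: f64 = a.iter().zip(b).map(|(x, y)| f64::from(*x) * f64::from(*y)).sum();
    let norm_a = a.iter().map(|x| f64::from(*x).powi(2)).sum::<f64>().sqrt();
    let norm_b = b.iter().map(|y| f64::from(*y).powi(2)).sum::<f64>().sqrt();
    1.0 - dot / (norm_a * norm_b) // undefined for zero vectors; sketch only
}

fn similarity_score(distance: f64) -> f64 {
    (1.0 - distance).clamp(0.0, 1.0)
}

fn main() {
    let d = cosine_distance(&[1.0, 0.0], &[1.0, 0.0]);
    assert!(similarity_score(d) > 0.999); // identical direction => score ~1.0
}
```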
-pub mod account; -pub mod account_action_token; -pub mod account_api_token; -pub mod account_notification; -pub mod document; -pub mod document_annotation; -pub mod document_chunk; -pub mod document_comment; -pub mod document_file; +mod account; +mod account_action_token; +mod account_api_token; +mod account_notification; +mod file; +mod file_annotation; +mod file_chunk; +mod pipeline; +mod pipeline_run; -pub mod workspace; -pub mod workspace_activity; -pub mod workspace_integration; -pub mod workspace_integration_run; -pub mod workspace_invite; -pub mod workspace_member; -pub mod workspace_webhook; +mod workspace; +mod workspace_activity; +mod workspace_integration; +mod workspace_integration_run; +mod workspace_invite; +mod workspace_member; +mod workspace_webhook; // Account models pub use account::{Account, NewAccount, UpdateAccount}; @@ -30,16 +30,13 @@ pub use account_api_token::{AccountApiToken, NewAccountApiToken, UpdateAccountAp pub use account_notification::{ AccountNotification, NewAccountNotification, UpdateAccountNotification, }; -// Document models -pub use document::{Document, NewDocument, UpdateDocument}; -pub use document_annotation::{ - DocumentAnnotation, NewDocumentAnnotation, UpdateDocumentAnnotation, -}; -pub use document_chunk::{ - DocumentChunk, NewDocumentChunk, ScoredDocumentChunk, UpdateDocumentChunk, -}; -pub use document_comment::{DocumentComment, NewDocumentComment, UpdateDocumentComment}; -pub use document_file::{DocumentFile, NewDocumentFile, UpdateDocumentFile}; +// File models +pub use file::{File, NewFile, UpdateFile}; +pub use file_annotation::{FileAnnotation, NewFileAnnotation, UpdateFileAnnotation}; +pub use file_chunk::{FileChunk, NewFileChunk, ScoredFileChunk, UpdateFileChunk}; +// Pipeline models +pub use pipeline::{NewPipeline, Pipeline, UpdatePipeline}; +pub use pipeline_run::{NewPipelineRun, PipelineRun, UpdatePipelineRun}; // Workspace models pub use workspace::{NewWorkspace, UpdateWorkspace, Workspace}; pub use workspace_activity::{NewWorkspaceActivity, WorkspaceActivity}; diff --git a/crates/nvisy-postgres/src/model/pipeline.rs b/crates/nvisy-postgres/src/model/pipeline.rs new file mode 100644 index 0000000..b239bb0 --- /dev/null +++ b/crates/nvisy-postgres/src/model/pipeline.rs @@ -0,0 +1,122 @@ +//! Pipeline model for PostgreSQL database operations. + +use diesel::prelude::*; +use jiff_diesel::Timestamp; +use uuid::Uuid; + +use crate::schema::pipelines; +use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt, PipelineStatus}; + +/// Pipeline model representing a workflow definition in the system. +#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] +#[diesel(table_name = pipelines)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct Pipeline { + /// Unique pipeline identifier. + pub id: Uuid, + /// Reference to the workspace this pipeline belongs to. + pub workspace_id: Uuid, + /// Reference to the account that created this pipeline. + pub account_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline lifecycle status. + pub status: PipelineStatus, + /// Pipeline definition (steps, input/output schemas, etc.). + pub definition: serde_json::Value, + /// Extended metadata. + pub metadata: serde_json::Value, + /// Timestamp when the pipeline was created. + pub created_at: Timestamp, + /// Timestamp when the pipeline was last updated. + pub updated_at: Timestamp, + /// Timestamp when the pipeline was soft-deleted. 
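The `mod.rs` change above makes the submodules private and keeps only the `pub use` re-exports, so callers depend on `model::File` rather than on internal module paths. A toy sketch of that facade layout with stand-in names:

```rust
// Toy sketch of the private-module + re-export layout adopted above; the
// module body and field are stand-ins for the real model types.
mod file {
    #[derive(Debug)]
    pub struct File {
        pub id: u64,
    }
}

pub use file::File; // callers see model::File, not model::file::File

fn main() {
    println!("{:?}", File { id: 1 });
}
```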
+ pub deleted_at: Option, +} + +/// Data for creating a new pipeline. +#[derive(Debug, Default, Clone, Insertable)] +#[diesel(table_name = pipelines)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct NewPipeline { + /// Workspace ID (required). + pub workspace_id: Uuid, + /// Account ID (required). + pub account_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline status. + pub status: Option, + /// Pipeline definition. + pub definition: Option, + /// Metadata. + pub metadata: Option, +} + +/// Data for updating a pipeline. +#[derive(Debug, Clone, Default, AsChangeset)] +#[diesel(table_name = pipelines)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct UpdatePipeline { + /// Pipeline name. + pub name: Option, + /// Pipeline description. + pub description: Option>, + /// Pipeline status. + pub status: Option, + /// Pipeline definition. + pub definition: Option, + /// Metadata. + pub metadata: Option, + /// Soft delete timestamp. + pub deleted_at: Option>, +} + +impl Pipeline { + /// Returns whether the pipeline is deleted. + pub fn is_deleted(&self) -> bool { + self.deleted_at.is_some() + } + + /// Returns whether the pipeline is in draft status. + pub fn is_draft(&self) -> bool { + self.status.is_draft() + } + + /// Returns whether the pipeline is enabled. + pub fn is_enabled(&self) -> bool { + self.status.is_enabled() + } + + /// Returns whether the pipeline is disabled. + pub fn is_disabled(&self) -> bool { + self.status.is_disabled() + } + + /// Returns whether the pipeline has a description. + pub fn has_description(&self) -> bool { + self.description.as_ref().is_some_and(|d| !d.is_empty()) + } +} + +impl HasCreatedAt for Pipeline { + fn created_at(&self) -> jiff::Timestamp { + self.created_at.into() + } +} + +impl HasUpdatedAt for Pipeline { + fn updated_at(&self) -> jiff::Timestamp { + self.updated_at.into() + } +} + +impl HasDeletedAt for Pipeline { + fn deleted_at(&self) -> Option { + self.deleted_at.map(Into::into) + } +} diff --git a/crates/nvisy-postgres/src/model/pipeline_run.rs b/crates/nvisy-postgres/src/model/pipeline_run.rs new file mode 100644 index 0000000..f6509af --- /dev/null +++ b/crates/nvisy-postgres/src/model/pipeline_run.rs @@ -0,0 +1,177 @@ +//! Pipeline run model for PostgreSQL database operations. + +use diesel::prelude::*; +use jiff_diesel::Timestamp; +use uuid::Uuid; + +use crate::schema::pipeline_runs; +use crate::types::{HasCreatedAt, PipelineRunStatus, PipelineTriggerType}; + +/// Pipeline run model representing an execution instance of a pipeline. +#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] +#[diesel(table_name = pipeline_runs)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct PipelineRun { + /// Unique run identifier. + pub id: Uuid, + /// Reference to the pipeline definition. + pub pipeline_id: Uuid, + /// Reference to the workspace. + pub workspace_id: Uuid, + /// Account that triggered the run. + pub account_id: Uuid, + /// How the run was initiated. + pub trigger_type: PipelineTriggerType, + /// Current execution status. + pub status: PipelineRunStatus, + /// Runtime input configuration. + pub input_config: serde_json::Value, + /// Runtime output configuration. + pub output_config: serde_json::Value, + /// Pipeline definition snapshot at run time. + pub definition_snapshot: serde_json::Value, + /// Error details if run failed. + pub error: Option, + /// Run metrics (duration, resources, etc.). 
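The `Pipeline` helpers above delegate to `PipelineStatus`, whose definition is not part of this diff. A hypothetical sketch of what those delegated checks imply; the variant names are inferred from the helper names only, and the real type presumably also derives the Diesel and serde machinery:

```rust
// Hypothetical sketch of the PipelineStatus checks delegated to above;
// variant names are inferred from the helper names, nothing more.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PipelineStatus {
    Draft,
    Enabled,
    Disabled,
}

impl PipelineStatus {
    fn is_draft(self) -> bool {
        matches!(self, Self::Draft)
    }
    fn is_enabled(self) -> bool {
        matches!(self, Self::Enabled)
    }
    fn is_disabled(self) -> bool {
        matches!(self, Self::Disabled)
    }
}

fn main() {
    assert!(PipelineStatus::Draft.is_draft());
    assert!(!PipelineStatus::Draft.is_enabled());
    assert!(PipelineStatus::Disabled.is_disabled());
}
```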
+ pub metrics: serde_json::Value, + /// When execution started. + pub started_at: Option, + /// When execution completed. + pub completed_at: Option, + /// When run was created/queued. + pub created_at: Timestamp, +} + +/// Data for creating a new pipeline run. +#[derive(Debug, Default, Clone, Insertable)] +#[diesel(table_name = pipeline_runs)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct NewPipelineRun { + /// Pipeline ID (required). + pub pipeline_id: Uuid, + /// Workspace ID (required). + pub workspace_id: Uuid, + /// Account ID (required). + pub account_id: Uuid, + /// Trigger type. + pub trigger_type: Option, + /// Initial status. + pub status: Option, + /// Input configuration. + pub input_config: Option, + /// Output configuration. + pub output_config: Option, + /// Definition snapshot. + pub definition_snapshot: serde_json::Value, + /// Metrics. + pub metrics: Option, +} + +/// Data for updating a pipeline run. +#[derive(Debug, Clone, Default, AsChangeset)] +#[diesel(table_name = pipeline_runs)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct UpdatePipelineRun { + /// Execution status. + pub status: Option, + /// Output configuration. + pub output_config: Option, + /// Error details. + pub error: Option>, + /// Metrics. + pub metrics: Option, + /// When execution started. + pub started_at: Option>, + /// When execution completed. + pub completed_at: Option>, +} + +impl PipelineRun { + /// Returns whether the run is queued. + pub fn is_queued(&self) -> bool { + self.status.is_queued() + } + + /// Returns whether the run is currently running. + pub fn is_running(&self) -> bool { + self.status.is_running() + } + + /// Returns whether the run completed successfully. + pub fn is_completed(&self) -> bool { + self.status.is_completed() + } + + /// Returns whether the run failed. + pub fn is_failed(&self) -> bool { + self.status.is_failed() + } + + /// Returns whether the run was cancelled. + pub fn is_cancelled(&self) -> bool { + self.status.is_cancelled() + } + + /// Returns whether the run is still active (queued or running). + pub fn is_active(&self) -> bool { + self.status.is_active() + } + + /// Returns whether the run has finished (completed, failed, or cancelled). + pub fn is_finished(&self) -> bool { + self.status.is_finished() + } + + /// Returns whether the run has an error. + pub fn has_error(&self) -> bool { + self.error.is_some() + } + + /// Returns the error message if present. + pub fn error_message(&self) -> Option<&str> { + self.error + .as_ref() + .and_then(|e| e.get("message")) + .and_then(|m| m.as_str()) + } + + /// Returns the duration of the run in seconds, if available. + pub fn duration_seconds(&self) -> Option { + let started = self.started_at?; + let completed = self.completed_at?; + let started_ts: jiff::Timestamp = started.into(); + let completed_ts: jiff::Timestamp = completed.into(); + Some(completed_ts.duration_since(started_ts).as_secs_f64()) + } + + /// Returns whether the run was manually triggered. + pub fn is_manual(&self) -> bool { + self.trigger_type.is_manual() + } + + /// Returns whether the run was triggered automatically. + pub fn is_automatic(&self) -> bool { + self.trigger_type.is_automatic() + } + + /// Returns whether the run can be retried. + pub fn is_retriable(&self) -> bool { + self.status.is_retriable() + } + + /// Returns the steps from the definition snapshot. + pub fn steps(&self) -> Option<&Vec> { + self.definition_snapshot.get("steps")?.as_array() + } + + /// Returns the number of steps in the run. 
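`error_message` and `steps`/`step_count` above assume particular JSON shapes inside `error` and `definition_snapshot`. A standalone sketch of those lookups with serde_json; only the `message` and `steps` keys come from the accessors, the payload contents are made up:

```rust
// Standalone sketch of the JSON lookups above; payloads are illustrative.
use serde_json::{Value, json};

fn error_message(error: &Value) -> Option<&str> {
    error.get("message").and_then(|m| m.as_str())
}

fn step_count(definition_snapshot: &Value) -> usize {
    definition_snapshot
        .get("steps")
        .and_then(|s| s.as_array())
        .map_or(0, |steps| steps.len())
}

fn main() {
    let error = json!({ "code": "TIMEOUT", "message": "upstream timed out" });
    assert_eq!(error_message(&error), Some("upstream timed out"));

    let snapshot = json!({ "steps": [{ "kind": "ocr" }, { "kind": "redact" }] });
    assert_eq!(step_count(&snapshot), 2);
}
```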
+ pub fn step_count(&self) -> usize { + self.steps().map_or(0, |s| s.len()) + } +} + +impl HasCreatedAt for PipelineRun { + fn created_at(&self) -> jiff::Timestamp { + self.created_at.into() + } +} diff --git a/crates/nvisy-postgres/src/model/workspace_invite.rs b/crates/nvisy-postgres/src/model/workspace_invite.rs index 844b5bb..384f769 100644 --- a/crates/nvisy-postgres/src/model/workspace_invite.rs +++ b/crates/nvisy-postgres/src/model/workspace_invite.rs @@ -5,8 +5,7 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::workspace_invites; -use crate::types::constants::invite; -use crate::types::{HasCreatedAt, HasUpdatedAt, InviteStatus, WorkspaceRole}; +use crate::types::{HasCreatedAt, HasUpdatedAt, InviteStatus, RECENTLY_SENT_HOURS, WorkspaceRole}; /// Workspace invitation model representing an invitation to join a workspace. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] @@ -122,7 +121,7 @@ impl WorkspaceInvite { /// Returns whether the invitation was sent recently. pub fn is_recently_sent(&self) -> bool { - self.was_created_within(jiff::Span::new().hours(invite::RECENTLY_SENT_HOURS)) + self.was_created_within(jiff::Span::new().hours(RECENTLY_SENT_HOURS)) } /// Returns the time remaining until expiration. diff --git a/crates/nvisy-postgres/src/model/workspace_webhook.rs b/crates/nvisy-postgres/src/model/workspace_webhook.rs index 36ea3f1..7fce105 100644 --- a/crates/nvisy-postgres/src/model/workspace_webhook.rs +++ b/crates/nvisy-postgres/src/model/workspace_webhook.rs @@ -42,6 +42,8 @@ pub struct WorkspaceWebhook { pub events: Vec>, /// Custom headers to include in webhook requests. pub headers: serde_json::Value, + /// HMAC-SHA256 signing secret for webhook verification. + pub secret: String, /// Current status of the webhook. pub status: WebhookStatus, /// Timestamp of last webhook trigger. diff --git a/crates/nvisy-postgres/src/query/document.rs b/crates/nvisy-postgres/src/query/document.rs deleted file mode 100644 index 857f0ba..0000000 --- a/crates/nvisy-postgres/src/query/document.rs +++ /dev/null @@ -1,340 +0,0 @@ -//! Document repository for managing document operations. - -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use pgtrgm::expression_methods::TrgmExpressionMethods; -use uuid::Uuid; - -use crate::model::{Document, NewDocument, UpdateDocument}; -use crate::types::{CursorPage, CursorPagination, OffsetPagination}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for document database operations. -/// -/// Handles document lifecycle management including creation, updates, -/// and search functionality. -pub trait DocumentRepository { - /// Creates a new document with the provided metadata. - fn create_document( - &mut self, - new_document: NewDocument, - ) -> impl Future> + Send; - - /// Finds a document by its unique identifier. - fn find_document_by_id( - &mut self, - document_id: Uuid, - ) -> impl Future>> + Send; - - /// Lists documents associated with a specific workspace with offset pagination. - fn offset_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists documents associated with a specific workspace with cursor pagination. - fn cursor_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists documents created by a specific account with offset pagination. 
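The `cursor_list_*` methods below implement keyset pagination: rows are ordered by `(updated_at DESC, id DESC)` and a page continues strictly after the `(timestamp, id)` cursor. A plain-Rust sketch of that predicate over stand-in sort keys (timestamps reduced to integers); the real code expresses the same condition through Diesel:

```rust
// Sketch of the keyset ("cursor") predicate used by the cursor queries below.
use uuid::Uuid;

type SortKey = (i64 /* updated_at as unix nanos */, Uuid);

fn after_cursor(row: SortKey, cursor: SortKey) -> bool {
    // Matches: updated_at < cursor.ts OR (updated_at = cursor.ts AND id < cursor.id)
    row.0 < cursor.0 || (row.0 == cursor.0 && row.1 < cursor.1)
}

fn next_page(mut rows: Vec<SortKey>, cursor: Option<SortKey>, limit: usize) -> Vec<SortKey> {
    // Descending order on (updated_at, id), mirroring the SQL ORDER BY.
    rows.sort_by(|a, b| b.cmp(a));
    rows.into_iter()
        .filter(|&row| cursor.map_or(true, |c| after_cursor(row, c)))
        .take(limit)
        .collect()
}

fn main() {
    let a = (100, Uuid::from_u128(3));
    let b = (100, Uuid::from_u128(2));
    let c = (90, Uuid::from_u128(9));
    // After cursor `a`, the next page (descending) starts at `b`.
    assert_eq!(next_page(vec![a, b, c], Some(a), 2), vec![b, c]);
}
```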
- fn offset_list_account_documents( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists documents created by a specific account with cursor pagination. - fn cursor_list_account_documents( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Updates a document with new information and metadata. - fn update_document( - &mut self, - document_id: Uuid, - updates: UpdateDocument, - ) -> impl Future> + Send; - - /// Soft deletes a document by setting the deletion timestamp. - fn delete_document(&mut self, document_id: Uuid) -> impl Future> + Send; - - /// Lists all documents with offset pagination. - fn offset_list_documents( - &mut self, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Searches documents by name or description with optional workspace filtering. - fn search_documents( - &mut self, - search_query: &str, - workspace_id: Option, - pagination: OffsetPagination, - ) -> impl Future>> + Send; -} - -impl DocumentRepository for PgConnection { - async fn create_document(&mut self, new_document: NewDocument) -> PgResult { - use schema::documents; - - let document = diesel::insert_into(documents::table) - .values(&new_document) - .returning(Document::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(document) - } - - async fn find_document_by_id(&mut self, document_id: Uuid) -> PgResult> { - use schema::documents::{self, dsl}; - - let document = documents::table - .filter(dsl::id.eq(document_id)) - .filter(dsl::deleted_at.is_null()) - .select(Document::as_select()) - .first(self) - .await - .optional() - .map_err(PgError::from)?; - - Ok(document) - } - - async fn offset_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let documents = documents::table - .filter(dsl::workspace_id.eq(workspace_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::updated_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(documents) - } - - async fn cursor_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::documents::{self, dsl}; - - let base_filter = dsl::workspace_id - .eq(workspace_id) - .and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - documents::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - documents::table - .filter(base_filter) - .filter( - dsl::updated_at - .lt(cursor_ts) - .or(dsl::updated_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - documents::table - .filter(base_filter) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? 
- }; - - Ok(CursorPage::new(items, total, pagination.limit, |d| { - (d.updated_at.into(), d.id) - })) - } - - async fn offset_list_account_documents( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let documents = documents::table - .filter(dsl::account_id.eq(account_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::updated_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(documents) - } - - async fn cursor_list_account_documents( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::documents::{self, dsl}; - - let base_filter = dsl::account_id - .eq(account_id) - .and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - documents::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - documents::table - .filter(base_filter) - .filter( - dsl::updated_at - .lt(cursor_ts) - .or(dsl::updated_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - documents::table - .filter(base_filter) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? - }; - - Ok(CursorPage::new(items, total, pagination.limit, |d| { - (d.updated_at.into(), d.id) - })) - } - - async fn update_document( - &mut self, - document_id: Uuid, - updates: UpdateDocument, - ) -> PgResult { - use schema::documents::{self, dsl}; - - let document = diesel::update(documents::table.filter(dsl::id.eq(document_id))) - .set(&updates) - .returning(Document::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(document) - } - - async fn delete_document(&mut self, document_id: Uuid) -> PgResult<()> { - use diesel::dsl::now; - use schema::documents::{self, dsl}; - - diesel::update(documents::table.filter(dsl::id.eq(document_id))) - .set(dsl::deleted_at.eq(now)) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(()) - } - - async fn offset_list_documents( - &mut self, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let documents = documents::table - .filter(dsl::deleted_at.is_null()) - .order(dsl::updated_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(documents) - } - - async fn search_documents( - &mut self, - search_query: &str, - workspace_id: Option, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let mut query = documents::table - .filter(dsl::deleted_at.is_null()) - .filter(dsl::display_name.trgm_similar_to(search_query)) - .order(dsl::display_name.asc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .into_boxed(); - - if let Some(ws_id) = workspace_id { - query = query.filter(dsl::workspace_id.eq(ws_id)); - } - - let documents = query.load(self).await.map_err(PgError::from)?; - 
Ok(documents) - } -} diff --git a/crates/nvisy-postgres/src/query/document_chunk.rs b/crates/nvisy-postgres/src/query/document_chunk.rs deleted file mode 100644 index b9197d0..0000000 --- a/crates/nvisy-postgres/src/query/document_chunk.rs +++ /dev/null @@ -1,380 +0,0 @@ -//! Document chunks repository for managing document text segments and embeddings. - -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use pgvector::Vector; -use uuid::Uuid; - -use crate::model::{DocumentChunk, NewDocumentChunk, ScoredDocumentChunk, UpdateDocumentChunk}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for document chunk database operations. -/// -/// Handles chunk lifecycle management including creation, embedding updates, -/// and semantic similarity search via pgvector. -pub trait DocumentChunkRepository { - /// Creates multiple document chunks in a single transaction. - fn create_document_chunks( - &mut self, - new_chunks: Vec, - ) -> impl Future>> + Send; - - /// Updates a chunk with new data. - fn update_document_chunk( - &mut self, - chunk_id: Uuid, - updates: UpdateDocumentChunk, - ) -> impl Future> + Send; - - /// Deletes all chunks for a file. - fn delete_document_file_chunks( - &mut self, - file_id: Uuid, - ) -> impl Future> + Send; - - /// Deletes all chunks for all files of a document. - fn delete_document_chunks( - &mut self, - document_id: Uuid, - ) -> impl Future> + Send; - - /// Lists all chunks for a specific file ordered by chunk index. - fn list_document_file_chunks( - &mut self, - file_id: Uuid, - ) -> impl Future>> + Send; - - /// Searches for similar chunks using cosine similarity. - /// - /// Returns chunks ordered by similarity (most similar first). - fn search_similar_document_chunks( - &mut self, - query_embedding: Vector, - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within specific files. - fn search_similar_document_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within all files of specific documents. - fn search_similar_document_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within specific files with score filtering. - /// - /// Returns chunks with similarity score >= min_score, ordered by similarity. - fn search_scored_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within all files of specific documents with score filtering. - /// - /// Returns chunks with similarity score >= min_score, ordered by similarity. - fn search_scored_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> impl Future>> + Send; - - /// Gets the total chunk count for a file. 
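Every repository trait in this diff follows the same shape: the trait methods are declared to return `impl Future<Output = PgResult<…>> + Send`, and the `impl … for PgConnection` blocks provide them as plain `async fn`, which lets callers rely on the futures being Send without reaching for the `async_trait` macro. A minimal self-contained sketch of that pattern, using hypothetical `WidgetRepository`/`Conn` stand-ins rather than types from this crate (requires return-position `impl Trait` in traits, stable since Rust 1.75):

use std::future::Future;

// Hypothetical stand-ins for PgConnection and PgResult<T>, for illustration only.
struct Conn;
type DbResult<T> = Result<T, String>;

trait WidgetRepository {
    // Declaring `+ Send` here lets callers move these futures across task
    // boundaries without naming the concrete future type.
    fn count_widgets(&mut self) -> impl Future<Output = DbResult<i64>> + Send;
}

impl WidgetRepository for Conn {
    // An `async fn` satisfies the `impl Future + Send` signature as long as
    // the future it produces is actually Send.
    async fn count_widgets(&mut self) -> DbResult<i64> {
        Ok(0) // a real implementation would run a Diesel query here
    }
}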
- fn count_document_file_chunks( - &mut self, - file_id: Uuid, - ) -> impl Future> + Send; -} - -impl DocumentChunkRepository for PgConnection { - async fn create_document_chunks( - &mut self, - new_chunks: Vec, - ) -> PgResult> { - use schema::document_chunks; - - if new_chunks.is_empty() { - return Ok(vec![]); - } - - let chunks = diesel::insert_into(document_chunks::table) - .values(&new_chunks) - .returning(DocumentChunk::as_returning()) - .get_results(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn update_document_chunk( - &mut self, - chunk_id: Uuid, - updates: UpdateDocumentChunk, - ) -> PgResult { - use schema::document_chunks::{self, dsl}; - - let chunk = diesel::update(document_chunks::table.filter(dsl::id.eq(chunk_id))) - .set(&updates) - .returning(DocumentChunk::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(chunk) - } - - async fn delete_document_file_chunks(&mut self, file_id: Uuid) -> PgResult { - use schema::document_chunks::{self, dsl}; - - let affected = diesel::delete(document_chunks::table.filter(dsl::file_id.eq(file_id))) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(affected) - } - - async fn delete_document_chunks(&mut self, document_id: Uuid) -> PgResult { - use schema::document_chunks::{self, dsl}; - use schema::document_files; - - // Get all file IDs for this document - let file_ids: Vec = document_files::table - .filter(document_files::document_id.eq(document_id)) - .select(document_files::id) - .load(self) - .await - .map_err(PgError::from)?; - - if file_ids.is_empty() { - return Ok(0); - } - - // Delete all chunks for those files - let affected = diesel::delete(document_chunks::table.filter(dsl::file_id.eq_any(file_ids))) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(affected) - } - - async fn list_document_file_chunks(&mut self, file_id: Uuid) -> PgResult> { - use schema::document_chunks::{self, dsl}; - - let chunks = document_chunks::table - .filter(dsl::file_id.eq(file_id)) - .order(dsl::chunk_index.asc()) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_similar_document_chunks( - &mut self, - query_embedding: Vector, - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - - let chunks = document_chunks::table - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_similar_document_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - let chunks = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_similar_document_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - use schema::document_files; - - if document_ids.is_empty() { - return Ok(vec![]); - } - - // Get all file IDs for the given documents - let file_ids: Vec = 
document_files::table - .filter(document_files::document_id.eq_any(document_ids)) - .select(document_files::id) - .load(self) - .await - .map_err(PgError::from)?; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - let chunks = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_scored_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - // Cosine distance ranges from 0 (identical) to 2 (opposite) - // Score = 1 - distance, so min_score threshold means max_distance = 1 - min_score - let max_distance = 1.0 - min_score; - - let chunks: Vec<(DocumentChunk, f64)> = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .filter( - dsl::embedding - .cosine_distance(&query_embedding) - .le(max_distance), - ) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(( - DocumentChunk::as_select(), - (1.0.into_sql::() - - dsl::embedding.cosine_distance(&query_embedding)), - )) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks - .into_iter() - .map(|(chunk, score)| ScoredDocumentChunk { chunk, score }) - .collect()) - } - - async fn search_scored_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - use schema::document_files; - - if document_ids.is_empty() { - return Ok(vec![]); - } - - // Get all file IDs for the given documents - let file_ids: Vec = document_files::table - .filter(document_files::document_id.eq_any(document_ids)) - .select(document_files::id) - .load(self) - .await - .map_err(PgError::from)?; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - let max_distance = 1.0 - min_score; - - let chunks: Vec<(DocumentChunk, f64)> = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .filter( - dsl::embedding - .cosine_distance(&query_embedding) - .le(max_distance), - ) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(( - DocumentChunk::as_select(), - (1.0.into_sql::() - - dsl::embedding.cosine_distance(&query_embedding)), - )) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks - .into_iter() - .map(|(chunk, score)| ScoredDocumentChunk { chunk, score }) - .collect()) - } - - async fn count_document_file_chunks(&mut self, file_id: Uuid) -> PgResult { - use schema::document_chunks::{self, dsl}; - - let count: i64 = document_chunks::table - .filter(dsl::file_id.eq(file_id)) - .count() - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(count) - } -} diff --git a/crates/nvisy-postgres/src/query/document_comment.rs b/crates/nvisy-postgres/src/query/document_comment.rs deleted file mode 100644 index 880ba18..0000000 --- a/crates/nvisy-postgres/src/query/document_comment.rs +++ /dev/null @@ -1,316 +0,0 @@ -//! Document comments repository for managing collaborative commenting operations. 
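The scored chunk searches above expose a similarity score derived from pgvector's cosine distance: distance lies in [0, 2], the score is defined as 1 - distance, and a caller's min_score is therefore pushed into SQL as max_distance = 1 - min_score. A small self-contained sketch of that arithmetic (plain Rust, no database; the helper mirrors the 1 - cos(theta) convention pgvector's cosine distance uses):

// Cosine distance over f32 slices, mirroring pgvector's 1 - cos(theta) definition.
fn cosine_distance(a: &[f32], b: &[f32]) -> f64 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    f64::from(1.0 - dot / (norm(a) * norm(b)))
}

fn main() {
    let min_score = 0.75;
    let max_distance = 1.0 - min_score; // the threshold pushed into the SQL filter

    // Identical vectors: distance 0.0, score 1.0 -> passes the filter.
    let d = cosine_distance(&[1.0, 0.0], &[1.0, 0.0]);
    assert!(1.0 - d >= min_score && d <= max_distance);

    // Orthogonal vectors: distance 1.0, score 0.0 -> filtered out at min_score 0.75.
    let d = cosine_distance(&[1.0, 0.0], &[0.0, 1.0]);
    assert!(1.0 - d < min_score && d > max_distance);
}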
- -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use uuid::Uuid; - -use crate::model::{DocumentComment, NewDocumentComment, UpdateDocumentComment}; -use crate::types::{CursorPage, CursorPagination, OffsetPagination}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for document comment database operations. -/// -/// Handles comment lifecycle management including creation, threading, replies, -/// and mention tracking. -pub trait DocumentCommentRepository { - /// Creates a new document comment. - fn create_document_comment( - &mut self, - new_comment: NewDocumentComment, - ) -> impl Future> + Send; - - /// Finds a document comment by its unique identifier. - fn find_document_comment_by_id( - &mut self, - comment_id: Uuid, - ) -> impl Future>> + Send; - - /// Lists document comments for a file with offset pagination. - fn offset_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists document comments for a file with cursor pagination. - fn cursor_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists document comments created by an account with offset pagination. - fn offset_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists document comments created by an account with cursor pagination. - fn cursor_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists document comments mentioning an account with offset pagination. - fn offset_list_document_comments_mentioning_account( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Updates a document comment. - fn update_document_comment( - &mut self, - comment_id: Uuid, - updates: UpdateDocumentComment, - ) -> impl Future> + Send; - - /// Soft deletes a document comment. 
- fn delete_document_comment( - &mut self, - comment_id: Uuid, - ) -> impl Future> + Send; -} - -impl DocumentCommentRepository for PgConnection { - async fn create_document_comment( - &mut self, - new_comment: NewDocumentComment, - ) -> PgResult { - use schema::document_comments; - - let comment = diesel::insert_into(document_comments::table) - .values(&new_comment) - .returning(DocumentComment::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(comment) - } - - async fn find_document_comment_by_id( - &mut self, - comment_id: Uuid, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comment = document_comments::table - .filter(dsl::id.eq(comment_id)) - .filter(dsl::deleted_at.is_null()) - .select(DocumentComment::as_select()) - .first(self) - .await - .optional() - .map_err(PgError::from)?; - - Ok(comment) - } - - async fn offset_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comments = document_comments::table - .filter(dsl::file_id.eq(file_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(comments) - } - - async fn cursor_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::document_comments::{self, dsl}; - - let base_filter = dsl::file_id.eq(file_id).and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - document_comments::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_comments::table - .filter(base_filter) - .filter( - dsl::created_at - .lt(cursor_ts) - .or(dsl::created_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - document_comments::table - .filter(base_filter) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? 
- }; - - Ok(CursorPage::new(items, total, pagination.limit, |c| { - (c.created_at.into(), c.id) - })) - } - - async fn offset_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comments = document_comments::table - .filter(dsl::account_id.eq(account_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(comments) - } - - async fn cursor_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::document_comments::{self, dsl}; - - let base_filter = dsl::account_id - .eq(account_id) - .and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - document_comments::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_comments::table - .filter(base_filter) - .filter( - dsl::created_at - .lt(cursor_ts) - .or(dsl::created_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - document_comments::table - .filter(base_filter) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? 
- }; - - Ok(CursorPage::new(items, total, pagination.limit, |c| { - (c.created_at.into(), c.id) - })) - } - - async fn offset_list_document_comments_mentioning_account( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comments = document_comments::table - .filter(dsl::reply_to_account_id.eq(account_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(comments) - } - - async fn update_document_comment( - &mut self, - comment_id: Uuid, - updates: UpdateDocumentComment, - ) -> PgResult { - use schema::document_comments::{self, dsl}; - - let comment = diesel::update(document_comments::table.filter(dsl::id.eq(comment_id))) - .set(&updates) - .returning(DocumentComment::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(comment) - } - - async fn delete_document_comment(&mut self, comment_id: Uuid) -> PgResult<()> { - use diesel::dsl::now; - use schema::document_comments::{self, dsl}; - - diesel::update(document_comments::table.filter(dsl::id.eq(comment_id))) - .set(dsl::deleted_at.eq(now)) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(()) - } -} diff --git a/crates/nvisy-postgres/src/query/document_file.rs b/crates/nvisy-postgres/src/query/file.rs similarity index 62% rename from crates/nvisy-postgres/src/query/document_file.rs rename to crates/nvisy-postgres/src/query/file.rs index 985afdd..7758958 100644 --- a/crates/nvisy-postgres/src/query/document_file.rs +++ b/crates/nvisy-postgres/src/query/file.rs @@ -1,4 +1,4 @@ -//! Document files repository for managing uploaded document files. +//! Files repository for managing uploaded files. use std::future::Future; @@ -8,29 +8,26 @@ use diesel_async::RunQueryDsl; use pgtrgm::expression_methods::TrgmExpressionMethods; use uuid::Uuid; -use crate::model::{DocumentFile, NewDocumentFile, UpdateDocumentFile}; +use crate::model::{File, NewFile, UpdateFile}; use crate::types::{ CursorPage, CursorPagination, FileFilter, FileSortBy, FileSortField, OffsetPagination, - ProcessingStatus, SortOrder, + SortOrder, }; use crate::{PgConnection, PgError, PgResult, schema}; -/// Repository for document file database operations. +/// Repository for file database operations. /// -/// Handles file lifecycle management including upload tracking, processing -/// status updates, virus scanning, storage management, and cleanup operations. -pub trait DocumentFileRepository { - /// Creates a new document file record. - fn create_document_file( - &mut self, - new_file: NewDocumentFile, - ) -> impl Future> + Send; +/// Handles file lifecycle management including upload tracking, +/// storage management, and cleanup operations. +pub trait FileRepository { + /// Creates a new file record. + fn create_file(&mut self, new_file: NewFile) -> impl Future> + Send; /// Finds a file by its unique identifier. - fn find_document_file_by_id( + fn find_file_by_id( &mut self, file_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Finds a file by ID within a specific workspace. /// @@ -39,36 +36,29 @@ pub trait DocumentFileRepository { &mut self, workspace_id: Uuid, file_id: Uuid, - ) -> impl Future>> + Send; - - /// Lists all files associated with a document with offset pagination. 
- fn offset_list_document_files( - &mut self, - document_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists all files uploaded by a specific account with offset pagination. fn offset_list_account_files( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Updates a file with new metadata or settings. - fn update_document_file( + fn update_file( &mut self, file_id: Uuid, - updates: UpdateDocumentFile, - ) -> impl Future> + Send; + updates: UpdateFile, + ) -> impl Future> + Send; /// Soft deletes a file by setting the deletion timestamp. - fn delete_document_file(&mut self, file_id: Uuid) -> impl Future> + Send; + fn delete_file(&mut self, file_id: Uuid) -> impl Future> + Send; /// Soft deletes multiple files in a workspace by setting deletion timestamps. /// /// Returns the number of files deleted. - fn delete_document_files( + fn delete_files( &mut self, workspace_id: Uuid, file_ids: &[Uuid], @@ -83,7 +73,7 @@ pub trait DocumentFileRepository { pagination: OffsetPagination, sort_by: FileSortBy, filter: FileFilter, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists all files in a workspace with cursor pagination and optional filtering. fn cursor_list_workspace_files( @@ -91,20 +81,13 @@ pub trait DocumentFileRepository { workspace_id: Uuid, pagination: CursorPagination, filter: FileFilter, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Finds files with a matching SHA-256 hash. fn find_files_by_hash( &mut self, file_hash: &[u8], - ) -> impl Future>> + Send; - - /// Finds files with a specific processing status. - fn find_files_by_status( - &mut self, - status: ProcessingStatus, - pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Calculates total storage usage for an account. fn get_account_storage_usage( @@ -113,19 +96,42 @@ pub trait DocumentFileRepository { ) -> impl Future> + Send; /// Finds multiple files by their IDs. - fn find_document_files_by_ids( + fn find_files_by_ids( &mut self, file_ids: &[Uuid], - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; + + /// Lists all versions of a file (the file itself and all files that have it as parent). + /// + /// Returns files ordered by version_number descending (newest first). + fn list_file_versions( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Finds the latest version of a file by traversing the version chain. + /// + /// Starting from a file, follows the chain of files where parent_id points + /// to the previous version and returns the one with the highest version_number. + fn find_latest_version( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Gets the next version number for creating a new version of a file. 
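A hypothetical usage sketch for the version helpers documented above, written as if from inside the crate so the `crate::…` paths match the imports used throughout this diff. It assumes `File` exposes a `version_number` field and that `NewFile` (whose fields are not shown here) carries `parent_id` and `version_number`; building the new row is therefore left as a comment:

use uuid::Uuid;

use crate::query::FileRepository;
use crate::{PgConnection, PgResult};

/// Creates the next version of an existing file (sketch).
async fn upload_new_version(conn: &mut PgConnection, file_id: Uuid) -> PgResult<()> {
    // The helpers treat `file_id` plus every row whose parent_id is `file_id`
    // as one version chain.
    let next = conn.get_next_version_number(file_id).await?;
    if let Some(latest) = conn.find_latest_version(file_id).await? {
        println!("latest is v{}, next upload becomes v{next}", latest.version_number);
    }
    // Build a NewFile with parent_id = Some(file_id) and version_number = next,
    // then persist it with `conn.create_file(new_file).await?`.
    Ok(())
}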
+ fn get_next_version_number( + &mut self, + file_id: Uuid, + ) -> impl Future> + Send; } -impl DocumentFileRepository for PgConnection { - async fn create_document_file(&mut self, new_file: NewDocumentFile) -> PgResult { - use schema::document_files; +impl FileRepository for PgConnection { + async fn create_file(&mut self, new_file: NewFile) -> PgResult { + use schema::files; - let file = diesel::insert_into(document_files::table) + let file = diesel::insert_into(files::table) .values(&new_file) - .returning(DocumentFile::as_returning()) + .returning(File::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -133,13 +139,13 @@ impl DocumentFileRepository for PgConnection { Ok(file) } - async fn find_document_file_by_id(&mut self, file_id: Uuid) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn find_file_by_id(&mut self, file_id: Uuid) -> PgResult> { + use schema::files::{self, dsl}; - let file = document_files::table + let file = files::table .filter(dsl::id.eq(file_id)) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .first(self) .await .optional() @@ -152,14 +158,14 @@ impl DocumentFileRepository for PgConnection { &mut self, workspace_id: Uuid, file_id: Uuid, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; - let file = document_files::table + let file = files::table .filter(dsl::id.eq(file_id)) .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .first(self) .await .optional() @@ -168,41 +174,20 @@ impl DocumentFileRepository for PgConnection { Ok(file) } - async fn offset_list_document_files( - &mut self, - document_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_files::{self, dsl}; - - let files = document_files::table - .filter(dsl::document_id.eq(document_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentFile::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(files) - } - async fn offset_list_account_files( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; - let files = document_files::table + let files = files::table .filter(dsl::account_id.eq(account_id)) .filter(dsl::deleted_at.is_null()) .order(dsl::created_at.desc()) .limit(pagination.limit) .offset(pagination.offset) - .select(DocumentFile::as_select()) + .select(File::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -210,16 +195,12 @@ impl DocumentFileRepository for PgConnection { Ok(files) } - async fn update_document_file( - &mut self, - file_id: Uuid, - updates: UpdateDocumentFile, - ) -> PgResult { - use schema::document_files::{self, dsl}; + async fn update_file(&mut self, file_id: Uuid, updates: UpdateFile) -> PgResult { + use schema::files::{self, dsl}; - let file = diesel::update(document_files::table.filter(dsl::id.eq(file_id))) + let file = diesel::update(files::table.filter(dsl::id.eq(file_id))) .set(&updates) - .returning(DocumentFile::as_returning()) + .returning(File::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -227,11 +208,11 @@ impl DocumentFileRepository for PgConnection { Ok(file) } - async fn delete_document_file(&mut self, file_id: Uuid) -> 
PgResult<()> { + async fn delete_file(&mut self, file_id: Uuid) -> PgResult<()> { use diesel::dsl::now; - use schema::document_files::{self, dsl}; + use schema::files::{self, dsl}; - diesel::update(document_files::table.filter(dsl::id.eq(file_id))) + diesel::update(files::table.filter(dsl::id.eq(file_id))) .set(dsl::deleted_at.eq(now)) .execute(self) .await @@ -240,16 +221,12 @@ impl DocumentFileRepository for PgConnection { Ok(()) } - async fn delete_document_files( - &mut self, - workspace_id: Uuid, - file_ids: &[Uuid], - ) -> PgResult { + async fn delete_files(&mut self, workspace_id: Uuid, file_ids: &[Uuid]) -> PgResult { use diesel::dsl::now; - use schema::document_files::{self, dsl}; + use schema::files::{self, dsl}; let count = diesel::update( - document_files::table + files::table .filter(dsl::id.eq_any(file_ids)) .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()), @@ -268,11 +245,11 @@ impl DocumentFileRepository for PgConnection { pagination: OffsetPagination, sort_by: FileSortBy, filter: FileFilter, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; // Build base query - let mut query = document_files::table + let mut query = files::table .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) .into_boxed(); @@ -295,7 +272,7 @@ impl DocumentFileRepository for PgConnection { }; let files = query - .select(DocumentFile::as_select()) + .select(File::as_select()) .limit(pagination.limit) .offset(pagination.offset) .load(self) @@ -310,15 +287,15 @@ impl DocumentFileRepository for PgConnection { workspace_id: Uuid, pagination: CursorPagination, filter: FileFilter, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; // Precompute filter values let search_term = filter.search_term().map(|s| s.to_string()); let extensions: Vec = filter.extensions().iter().map(|s| s.to_string()).collect(); // Build base query with filters - let mut base_query = document_files::table + let mut base_query = files::table .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) .into_boxed(); @@ -346,7 +323,7 @@ impl DocumentFileRepository for PgConnection { }; // Rebuild query for fetching items (can't reuse boxed query after count) - let mut query = document_files::table + let mut query = files::table .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) .into_boxed(); @@ -364,7 +341,7 @@ impl DocumentFileRepository for PgConnection { let limit = pagination.limit + 1; // Apply cursor filter if present - let items: Vec = if let Some(cursor) = &pagination.after { + let items: Vec = if let Some(cursor) = &pagination.after { let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); query @@ -373,7 +350,7 @@ impl DocumentFileRepository for PgConnection { .lt(&cursor_time) .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), ) - .select(DocumentFile::as_select()) + .select(File::as_select()) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(limit) .load(self) @@ -381,7 +358,7 @@ impl DocumentFileRepository for PgConnection { .map_err(PgError::from)? 
} else { query - .select(DocumentFile::as_select()) + .select(File::as_select()) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(limit) .load(self) @@ -393,17 +370,17 @@ impl DocumentFileRepository for PgConnection { items, total, pagination.limit, - |f: &DocumentFile| (f.created_at.into(), f.id), + |f: &File| (f.created_at.into(), f.id), )) } - async fn find_files_by_hash(&mut self, file_hash: &[u8]) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn find_files_by_hash(&mut self, file_hash: &[u8]) -> PgResult> { + use schema::files::{self, dsl}; - let files = document_files::table + let files = files::table .filter(dsl::file_hash_sha256.eq(file_hash)) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -411,20 +388,27 @@ impl DocumentFileRepository for PgConnection { Ok(files) } - async fn find_files_by_status( - &mut self, - status: ProcessingStatus, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn get_account_storage_usage(&mut self, account_id: Uuid) -> PgResult { + use schema::files::{self, dsl}; - let files = document_files::table - .filter(dsl::processing_status.eq(status)) + let usage: Option = files::table + .filter(dsl::account_id.eq(account_id)) .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentFile::as_select()) + .select(diesel::dsl::sum(dsl::file_size_bytes)) + .first(self) + .await + .map_err(PgError::from)?; + + Ok(usage.unwrap_or_else(|| BigDecimal::from(0))) + } + + async fn find_files_by_ids(&mut self, file_ids: &[Uuid]) -> PgResult> { + use schema::files::{self, dsl}; + + let files = files::table + .filter(dsl::id.eq_any(file_ids)) + .filter(dsl::deleted_at.is_null()) + .select(File::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -432,34 +416,54 @@ impl DocumentFileRepository for PgConnection { Ok(files) } - async fn get_account_storage_usage(&mut self, account_id: Uuid) -> PgResult { - use schema::document_files::{self, dsl}; + async fn list_file_versions(&mut self, file_id: Uuid) -> PgResult> { + use schema::files::{self, dsl}; - let usage: Option = document_files::table - .filter(dsl::account_id.eq(account_id)) + // Get the original file and all files that have it (or its descendants) as parent + // This query gets the file itself plus all files where parent_id = file_id + let files = files::table + .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) .filter(dsl::deleted_at.is_null()) - .select(diesel::dsl::sum(dsl::file_size_bytes)) + .order(dsl::version_number.desc()) + .select(File::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(files) + } + + async fn find_latest_version(&mut self, file_id: Uuid) -> PgResult> { + use schema::files::{self, dsl}; + + // Find the file with highest version_number that has file_id as parent, + // or the file itself if no newer versions exist + let latest = files::table + .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) + .filter(dsl::deleted_at.is_null()) + .order(dsl::version_number.desc()) + .select(File::as_select()) .first(self) .await + .optional() .map_err(PgError::from)?; - Ok(usage.unwrap_or_else(|| BigDecimal::from(0))) + Ok(latest) } - async fn find_document_files_by_ids( - &mut self, - file_ids: &[Uuid], - ) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn 
get_next_version_number(&mut self, file_id: Uuid) -> PgResult { + use diesel::dsl::max; + use schema::files::{self, dsl}; - let files = document_files::table - .filter(dsl::id.eq_any(file_ids)) + // Get the max version_number from the file and its versions + let max_version: Option = files::table + .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) - .load(self) + .select(max(dsl::version_number)) + .first(self) .await .map_err(PgError::from)?; - Ok(files) + Ok(max_version.unwrap_or(0) + 1) } } diff --git a/crates/nvisy-postgres/src/query/document_annotation.rs b/crates/nvisy-postgres/src/query/file_annotation.rs similarity index 54% rename from crates/nvisy-postgres/src/query/document_annotation.rs rename to crates/nvisy-postgres/src/query/file_annotation.rs index 052c4a7..a5fca6c 100644 --- a/crates/nvisy-postgres/src/query/document_annotation.rs +++ b/crates/nvisy-postgres/src/query/file_annotation.rs @@ -1,4 +1,4 @@ -//! Document annotations repository for managing user annotations on documents. +//! File annotations repository for managing user annotations on files. use std::future::Future; @@ -6,79 +6,79 @@ use diesel::prelude::*; use diesel_async::RunQueryDsl; use uuid::Uuid; -use crate::model::{DocumentAnnotation, NewDocumentAnnotation, UpdateDocumentAnnotation}; +use crate::model::{FileAnnotation, NewFileAnnotation, UpdateFileAnnotation}; use crate::types::{CursorPage, CursorPagination, OffsetPagination}; use crate::{PgConnection, PgError, PgResult, schema}; -/// Repository for document annotation database operations. +/// Repository for file annotation database operations. /// /// Handles annotation lifecycle management including creation, updates, /// filtering by type, and retrieval across files and accounts. -pub trait DocumentAnnotationRepository { - /// Creates a new document annotation. - fn create_document_annotation( +pub trait FileAnnotationRepository { + /// Creates a new file annotation. + fn create_file_annotation( &mut self, - new_annotation: NewDocumentAnnotation, - ) -> impl Future> + Send; + new_annotation: NewFileAnnotation, + ) -> impl Future> + Send; - /// Finds a document annotation by its unique identifier. - fn find_document_annotation_by_id( + /// Finds a file annotation by its unique identifier. + fn find_file_annotation_by_id( &mut self, annotation_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations for a file with offset pagination. - fn offset_list_file_document_annotations( + /// Lists file annotations for a file with offset pagination. + fn offset_list_file_annotations( &mut self, file_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations for a file with cursor pagination. - fn cursor_list_file_document_annotations( + /// Lists file annotations for a file with cursor pagination. + fn cursor_list_file_annotations( &mut self, file_id: Uuid, pagination: CursorPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations created by an account with offset pagination. - fn offset_list_account_document_annotations( + /// Lists file annotations created by an account with offset pagination. 
+ fn offset_list_account_file_annotations( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations created by an account with cursor pagination. - fn cursor_list_account_document_annotations( + /// Lists file annotations created by an account with cursor pagination. + fn cursor_list_account_file_annotations( &mut self, account_id: Uuid, pagination: CursorPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Updates a document annotation. - fn update_document_annotation( + /// Updates a file annotation. + fn update_file_annotation( &mut self, annotation_id: Uuid, - updates: UpdateDocumentAnnotation, - ) -> impl Future> + Send; + updates: UpdateFileAnnotation, + ) -> impl Future> + Send; - /// Soft deletes a document annotation. - fn delete_document_annotation( + /// Soft deletes a file annotation. + fn delete_file_annotation( &mut self, annotation_id: Uuid, ) -> impl Future> + Send; } -impl DocumentAnnotationRepository for PgConnection { - async fn create_document_annotation( +impl FileAnnotationRepository for PgConnection { + async fn create_file_annotation( &mut self, - new_annotation: NewDocumentAnnotation, - ) -> PgResult { - use schema::document_annotations; + new_annotation: NewFileAnnotation, + ) -> PgResult { + use schema::file_annotations; - let annotation = diesel::insert_into(document_annotations::table) + let annotation = diesel::insert_into(file_annotations::table) .values(&new_annotation) - .returning(DocumentAnnotation::as_returning()) + .returning(FileAnnotation::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -86,16 +86,16 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotation) } - async fn find_document_annotation_by_id( + async fn find_file_annotation_by_id( &mut self, annotation_id: Uuid, - ) -> PgResult> { - use schema::document_annotations::{self, dsl}; + ) -> PgResult> { + use schema::file_annotations::{self, dsl}; - let annotation = document_annotations::table + let annotation = file_annotations::table .filter(dsl::id.eq(annotation_id)) .filter(dsl::deleted_at.is_null()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .first(self) .await .optional() @@ -104,20 +104,20 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotation) } - async fn offset_list_file_document_annotations( + async fn offset_list_file_annotations( &mut self, file_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_annotations::{self, dsl}; + ) -> PgResult> { + use schema::file_annotations::{self, dsl}; - let annotations = document_annotations::table - .filter(dsl::document_file_id.eq(file_id)) + let annotations = file_annotations::table + .filter(dsl::file_id.eq(file_id)) .filter(dsl::deleted_at.is_null()) .order(dsl::created_at.desc()) .limit(pagination.limit) .offset(pagination.offset) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -125,21 +125,19 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotations) } - async fn cursor_list_file_document_annotations( + async fn cursor_list_file_annotations( &mut self, file_id: Uuid, pagination: CursorPagination, - ) -> PgResult> { + ) -> PgResult> { use diesel::dsl::count_star; - use schema::document_annotations::{self, dsl}; + use schema::file_annotations::{self, dsl}; - let base_filter = dsl::document_file_id - .eq(file_id) - 
.and(dsl::deleted_at.is_null()); + let base_filter = dsl::file_id.eq(file_id).and(dsl::deleted_at.is_null()); let total = if pagination.include_count { Some( - document_annotations::table + file_annotations::table .filter(base_filter) .select(count_star()) .get_result(self) @@ -152,7 +150,7 @@ impl DocumentAnnotationRepository for PgConnection { let items = if let Some(cursor) = &pagination.after { let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_annotations::table + file_annotations::table .filter(base_filter) .filter( dsl::created_at @@ -161,16 +159,16 @@ impl DocumentAnnotationRepository for PgConnection { ) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? } else { - document_annotations::table + file_annotations::table .filter(base_filter) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? @@ -181,20 +179,20 @@ impl DocumentAnnotationRepository for PgConnection { })) } - async fn offset_list_account_document_annotations( + async fn offset_list_account_file_annotations( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_annotations::{self, dsl}; + ) -> PgResult> { + use schema::file_annotations::{self, dsl}; - let annotations = document_annotations::table + let annotations = file_annotations::table .filter(dsl::account_id.eq(account_id)) .filter(dsl::deleted_at.is_null()) .order(dsl::created_at.desc()) .limit(pagination.limit) .offset(pagination.offset) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -202,13 +200,13 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotations) } - async fn cursor_list_account_document_annotations( + async fn cursor_list_account_file_annotations( &mut self, account_id: Uuid, pagination: CursorPagination, - ) -> PgResult> { + ) -> PgResult> { use diesel::dsl::count_star; - use schema::document_annotations::{self, dsl}; + use schema::file_annotations::{self, dsl}; let base_filter = dsl::account_id .eq(account_id) @@ -216,7 +214,7 @@ impl DocumentAnnotationRepository for PgConnection { let total = if pagination.include_count { Some( - document_annotations::table + file_annotations::table .filter(base_filter) .select(count_star()) .get_result(self) @@ -229,7 +227,7 @@ impl DocumentAnnotationRepository for PgConnection { let items = if let Some(cursor) = &pagination.after { let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_annotations::table + file_annotations::table .filter(base_filter) .filter( dsl::created_at @@ -238,16 +236,16 @@ impl DocumentAnnotationRepository for PgConnection { ) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? } else { - document_annotations::table + file_annotations::table .filter(base_filter) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? 
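Every cursor-paginated listing in this diff uses the same keyset scheme: order by a timestamp column and `id`, both descending; encode the cursor as the last row's `(timestamp, id)`; admit only rows that sort strictly after it; and fetch one row more than the page size (`fetch_limit()` / `limit + 1`) so the caller can tell whether another page exists. The predicate is just a reversed tuple comparison, as this small stand-alone sketch shows:

/// True when `row` sorts strictly after `cursor` under `ORDER BY ts DESC, id DESC`,
/// i.e. the SQL predicate `ts < cursor.ts OR (ts = cursor.ts AND id < cursor.id)`.
fn after_cursor(row: (i64, u128), cursor: (i64, u128)) -> bool {
    // i64 stands in for the timestamp, u128 for the UUID's byte ordering.
    row.0 < cursor.0 || (row.0 == cursor.0 && row.1 < cursor.1)
}

fn main() {
    let cursor = (100, 50); // (timestamp, id) of the last row on the previous page
    assert!(after_cursor((99, 999), cursor)); // older timestamp: belongs to the next page
    assert!(after_cursor((100, 49), cursor)); // same timestamp, smaller id: next page
    assert!(!after_cursor((100, 50), cursor)); // the cursor row itself is excluded
    assert!(!after_cursor((101, 1), cursor)); // newer rows were already served
}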
@@ -258,29 +256,28 @@ impl DocumentAnnotationRepository for PgConnection { })) } - async fn update_document_annotation( + async fn update_file_annotation( &mut self, annotation_id: Uuid, - updates: UpdateDocumentAnnotation, - ) -> PgResult { - use schema::document_annotations::{self, dsl}; - - let annotation = - diesel::update(document_annotations::table.filter(dsl::id.eq(annotation_id))) - .set(&updates) - .returning(DocumentAnnotation::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; + updates: UpdateFileAnnotation, + ) -> PgResult { + use schema::file_annotations::{self, dsl}; + + let annotation = diesel::update(file_annotations::table.filter(dsl::id.eq(annotation_id))) + .set(&updates) + .returning(FileAnnotation::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; Ok(annotation) } - async fn delete_document_annotation(&mut self, annotation_id: Uuid) -> PgResult<()> { + async fn delete_file_annotation(&mut self, annotation_id: Uuid) -> PgResult<()> { use diesel::dsl::now; - use schema::document_annotations::{self, dsl}; + use schema::file_annotations::{self, dsl}; - diesel::update(document_annotations::table.filter(dsl::id.eq(annotation_id))) + diesel::update(file_annotations::table.filter(dsl::id.eq(annotation_id))) .set(dsl::deleted_at.eq(now)) .execute(self) .await diff --git a/crates/nvisy-postgres/src/query/file_chunk.rs b/crates/nvisy-postgres/src/query/file_chunk.rs new file mode 100644 index 0000000..edf9797 --- /dev/null +++ b/crates/nvisy-postgres/src/query/file_chunk.rs @@ -0,0 +1,338 @@ +//! File chunks repository for managing text segments and embeddings. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use pgvector::Vector; +use uuid::Uuid; + +use crate::model::{FileChunk, NewFileChunk, ScoredFileChunk, UpdateFileChunk}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for file chunk database operations. +/// +/// Handles chunk lifecycle management including creation, embedding updates, +/// and semantic similarity search via pgvector. +pub trait FileChunkRepository { + /// Creates multiple file chunks in a single transaction. + fn create_file_chunks( + &mut self, + new_chunks: Vec, + ) -> impl Future>> + Send; + + /// Updates a chunk with new data. + fn update_file_chunk( + &mut self, + chunk_id: Uuid, + updates: UpdateFileChunk, + ) -> impl Future> + Send; + + /// Deletes all chunks for a file. + fn delete_file_chunks(&mut self, file_id: Uuid) + -> impl Future> + Send; + + /// Lists all chunks for a specific file ordered by chunk index. + fn list_file_chunks( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Searches for similar chunks using cosine similarity. + /// + /// Returns chunks ordered by similarity (most similar first). + fn search_similar_chunks( + &mut self, + query_embedding: Vector, + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within specific files. + fn search_similar_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within a workspace. + fn search_similar_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within specific files with score filtering. + /// + /// Returns chunks with similarity score >= min_score, ordered by similarity. 
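A hypothetical retrieval sketch for the score-filtered search declared below, written as if from inside the crate; it assumes the query embedding has already been computed and that `ScoredFileChunk` exposes the `chunk` and `score` fields constructed later in this file:

use pgvector::Vector;
use uuid::Uuid;

use crate::query::FileChunkRepository;
use crate::{PgConnection, PgResult};

/// Prints up to ten chunks from the given files with similarity >= 0.75 (sketch).
async fn relevant_chunks(
    conn: &mut PgConnection,
    file_ids: &[Uuid],
    query_embedding: Vec<f32>,
) -> PgResult<()> {
    let hits = conn
        .search_scored_chunks_in_files(Vector::from(query_embedding), file_ids, 0.75, 10)
        .await?;
    for hit in hits {
        // Highest-scoring chunks come first; score = 1 - cosine_distance.
        println!("chunk {} scored {:.3}", hit.chunk.id, hit.score);
    }
    Ok(())
}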
+ fn search_scored_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + min_score: f64, + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within a workspace with score filtering. + /// + /// Returns chunks with similarity score >= min_score, ordered by similarity. + fn search_scored_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + min_score: f64, + limit: i64, + ) -> impl Future>> + Send; + + /// Gets the total chunk count for a file. + fn count_file_chunks(&mut self, file_id: Uuid) -> impl Future> + Send; +} + +impl FileChunkRepository for PgConnection { + async fn create_file_chunks( + &mut self, + new_chunks: Vec, + ) -> PgResult> { + use schema::file_chunks; + + if new_chunks.is_empty() { + return Ok(vec![]); + } + + let chunks = diesel::insert_into(file_chunks::table) + .values(&new_chunks) + .returning(FileChunk::as_returning()) + .get_results(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn update_file_chunk( + &mut self, + chunk_id: Uuid, + updates: UpdateFileChunk, + ) -> PgResult { + use schema::file_chunks::{self, dsl}; + + let chunk = diesel::update(file_chunks::table.filter(dsl::id.eq(chunk_id))) + .set(&updates) + .returning(FileChunk::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(chunk) + } + + async fn delete_file_chunks(&mut self, file_id: Uuid) -> PgResult { + use schema::file_chunks::{self, dsl}; + + let affected = diesel::delete(file_chunks::table.filter(dsl::file_id.eq(file_id))) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(affected) + } + + async fn list_file_chunks(&mut self, file_id: Uuid) -> PgResult> { + use schema::file_chunks::{self, dsl}; + + let chunks = file_chunks::table + .filter(dsl::file_id.eq(file_id)) + .order(dsl::chunk_index.asc()) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_similar_chunks( + &mut self, + query_embedding: Vector, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + + let chunks = file_chunks::table + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_similar_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + let chunks = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_similar_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + use schema::files; + + // Get all file IDs for the workspace + let file_ids: Vec = files::table + .filter(files::workspace_id.eq(workspace_id)) + .filter(files::deleted_at.is_null()) + .select(files::id) + .load(self) + .await + .map_err(PgError::from)?; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + let chunks = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + 
.order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_scored_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + min_score: f64, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + // Cosine distance ranges from 0 (identical) to 2 (opposite) + // Score = 1 - distance, so min_score threshold means max_distance = 1 - min_score + let max_distance = 1.0 - min_score; + + let chunks: Vec<(FileChunk, f64)> = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + .filter( + dsl::embedding + .cosine_distance(&query_embedding) + .le(max_distance), + ) + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(( + FileChunk::as_select(), + (1.0.into_sql::() + - dsl::embedding.cosine_distance(&query_embedding)), + )) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks + .into_iter() + .map(|(chunk, score)| ScoredFileChunk { chunk, score }) + .collect()) + } + + async fn search_scored_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + min_score: f64, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + use schema::files; + + // Get all file IDs for the workspace + let file_ids: Vec = files::table + .filter(files::workspace_id.eq(workspace_id)) + .filter(files::deleted_at.is_null()) + .select(files::id) + .load(self) + .await + .map_err(PgError::from)?; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + let max_distance = 1.0 - min_score; + + let chunks: Vec<(FileChunk, f64)> = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + .filter( + dsl::embedding + .cosine_distance(&query_embedding) + .le(max_distance), + ) + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(( + FileChunk::as_select(), + (1.0.into_sql::() + - dsl::embedding.cosine_distance(&query_embedding)), + )) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks + .into_iter() + .map(|(chunk, score)| ScoredFileChunk { chunk, score }) + .collect()) + } + + async fn count_file_chunks(&mut self, file_id: Uuid) -> PgResult { + use schema::file_chunks::{self, dsl}; + + let count: i64 = file_chunks::table + .filter(dsl::file_id.eq(file_id)) + .count() + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(count) + } +} diff --git a/crates/nvisy-postgres/src/query/mod.rs b/crates/nvisy-postgres/src/query/mod.rs index 1e727ea..d4b1f75 100644 --- a/crates/nvisy-postgres/src/query/mod.rs +++ b/crates/nvisy-postgres/src/query/mod.rs @@ -13,34 +13,35 @@ //! [`CursorPagination`]: crate::types::CursorPagination //! 
[`OffsetPagination`]: crate::types::OffsetPagination -pub mod account; -pub mod account_action_token; -pub mod account_api_token; -pub mod account_notification; +mod account; +mod account_action_token; +mod account_api_token; +mod account_notification; -pub mod document; -pub mod document_annotation; -pub mod document_chunk; -pub mod document_comment; -pub mod document_file; +mod file; +mod file_annotation; +mod file_chunk; -pub mod workspace; -pub mod workspace_activity; -pub mod workspace_integration; -pub mod workspace_integration_run; -pub mod workspace_invite; -pub mod workspace_member; -pub mod workspace_webhook; +mod pipeline; +mod pipeline_run; + +mod workspace; +mod workspace_activity; +mod workspace_integration; +mod workspace_integration_run; +mod workspace_invite; +mod workspace_member; +mod workspace_webhook; pub use account::AccountRepository; pub use account_action_token::AccountActionTokenRepository; pub use account_api_token::AccountApiTokenRepository; pub use account_notification::AccountNotificationRepository; -pub use document::DocumentRepository; -pub use document_annotation::DocumentAnnotationRepository; -pub use document_chunk::DocumentChunkRepository; -pub use document_comment::DocumentCommentRepository; -pub use document_file::DocumentFileRepository; +pub use file::FileRepository; +pub use file_annotation::FileAnnotationRepository; +pub use file_chunk::FileChunkRepository; +pub use pipeline::PipelineRepository; +pub use pipeline_run::PipelineRunRepository; pub use workspace::WorkspaceRepository; pub use workspace_activity::WorkspaceActivityRepository; pub use workspace_integration::WorkspaceIntegrationRepository; diff --git a/crates/nvisy-postgres/src/query/pipeline.rs b/crates/nvisy-postgres/src/query/pipeline.rs new file mode 100644 index 0000000..1789cc6 --- /dev/null +++ b/crates/nvisy-postgres/src/query/pipeline.rs @@ -0,0 +1,361 @@ +//! Pipelines repository for managing workflow definitions. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use pgtrgm::expression_methods::TrgmExpressionMethods; +use uuid::Uuid; + +use crate::model::{NewPipeline, Pipeline, UpdatePipeline}; +use crate::types::{CursorPage, CursorPagination, OffsetPagination, PipelineStatus}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for pipeline database operations. +/// +/// Handles pipeline lifecycle management including creation, updates, +/// status transitions, and queries. +pub trait PipelineRepository { + /// Creates a new pipeline record. + fn create_pipeline( + &mut self, + new_pipeline: NewPipeline, + ) -> impl Future> + Send; + + /// Finds a pipeline by its unique identifier. + fn find_pipeline_by_id( + &mut self, + pipeline_id: Uuid, + ) -> impl Future>> + Send; + + /// Finds a pipeline by ID within a specific workspace. + /// + /// Provides workspace-scoped access control at the database level. + fn find_workspace_pipeline( + &mut self, + workspace_id: Uuid, + pipeline_id: Uuid, + ) -> impl Future>> + Send; + + /// Lists all pipelines in a workspace with offset pagination. + fn offset_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists all pipelines in a workspace with cursor pagination. 
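A hypothetical sketch of calling the simpler listing and counting methods declared further down in this trait, again written as if from inside the crate and assuming `Pipeline` exposes a `name` field:

use uuid::Uuid;

use crate::query::PipelineRepository;
use crate::types::PipelineStatus;
use crate::{PgConnection, PgResult};

/// Prints a small summary of a workspace's pipelines (sketch).
async fn pipeline_summary(conn: &mut PgConnection, workspace_id: Uuid) -> PgResult<()> {
    let enabled_count = conn
        .count_workspace_pipelines_by_status(workspace_id, PipelineStatus::Enabled)
        .await?;
    println!("workspace has {enabled_count} enabled pipelines");
    for pipeline in conn.list_enabled_workspace_pipelines(workspace_id).await? {
        println!("  {}", pipeline.name);
    }
    Ok(())
}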
+ fn cursor_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + search_term: Option<&str>, + ) -> impl Future>> + Send; + + /// Lists all pipelines created by an account with offset pagination. + fn offset_list_account_pipelines( + &mut self, + account_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists enabled pipelines in a workspace. + fn list_enabled_workspace_pipelines( + &mut self, + workspace_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates a pipeline with new data. + fn update_pipeline( + &mut self, + pipeline_id: Uuid, + updates: UpdatePipeline, + ) -> impl Future> + Send; + + /// Soft deletes a pipeline by setting the deletion timestamp. + fn delete_pipeline(&mut self, pipeline_id: Uuid) -> impl Future> + Send; + + /// Counts pipelines in a workspace by status. + fn count_workspace_pipelines_by_status( + &mut self, + workspace_id: Uuid, + status: PipelineStatus, + ) -> impl Future> + Send; + + /// Searches pipelines by name using trigram similarity. + fn search_pipelines_by_name( + &mut self, + workspace_id: Uuid, + search_term: &str, + limit: i64, + ) -> impl Future>> + Send; +} + +impl PipelineRepository for PgConnection { + async fn create_pipeline(&mut self, new_pipeline: NewPipeline) -> PgResult { + use schema::pipelines; + + let pipeline = diesel::insert_into(pipelines::table) + .values(&new_pipeline) + .returning(Pipeline::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn find_pipeline_by_id(&mut self, pipeline_id: Uuid) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipeline = pipelines::table + .filter(dsl::id.eq(pipeline_id)) + .filter(dsl::deleted_at.is_null()) + .select(Pipeline::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn find_workspace_pipeline( + &mut self, + workspace_id: Uuid, + pipeline_id: Uuid, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipeline = pipelines::table + .filter(dsl::id.eq(pipeline_id)) + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .select(Pipeline::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn offset_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } + + async fn cursor_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + search_term: Option<&str>, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + // Build base query with filters + let mut base_query = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .into_boxed(); + + // Apply status filter + if let Some(status) = status_filter { + base_query = base_query.filter(dsl::status.eq(status)); + } + + // Apply search filter + if let Some(term) = search_term { + base_query = base_query.filter(dsl::name.trgm_similar_to(term)); + } + + let total = if pagination.include_count { + Some( + base_query + 
.count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + // Rebuild query for fetching items + let mut query = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .into_boxed(); + + if let Some(status) = status_filter { + query = query.filter(dsl::status.eq(status)); + } + + if let Some(term) = search_term { + query = query.filter(dsl::name.trgm_similar_to(term)); + } + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + query + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(Pipeline::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + query + .select(Pipeline::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |p: &Pipeline| (p.created_at.into(), p.id), + )) + } + + async fn offset_list_account_pipelines( + &mut self, + account_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::account_id.eq(account_id)) + .filter(dsl::deleted_at.is_null()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } + + async fn list_enabled_workspace_pipelines( + &mut self, + workspace_id: Uuid, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::status.eq(PipelineStatus::Enabled)) + .filter(dsl::deleted_at.is_null()) + .order(dsl::name.asc()) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } + + async fn update_pipeline( + &mut self, + pipeline_id: Uuid, + updates: UpdatePipeline, + ) -> PgResult { + use schema::pipelines::{self, dsl}; + + let pipeline = diesel::update(pipelines::table.filter(dsl::id.eq(pipeline_id))) + .set(&updates) + .returning(Pipeline::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn delete_pipeline(&mut self, pipeline_id: Uuid) -> PgResult<()> { + use diesel::dsl::now; + use schema::pipelines::{self, dsl}; + + diesel::update(pipelines::table.filter(dsl::id.eq(pipeline_id))) + .set(dsl::deleted_at.eq(now)) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(()) + } + + async fn count_workspace_pipelines_by_status( + &mut self, + workspace_id: Uuid, + status: PipelineStatus, + ) -> PgResult { + use schema::pipelines::{self, dsl}; + + let count = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::status.eq(status)) + .filter(dsl::deleted_at.is_null()) + .count() + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(count) + } + + async fn search_pipelines_by_name( + &mut self, + workspace_id: Uuid, + search_term: &str, + limit: i64, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::name.trgm_similar_to(search_term)) + .filter(dsl::deleted_at.is_null()) + 
.order(dsl::name.asc()) + .limit(limit) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } +} diff --git a/crates/nvisy-postgres/src/query/pipeline_run.rs b/crates/nvisy-postgres/src/query/pipeline_run.rs new file mode 100644 index 0000000..4794df6 --- /dev/null +++ b/crates/nvisy-postgres/src/query/pipeline_run.rs @@ -0,0 +1,536 @@ +//! Pipeline runs repository for managing pipeline execution instances. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use uuid::Uuid; + +use crate::model::{NewPipelineRun, PipelineRun, UpdatePipelineRun}; +use crate::types::{CursorPage, CursorPagination, OffsetPagination, PipelineRunStatus}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for pipeline run database operations. +/// +/// Handles pipeline run lifecycle management including creation, status updates, +/// completion tracking, and queries. +pub trait PipelineRunRepository { + /// Creates a new pipeline run record. + fn create_pipeline_run( + &mut self, + new_run: NewPipelineRun, + ) -> impl Future> + Send; + + /// Finds a pipeline run by its unique identifier. + fn find_pipeline_run_by_id( + &mut self, + run_id: Uuid, + ) -> impl Future>> + Send; + + /// Finds a pipeline run by ID within a specific workspace. + fn find_workspace_pipeline_run( + &mut self, + workspace_id: Uuid, + run_id: Uuid, + ) -> impl Future>> + Send; + + /// Lists all runs for a specific pipeline with offset pagination. + fn offset_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists all runs for a specific pipeline with cursor pagination. + fn cursor_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> impl Future>> + Send; + + /// Lists all runs in a workspace with offset pagination. + fn offset_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists all runs in a workspace with cursor pagination. + fn cursor_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> impl Future>> + Send; + + /// Lists active runs (queued or running) in a workspace. + fn list_active_workspace_runs( + &mut self, + workspace_id: Uuid, + ) -> impl Future>> + Send; + + /// Lists active runs (queued or running) for a specific pipeline. + fn list_active_pipeline_runs( + &mut self, + pipeline_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates a pipeline run with new data. + fn update_pipeline_run( + &mut self, + run_id: Uuid, + updates: UpdatePipelineRun, + ) -> impl Future> + Send; + + /// Marks a run as started. + fn start_pipeline_run( + &mut self, + run_id: Uuid, + ) -> impl Future> + Send; + + /// Marks a run as completed successfully. + fn complete_pipeline_run( + &mut self, + run_id: Uuid, + output_config: serde_json::Value, + metrics: serde_json::Value, + ) -> impl Future> + Send; + + /// Marks a run as failed with error details. + fn fail_pipeline_run( + &mut self, + run_id: Uuid, + error: serde_json::Value, + metrics: serde_json::Value, + ) -> impl Future> + Send; + + /// Marks a run as cancelled. + fn cancel_pipeline_run( + &mut self, + run_id: Uuid, + ) -> impl Future> + Send; + + /// Counts runs for a pipeline by status. 
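+    ///
+    /// Runs are never soft-deleted, so every run recorded for the pipeline with
+    /// the given status contributes to the count.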
+ fn count_pipeline_runs_by_status( + &mut self, + pipeline_id: Uuid, + status: PipelineRunStatus, + ) -> impl Future> + Send; + + /// Gets the most recent run for a pipeline. + fn find_latest_pipeline_run( + &mut self, + pipeline_id: Uuid, + ) -> impl Future>> + Send; +} + +impl PipelineRunRepository for PgConnection { + async fn create_pipeline_run(&mut self, new_run: NewPipelineRun) -> PgResult { + use schema::pipeline_runs; + + let run = diesel::insert_into(pipeline_runs::table) + .values(&new_run) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn find_pipeline_run_by_id(&mut self, run_id: Uuid) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let run = pipeline_runs::table + .filter(dsl::id.eq(run_id)) + .select(PipelineRun::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(run) + } + + async fn find_workspace_pipeline_run( + &mut self, + workspace_id: Uuid, + run_id: Uuid, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let run = pipeline_runs::table + .filter(dsl::id.eq(run_id)) + .filter(dsl::workspace_id.eq(workspace_id)) + .select(PipelineRun::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(run) + } + + async fn offset_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn cursor_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + // Build base query with filters + let mut base_query = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .into_boxed(); + + if let Some(status) = status_filter { + base_query = base_query.filter(dsl::status.eq(status)); + } + + let total = if pagination.include_count { + Some( + base_query + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + // Rebuild query for fetching items + let mut query = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .into_boxed(); + + if let Some(status) = status_filter { + query = query.filter(dsl::status.eq(status)); + } + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + query + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + query + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |r: &PipelineRun| (r.created_at.into(), r.id), + )) + } + + async fn offset_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn cursor_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + // Build base query with filters + let mut base_query = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .into_boxed(); + + if let Some(status) = status_filter { + base_query = base_query.filter(dsl::status.eq(status)); + } + + let total = if pagination.include_count { + Some( + base_query + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + // Rebuild query for fetching items + let mut query = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .into_boxed(); + + if let Some(status) = status_filter { + query = query.filter(dsl::status.eq(status)); + } + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + query + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + query + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |r: &PipelineRun| (r.created_at.into(), r.id), + )) + } + + async fn list_active_workspace_runs( + &mut self, + workspace_id: Uuid, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter( + dsl::status + .eq(PipelineRunStatus::Queued) + .or(dsl::status.eq(PipelineRunStatus::Running)), + ) + .order(dsl::created_at.desc()) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn list_active_pipeline_runs(&mut self, pipeline_id: Uuid) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .filter( + dsl::status + .eq(PipelineRunStatus::Queued) + .or(dsl::status.eq(PipelineRunStatus::Running)), + ) + .order(dsl::created_at.desc()) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn update_pipeline_run( + &mut self, + run_id: Uuid, + updates: UpdatePipelineRun, + ) -> PgResult { + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(&updates) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn start_pipeline_run(&mut self, run_id: Uuid) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Running), + dsl::started_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn complete_pipeline_run( + &mut self, + run_id: Uuid, + output_config: serde_json::Value, + metrics: serde_json::Value, + ) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Completed), + dsl::output_config.eq(output_config), + dsl::metrics.eq(metrics), + dsl::completed_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn fail_pipeline_run( + &mut self, + run_id: Uuid, + error: serde_json::Value, + metrics: serde_json::Value, + ) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Failed), + dsl::error.eq(Some(error)), + dsl::metrics.eq(metrics), + dsl::completed_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn cancel_pipeline_run(&mut self, run_id: Uuid) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Cancelled), + dsl::completed_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn count_pipeline_runs_by_status( + &mut self, + pipeline_id: Uuid, + status: PipelineRunStatus, + ) -> PgResult { + use schema::pipeline_runs::{self, dsl}; + + let count = 
pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .filter(dsl::status.eq(status)) + .count() + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(count) + } + + async fn find_latest_pipeline_run( + &mut self, + pipeline_id: Uuid, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let run = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .order(dsl::created_at.desc()) + .select(PipelineRun::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(run) + } +} diff --git a/crates/nvisy-postgres/src/query/workspace_member.rs b/crates/nvisy-postgres/src/query/workspace_member.rs index 7f7c91a..c6bbe36 100644 --- a/crates/nvisy-postgres/src/query/workspace_member.rs +++ b/crates/nvisy-postgres/src/query/workspace_member.rs @@ -149,6 +149,16 @@ pub trait WorkspaceMemberRepository { workspace_id: Uuid, email: &str, ) -> impl Future>> + Send; + + /// Checks if two accounts share at least one common workspace. + /// + /// Returns true if both accounts are members of at least one common workspace. + /// This is an optimized query that stops at the first match. + fn accounts_share_workspace( + &mut self, + account_id_a: Uuid, + account_id_b: Uuid, + ) -> impl Future> + Send; } impl WorkspaceMemberRepository for PgConnection { @@ -680,4 +690,38 @@ impl WorkspaceMemberRepository for PgConnection { Ok(result) } + + async fn accounts_share_workspace( + &mut self, + account_id_a: Uuid, + account_id_b: Uuid, + ) -> PgResult { + use diesel::dsl::exists; + use schema::workspace_members; + + // Self-check: an account always "shares" with itself + if account_id_a == account_id_b { + return Ok(true); + } + + // Use EXISTS with a self-join to find any common workspace + // This is optimized to stop at the first match + let wm_a = workspace_members::table; + let wm_b = diesel::alias!(workspace_members as wm_b); + + let shares = diesel::select(exists( + wm_a.inner_join( + wm_b.on(wm_b + .field(workspace_members::workspace_id) + .eq(workspace_members::workspace_id)), + ) + .filter(workspace_members::account_id.eq(account_id_a)) + .filter(wm_b.field(workspace_members::account_id).eq(account_id_b)), + )) + .get_result::(self) + .await + .map_err(PgError::from)?; + + Ok(shares) + } } diff --git a/crates/nvisy-postgres/src/query/workspace_webhook.rs b/crates/nvisy-postgres/src/query/workspace_webhook.rs index 988c6a0..bc15ad3 100644 --- a/crates/nvisy-postgres/src/query/workspace_webhook.rs +++ b/crates/nvisy-postgres/src/query/workspace_webhook.rs @@ -7,7 +7,9 @@ use diesel_async::RunQueryDsl; use uuid::Uuid; use crate::model::{NewWorkspaceWebhook, UpdateWorkspaceWebhook, WorkspaceWebhook}; -use crate::types::{Cursor, CursorPage, CursorPagination, OffsetPagination, WebhookStatus}; +use crate::types::{ + Cursor, CursorPage, CursorPagination, OffsetPagination, WebhookEvent, WebhookStatus, +}; use crate::{PgConnection, PgError, PgResult, schema}; /// Repository for workspace webhook database operations. @@ -82,6 +84,19 @@ pub trait WorkspaceWebhookRepository { &mut self, webhook_id: Uuid, ) -> impl Future> + Send; + + /// Finds all active webhooks for a workspace that are subscribed to a specific event. 
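+    /// Results are not paginated; every matching webhook is returned in a single list.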
+ /// + /// Returns webhooks where: + /// - The webhook belongs to the specified workspace + /// - The webhook status is Active + /// - The webhook's events array contains the specified event + /// - The webhook is not deleted + fn find_webhooks_for_event( + &mut self, + workspace_id: Uuid, + event: WebhookEvent, + ) -> impl Future>> + Send; } impl WorkspaceWebhookRepository for PgConnection { @@ -313,4 +328,34 @@ impl WorkspaceWebhookRepository for PgConnection { Ok(webhook) } + + async fn find_webhooks_for_event( + &mut self, + ws_id: Uuid, + event: WebhookEvent, + ) -> PgResult> { + use diesel::dsl::sql; + use diesel::sql_types::Bool; + use schema::workspace_webhooks::dsl::*; + + // Query webhooks where the events array contains the target event. + // Uses PostgreSQL's `@>` (array contains) operator via raw SQL. + // The events column is Array>, so we check if + // the array contains the event value. + let event_str = format!("'{}'", event.to_string().replace('\'', "''")); + let contains_event = + sql::(&format!("events @> ARRAY[{}]::WEBHOOK_EVENT[]", event_str)); + + let webhooks = workspace_webhooks + .filter(workspace_id.eq(ws_id)) + .filter(status.eq(WebhookStatus::Active)) + .filter(deleted_at.is_null()) + .filter(contains_event) + .select(WorkspaceWebhook::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(webhooks) + } } diff --git a/crates/nvisy-postgres/src/schema.rs b/crates/nvisy-postgres/src/schema.rs index 2ee76a9..830366a 100644 --- a/crates/nvisy-postgres/src/schema.rs +++ b/crates/nvisy-postgres/src/schema.rs @@ -18,8 +18,8 @@ pub mod sql_types { pub struct ApiTokenType; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "content_segmentation"))] - pub struct ContentSegmentation; + #[diesel(postgres_type(name = "file_source"))] + pub struct FileSource; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] #[diesel(postgres_type(name = "integration_status"))] @@ -38,24 +38,20 @@ pub mod sql_types { pub struct NotificationEvent; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "processing_status"))] - pub struct ProcessingStatus; + #[diesel(postgres_type(name = "pipeline_run_status"))] + pub struct PipelineRunStatus; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "require_mode"))] - pub struct RequireMode; + #[diesel(postgres_type(name = "pipeline_status"))] + pub struct PipelineStatus; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "run_type"))] - pub struct RunType; - - #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "studio_session_status"))] - pub struct StudioSessionStatus; + #[diesel(postgres_type(name = "pipeline_trigger_type"))] + pub struct PipelineTriggerType; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "studio_tool_status"))] - pub struct StudioToolStatus; + #[diesel(postgres_type(name = "run_type"))] + pub struct RunType; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] #[diesel(postgres_type(name = "webhook_event"))] @@ -162,9 +158,9 @@ diesel::table! 
{ use pgvector::sql_types::*; use super::sql_types::AnnotationType; - document_annotations (id) { + file_annotations (id) { id -> Uuid, - document_file_id -> Uuid, + file_id -> Uuid, account_id -> Uuid, content -> Text, annotation_type -> AnnotationType, @@ -179,7 +175,7 @@ diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - document_chunks (id) { + file_chunks (id) { id -> Uuid, file_id -> Uuid, chunk_index -> Int4, @@ -197,44 +193,20 @@ diesel::table! { diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; + use super::sql_types::FileSource; - document_comments (id) { - id -> Uuid, - file_id -> Uuid, - account_id -> Uuid, - parent_comment_id -> Nullable, - reply_to_account_id -> Nullable, - content -> Text, - metadata -> Jsonb, - created_at -> Timestamptz, - updated_at -> Timestamptz, - deleted_at -> Nullable, - } -} - -diesel::table! { - use diesel::sql_types::*; - use pgvector::sql_types::*; - use super::sql_types::RequireMode; - use super::sql_types::ProcessingStatus; - use super::sql_types::ContentSegmentation; - - document_files (id) { + files (id) { id -> Uuid, workspace_id -> Uuid, - document_id -> Nullable, account_id -> Uuid, parent_id -> Nullable, + version_number -> Int4, display_name -> Text, original_filename -> Text, file_extension -> Text, + mime_type -> Nullable, tags -> Array>, - require_mode -> RequireMode, - processing_priority -> Int4, - processing_status -> ProcessingStatus, - is_indexed -> Bool, - content_segmentation -> ContentSegmentation, - visual_support -> Bool, + source -> FileSource, file_size_bytes -> Int8, file_hash_sha256 -> Bytea, storage_path -> Text, @@ -249,75 +221,44 @@ diesel::table! { diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; + use super::sql_types::PipelineTriggerType; + use super::sql_types::PipelineRunStatus; - documents (id) { + pipeline_runs (id) { id -> Uuid, + pipeline_id -> Uuid, workspace_id -> Uuid, account_id -> Uuid, - display_name -> Text, - description -> Nullable, - tags -> Array>, - metadata -> Jsonb, - created_at -> Timestamptz, - updated_at -> Timestamptz, - deleted_at -> Nullable, - } -} - -diesel::table! { - use diesel::sql_types::*; - use pgvector::sql_types::*; - - studio_operations (id) { - id -> Uuid, - tool_call_id -> Uuid, - file_id -> Uuid, - chunk_id -> Nullable, - operation_type -> Text, - operation_diff -> Jsonb, - applied -> Bool, - reverted -> Bool, + trigger_type -> PipelineTriggerType, + status -> PipelineRunStatus, + input_config -> Jsonb, + output_config -> Jsonb, + definition_snapshot -> Jsonb, + error -> Nullable, + metrics -> Jsonb, + started_at -> Nullable, + completed_at -> Nullable, created_at -> Timestamptz, - applied_at -> Nullable, } } diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - use super::sql_types::StudioSessionStatus; + use super::sql_types::PipelineStatus; - studio_sessions (id) { + pipelines (id) { id -> Uuid, workspace_id -> Uuid, account_id -> Uuid, - primary_file_id -> Uuid, - display_name -> Text, - session_status -> StudioSessionStatus, - model_config -> Jsonb, - message_count -> Int4, - token_count -> Int4, + name -> Text, + description -> Nullable, + status -> PipelineStatus, + definition -> Jsonb, + metadata -> Jsonb, created_at -> Timestamptz, updated_at -> Timestamptz, - } -} - -diesel::table! 
{ - use diesel::sql_types::*; - use pgvector::sql_types::*; - use super::sql_types::StudioToolStatus; - - studio_tool_calls (id) { - id -> Uuid, - session_id -> Uuid, - file_id -> Uuid, - chunk_id -> Nullable, - tool_name -> Text, - tool_input -> Jsonb, - tool_output -> Jsonb, - tool_status -> StudioToolStatus, - started_at -> Timestamptz, - completed_at -> Nullable, + deleted_at -> Nullable, } } @@ -441,6 +382,7 @@ diesel::table! { url -> Text, events -> Array>, headers -> Jsonb, + secret -> Text, status -> WebhookStatus, last_triggered_at -> Nullable, created_by -> Uuid, @@ -474,24 +416,16 @@ diesel::table! { diesel::joinable!(account_action_tokens -> accounts (account_id)); diesel::joinable!(account_api_tokens -> accounts (account_id)); diesel::joinable!(account_notifications -> accounts (account_id)); -diesel::joinable!(document_annotations -> accounts (account_id)); -diesel::joinable!(document_annotations -> document_files (document_file_id)); -diesel::joinable!(document_chunks -> document_files (file_id)); -diesel::joinable!(document_comments -> document_files (file_id)); -diesel::joinable!(document_files -> accounts (account_id)); -diesel::joinable!(document_files -> documents (document_id)); -diesel::joinable!(document_files -> workspaces (workspace_id)); -diesel::joinable!(documents -> accounts (account_id)); -diesel::joinable!(documents -> workspaces (workspace_id)); -diesel::joinable!(studio_operations -> document_chunks (chunk_id)); -diesel::joinable!(studio_operations -> document_files (file_id)); -diesel::joinable!(studio_operations -> studio_tool_calls (tool_call_id)); -diesel::joinable!(studio_sessions -> accounts (account_id)); -diesel::joinable!(studio_sessions -> document_files (primary_file_id)); -diesel::joinable!(studio_sessions -> workspaces (workspace_id)); -diesel::joinable!(studio_tool_calls -> document_chunks (chunk_id)); -diesel::joinable!(studio_tool_calls -> document_files (file_id)); -diesel::joinable!(studio_tool_calls -> studio_sessions (session_id)); +diesel::joinable!(file_annotations -> accounts (account_id)); +diesel::joinable!(file_annotations -> files (file_id)); +diesel::joinable!(file_chunks -> files (file_id)); +diesel::joinable!(files -> accounts (account_id)); +diesel::joinable!(files -> workspaces (workspace_id)); +diesel::joinable!(pipeline_runs -> accounts (account_id)); +diesel::joinable!(pipeline_runs -> pipelines (pipeline_id)); +diesel::joinable!(pipeline_runs -> workspaces (workspace_id)); +diesel::joinable!(pipelines -> accounts (account_id)); +diesel::joinable!(pipelines -> workspaces (workspace_id)); diesel::joinable!(workspace_activities -> accounts (account_id)); diesel::joinable!(workspace_activities -> workspaces (workspace_id)); diesel::joinable!(workspace_integration_runs -> accounts (account_id)); @@ -511,14 +445,11 @@ diesel::allow_tables_to_appear_in_same_query!( account_api_tokens, account_notifications, accounts, - document_annotations, - document_chunks, - document_comments, - document_files, - documents, - studio_operations, - studio_sessions, - studio_tool_calls, + file_annotations, + file_chunks, + files, + pipeline_runs, + pipelines, workspace_activities, workspace_integration_runs, workspace_integrations, diff --git a/crates/nvisy-postgres/src/types/constants.rs b/crates/nvisy-postgres/src/types/constants.rs index 4ad6ad4..843c1dc 100644 --- a/crates/nvisy-postgres/src/types/constants.rs +++ b/crates/nvisy-postgres/src/types/constants.rs @@ -1,115 +1,39 @@ //! Constants used throughout the application. 
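+//!
+//! Each constant is a flat, top-level item whose doc comment notes the table it
+//! applies to.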
-/// Database-related constants. -pub mod database { - /// Default pagination limit. - pub const DEFAULT_PAGE_SIZE: i64 = 50; - - /// Maximum pagination limit. - pub const MAX_PAGE_SIZE: i64 = 1000; -} - -/// Security-related constants. -pub mod security { - /// Default bcrypt cost for password hashing. - pub const DEFAULT_BCRYPT_COST: u32 = 12; - - /// Maximum number of active sessions per user. - pub const MAX_SESSIONS_PER_USER: i32 = 10; -} - -/// File and storage related constants. -pub mod storage { - /// Maximum file size in MB. - pub const MAX_FILE_SIZE_MB: i32 = 100; - - /// Maximum total storage per workspace in MB. - pub const MAX_PROJECT_STORAGE_MB: i32 = 1000; -} - -/// Notification and communication constants. -pub mod notification { - /// Default notification retention days. - pub const DEFAULT_RETENTION_DAYS: i32 = 90; - - /// Number of days within which a notification is considered "recent". - pub const RECENT_DAYS: i64 = 7; -} - -/// Constants related to account security and behavior. -pub mod account { - /// Maximum number of consecutive failed login attempts before account lockout. - pub const MAX_FAILED_LOGIN_ATTEMPTS: i32 = 5; - - /// Number of days after which a password change reminder should be shown. - pub const PASSWORD_CHANGE_REMINDER_DAYS: i64 = 90; - - /// Number of days within which an account is considered "recently active". - pub const RECENT_ACTIVITY_DAYS: i64 = 30; - - /// Number of hours within which an account is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; -} - -/// Constants related to API tokens and sessions. -pub mod token { - /// Number of minutes within which a token is considered "recently used". - pub const RECENT_USE_MINUTES: i64 = 30; - - /// Number of minutes before expiry to show expiration warnings. - pub const EXPIRY_WARNING_MINUTES: i64 = 15; - - /// Number of hours after which a token is considered "long-lived". - pub const LONG_LIVED_THRESHOLD_HOURS: i64 = 24; - - /// Number of hours within which a token is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 1; -} - -/// Constants related to comments and discussions. -pub mod comment { - /// Number of seconds of grace period for detecting comment edits. - pub const EDIT_GRACE_PERIOD_SECONDS: i64 = 5; - - /// Number of hours within which a comment is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; - - /// Number of hours within which a comment is considered "recently updated". - pub const RECENTLY_UPDATED_HOURS: i64 = 1; -} - -/// Constants related to workspaces and workspace management. -pub mod workspace { - /// Number of days within which workspace access is considered "recent". - pub const RECENT_ACCESS_DAYS: i64 = 7; - - /// Number of hours within which a workspace is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; -} - -/// Constants related to documents and document processing. -pub mod document { - /// Number of hours within which a document is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; - - /// Number of hours within which a document is considered "recently updated". - pub const RECENTLY_UPDATED_HOURS: i64 = 1; -} - -/// Constants related to file processing and storage. -pub mod file { - /// Number of hours within which a file is considered "recently uploaded". - pub const RECENTLY_UPLOADED_HOURS: i64 = 1; - - /// Number of days within which processing status is considered "stale". 
-    pub const STALE_PROCESSING_DAYS: i64 = 1;
-}
-
-/// Constants related to invitations and membership.
-pub mod invite {
-    /// Number of days an invitation remains valid by default.
-    pub const DEFAULT_EXPIRY_DAYS: i64 = 7;
-
-    /// Number of hours within which an invite is considered "recently sent".
-    pub const RECENTLY_SENT_HOURS: i64 = 24;
-}
+/// Number of minutes before expiry to show expiration warnings.
+///
+/// Used in: `account_api_tokens`
+pub const EXPIRY_WARNING_MINUTES: i64 = 15;
+
+/// Number of hours after which a token is considered "long-lived".
+///
+/// Used in: `account_api_tokens`
+pub const LONG_LIVED_THRESHOLD_HOURS: i64 = 24;
+
+/// Number of seconds of grace period for detecting comment edits.
+///
+/// Used in: `document_comments`
+pub const EDIT_GRACE_PERIOD_SECONDS: i64 = 5;
+
+/// Number of hours within which a file is considered "recently uploaded".
+///
+/// Used in: `files`
+pub const RECENTLY_UPLOADED_HOURS: i64 = 1;
+
+/// Number of hours within which an invite is considered "recently sent".
+///
+/// Used in: `workspace_invites`
+pub const RECENTLY_SENT_HOURS: i64 = 24;
+
+/// Default notification retention days.
+///
+/// Used in: `account_notifications`
+pub const DEFAULT_RETENTION_DAYS: i32 = 90;
+
+/// Number of dimensions for vector embeddings.
+///
+/// This value must match the `VECTOR(n)` dimension in the database schema.
+/// Currently configured for OpenAI text-embedding-3-small (1536 dimensions).
+///
+/// Used in: `file_chunks`
+pub const EMBEDDING_DIMENSIONS: usize = 1536;
diff --git a/crates/nvisy-postgres/src/types/constraint/document_annotations.rs b/crates/nvisy-postgres/src/types/constraint/document_annotations.rs
deleted file mode 100644
index 2e974fc..0000000
--- a/crates/nvisy-postgres/src/types/constraint/document_annotations.rs
+++ /dev/null
@@ -1,68 +0,0 @@
-//! Document annotations table constraint violations.
-
-use serde::{Deserialize, Serialize};
-use strum::{Display, EnumIter, EnumString};
-
-use super::ConstraintCategory;
-
-/// Document annotations table constraint violations.
-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
-#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)]
-#[serde(into = "String", try_from = "String")]
-pub enum DocumentAnnotationConstraints {
-    // Annotation content constraints
-    #[strum(serialize = "document_annotations_content_length")]
-    ContentLength,
-    #[strum(serialize = "document_annotations_type_format")]
-    TypeFormat,
-
-    // Annotation metadata constraints
-    #[strum(serialize = "document_annotations_metadata_size")]
-    MetadataSize,
-
-    // Annotation chronological constraints
-    #[strum(serialize = "document_annotations_updated_after_created")]
-    UpdatedAfterCreated,
-    #[strum(serialize = "document_annotations_deleted_after_created")]
-    DeletedAfterCreated,
-    #[strum(serialize = "document_annotations_deleted_after_updated")]
-    DeletedAfterUpdated,
-}
-
-impl DocumentAnnotationConstraints {
-    /// Creates a new [`DocumentAnnotationConstraints`] from the constraint name.
-    pub fn new(constraint: &str) -> Option<Self> {
-        constraint.parse().ok()
-    }
-
-    /// Returns the category of this constraint violation.
- pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentAnnotationConstraints::ContentLength - | DocumentAnnotationConstraints::TypeFormat - | DocumentAnnotationConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentAnnotationConstraints::UpdatedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterUpdated => { - ConstraintCategory::Chronological - } - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentAnnotationConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentAnnotationConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_chunks.rs b/crates/nvisy-postgres/src/types/constraint/document_chunks.rs deleted file mode 100644 index 75983f9..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_chunks.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Document chunks table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document chunks table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentChunkConstraints { - // Chunk position constraints - #[strum(serialize = "document_chunks_chunk_index_min")] - ChunkIndexMin, - - // Content constraints - #[strum(serialize = "document_chunks_content_sha256_length")] - ContentSha256Length, - #[strum(serialize = "document_chunks_content_size_min")] - ContentSizeMin, - #[strum(serialize = "document_chunks_token_count_min")] - TokenCountMin, - - // Embedding constraints - #[strum(serialize = "document_chunks_embedding_model_format")] - EmbeddingModelFormat, - - // Metadata constraints - #[strum(serialize = "document_chunks_metadata_size")] - MetadataSize, - - // Chronological constraints - #[strum(serialize = "document_chunks_updated_after_created")] - UpdatedAfterCreated, - - // Uniqueness constraints - #[strum(serialize = "document_chunks_file_chunk_unique")] - FileChunkUnique, -} - -impl DocumentChunkConstraints { - /// Creates a new [`DocumentChunkConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentChunkConstraints::ChunkIndexMin - | DocumentChunkConstraints::ContentSha256Length - | DocumentChunkConstraints::ContentSizeMin - | DocumentChunkConstraints::TokenCountMin - | DocumentChunkConstraints::EmbeddingModelFormat - | DocumentChunkConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentChunkConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, - - DocumentChunkConstraints::FileChunkUnique => ConstraintCategory::Uniqueness, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentChunkConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentChunkConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_comments.rs b/crates/nvisy-postgres/src/types/constraint/document_comments.rs deleted file mode 100644 index 9c41218..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_comments.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! Document comments table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document comments table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentCommentConstraints { - // Comment content validation constraints - #[strum(serialize = "document_comments_content_length")] - ContentLength, - - // Comment target validation constraints - #[strum(serialize = "document_comments_one_target")] - OneTarget, - - // Comment metadata constraints - #[strum(serialize = "document_comments_metadata_size")] - MetadataSize, - - // Comment chronological constraints - #[strum(serialize = "document_comments_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "document_comments_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "document_comments_deleted_after_updated")] - DeletedAfterUpdated, -} - -impl DocumentCommentConstraints { - /// Creates a new [`DocumentCommentConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentCommentConstraints::ContentLength - | DocumentCommentConstraints::OneTarget - | DocumentCommentConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentCommentConstraints::UpdatedAfterCreated - | DocumentCommentConstraints::DeletedAfterCreated - | DocumentCommentConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentCommentConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentCommentConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_files.rs b/crates/nvisy-postgres/src/types/constraint/document_files.rs deleted file mode 100644 index f95977a..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_files.rs +++ /dev/null @@ -1,99 +0,0 @@ -//! Document files table constraint violations. 
- -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document files table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentFileConstraints { - // File identity validation constraints - #[strum(serialize = "document_files_display_name_length")] - DisplayNameLength, - #[strum(serialize = "document_files_original_filename_length")] - OriginalFilenameLength, - #[strum(serialize = "document_files_file_extension_format")] - FileExtensionFormat, - #[strum(serialize = "document_files_tags_count_max")] - TagsCountMax, - - // File processing constraints - #[strum(serialize = "document_files_processing_priority_range")] - ProcessingPriorityRange, - - // File storage constraints - #[strum(serialize = "document_files_file_size_min")] - FileSizeMin, - #[strum(serialize = "document_files_storage_path_not_empty")] - StoragePathNotEmpty, - #[strum(serialize = "document_files_storage_bucket_not_empty")] - StorageBucketNotEmpty, - #[strum(serialize = "document_files_file_hash_sha256_length")] - FileHashSha256Length, - - // File metadata constraints - #[strum(serialize = "document_files_metadata_size")] - MetadataSize, - - // File retention constraints - #[strum(serialize = "document_files_retention_period")] - RetentionPeriod, - - // File chronological constraints - #[strum(serialize = "document_files_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "document_files_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "document_files_deleted_after_updated")] - DeletedAfterUpdated, - #[strum(serialize = "document_files_auto_delete_after_created")] - AutoDeleteAfterCreated, -} - -impl DocumentFileConstraints { - /// Creates a new [`DocumentFileConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentFileConstraints::DisplayNameLength - | DocumentFileConstraints::OriginalFilenameLength - | DocumentFileConstraints::FileExtensionFormat - | DocumentFileConstraints::TagsCountMax - | DocumentFileConstraints::ProcessingPriorityRange - | DocumentFileConstraints::FileSizeMin - | DocumentFileConstraints::StoragePathNotEmpty - | DocumentFileConstraints::StorageBucketNotEmpty - | DocumentFileConstraints::FileHashSha256Length - | DocumentFileConstraints::MetadataSize - | DocumentFileConstraints::RetentionPeriod => ConstraintCategory::Validation, - - DocumentFileConstraints::UpdatedAfterCreated - | DocumentFileConstraints::DeletedAfterCreated - | DocumentFileConstraints::DeletedAfterUpdated - | DocumentFileConstraints::AutoDeleteAfterCreated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentFileConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentFileConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_versions.rs b/crates/nvisy-postgres/src/types/constraint/document_versions.rs deleted file mode 100644 index 476c880..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_versions.rs +++ /dev/null @@ -1,107 +0,0 @@ -//! 
Document versions table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document versions table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentVersionConstraints { - // Version validation constraints - #[strum(serialize = "document_versions_version_number_min")] - VersionNumberMin, - #[strum(serialize = "document_versions_display_name_length")] - DisplayNameLength, - #[strum(serialize = "document_versions_file_extension_format")] - FileExtensionFormat, - - // Version processing constraints - #[strum(serialize = "document_versions_processing_credits_min")] - ProcessingCreditsMin, - #[strum(serialize = "document_versions_processing_duration_min")] - ProcessingDurationMin, - #[strum(serialize = "document_versions_api_calls_min")] - ApiCallsMin, - - // Version storage constraints - #[strum(serialize = "document_versions_file_size_min")] - FileSizeMin, - #[strum(serialize = "document_versions_storage_path_not_empty")] - StoragePathNotEmpty, - #[strum(serialize = "document_versions_storage_bucket_not_empty")] - StorageBucketNotEmpty, - #[strum(serialize = "document_versions_file_hash_sha256_length")] - FileHashSha256Length, - - // Version metadata constraints - #[strum(serialize = "document_versions_results_size")] - ResultsSize, - #[strum(serialize = "document_versions_metadata_size")] - MetadataSize, - - // Version retention constraints - #[strum(serialize = "document_versions_retention_period")] - RetentionPeriod, - - // Version chronological constraints - #[strum(serialize = "document_versions_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "document_versions_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "document_versions_deleted_after_updated")] - DeletedAfterUpdated, - #[strum(serialize = "document_versions_auto_delete_after_created")] - AutoDeleteAfterCreated, -} - -impl DocumentVersionConstraints { - /// Creates a new [`DocumentVersionConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentVersionConstraints::VersionNumberMin - | DocumentVersionConstraints::DisplayNameLength - | DocumentVersionConstraints::FileExtensionFormat - | DocumentVersionConstraints::ProcessingCreditsMin - | DocumentVersionConstraints::ProcessingDurationMin - | DocumentVersionConstraints::ApiCallsMin - | DocumentVersionConstraints::FileSizeMin - | DocumentVersionConstraints::StoragePathNotEmpty - | DocumentVersionConstraints::StorageBucketNotEmpty - | DocumentVersionConstraints::FileHashSha256Length - | DocumentVersionConstraints::ResultsSize - | DocumentVersionConstraints::MetadataSize - | DocumentVersionConstraints::RetentionPeriod => ConstraintCategory::Validation, - - DocumentVersionConstraints::UpdatedAfterCreated - | DocumentVersionConstraints::DeletedAfterCreated - | DocumentVersionConstraints::DeletedAfterUpdated - | DocumentVersionConstraints::AutoDeleteAfterCreated => { - ConstraintCategory::Chronological - } - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentVersionConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentVersionConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/documents.rs b/crates/nvisy-postgres/src/types/constraint/documents.rs deleted file mode 100644 index 1ac5c85..0000000 --- a/crates/nvisy-postgres/src/types/constraint/documents.rs +++ /dev/null @@ -1,69 +0,0 @@ -//! Documents table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentConstraints { - // Document validation constraints - #[strum(serialize = "documents_display_name_length")] - DisplayNameLength, - #[strum(serialize = "documents_description_length_max")] - DescriptionLengthMax, - #[strum(serialize = "documents_tags_count_max")] - TagsCountMax, - - // Document metadata constraints - #[strum(serialize = "documents_metadata_size")] - MetadataSize, - - // Document chronological constraints - #[strum(serialize = "documents_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "documents_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "documents_deleted_after_updated")] - DeletedAfterUpdated, -} - -impl DocumentConstraints { - /// Creates a new [`DocumentConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentConstraints::DisplayNameLength - | DocumentConstraints::DescriptionLengthMax - | DocumentConstraints::TagsCountMax - | DocumentConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentConstraints::UpdatedAfterCreated - | DocumentConstraints::DeletedAfterCreated - | DocumentConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/file_annotations.rs b/crates/nvisy-postgres/src/types/constraint/file_annotations.rs new file mode 100644 index 0000000..a942af3 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/file_annotations.rs @@ -0,0 +1,64 @@ +//! File annotations table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// File annotations table constraint violations. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum FileAnnotationConstraints { + // Annotation content constraints + #[strum(serialize = "file_annotations_content_length")] + ContentLength, + + // Annotation metadata constraints + #[strum(serialize = "file_annotations_metadata_size")] + MetadataSize, + + // Annotation chronological constraints + #[strum(serialize = "file_annotations_updated_after_created")] + UpdatedAfterCreated, + #[strum(serialize = "file_annotations_deleted_after_created")] + DeletedAfterCreated, + #[strum(serialize = "file_annotations_deleted_after_updated")] + DeletedAfterUpdated, +} + +impl FileAnnotationConstraints { + /// Creates a new [`FileAnnotationConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + FileAnnotationConstraints::ContentLength | FileAnnotationConstraints::MetadataSize => { + ConstraintCategory::Validation + } + + FileAnnotationConstraints::UpdatedAfterCreated + | FileAnnotationConstraints::DeletedAfterCreated + | FileAnnotationConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: FileAnnotationConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for FileAnnotationConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/file_chunks.rs b/crates/nvisy-postgres/src/types/constraint/file_chunks.rs new file mode 100644 index 0000000..d75354f --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/file_chunks.rs @@ -0,0 +1,79 @@ +//! File chunks table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// File chunks table constraint violations. 
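+///
+/// Each variant's `strum` serialization matches the corresponding constraint name
+/// on the `file_chunks` table, so a violation reported by Postgres can be mapped
+/// back to a typed value via [`FileChunkConstraints::new`].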
+#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum FileChunkConstraints { + // Chunk position constraints + #[strum(serialize = "file_chunks_chunk_index_min")] + ChunkIndexMin, + + // Content constraints + #[strum(serialize = "file_chunks_content_sha256_length")] + ContentSha256Length, + #[strum(serialize = "file_chunks_content_size_min")] + ContentSizeMin, + #[strum(serialize = "file_chunks_token_count_min")] + TokenCountMin, + + // Embedding constraints + #[strum(serialize = "file_chunks_embedding_model_format")] + EmbeddingModelFormat, + + // Metadata constraints + #[strum(serialize = "file_chunks_metadata_size")] + MetadataSize, + + // Chronological constraints + #[strum(serialize = "file_chunks_updated_after_created")] + UpdatedAfterCreated, + + // Uniqueness constraints + #[strum(serialize = "file_chunks_file_chunk_unique")] + FileChunkUnique, +} + +impl FileChunkConstraints { + /// Creates a new [`FileChunkConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + FileChunkConstraints::ChunkIndexMin + | FileChunkConstraints::ContentSha256Length + | FileChunkConstraints::ContentSizeMin + | FileChunkConstraints::TokenCountMin + | FileChunkConstraints::EmbeddingModelFormat + | FileChunkConstraints::MetadataSize => ConstraintCategory::Validation, + + FileChunkConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, + + FileChunkConstraints::FileChunkUnique => ConstraintCategory::Uniqueness, + } + } +} + +impl From for String { + #[inline] + fn from(val: FileChunkConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for FileChunkConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/files.rs b/crates/nvisy-postgres/src/types/constraint/files.rs new file mode 100644 index 0000000..07b9c03 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/files.rs @@ -0,0 +1,94 @@ +//! Files table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Files table constraint violations. 
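+///
+/// Covers identity, storage, metadata, versioning, and timestamp-ordering checks
+/// on the `files` table. For example:
+///
+/// ```ignore
+/// let parsed = FileConstraints::new("files_display_name_length");
+/// assert!(parsed.is_some());
+/// ```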
+#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum FileConstraints { + // File identity validation constraints + #[strum(serialize = "files_display_name_length")] + DisplayNameLength, + #[strum(serialize = "files_original_filename_length")] + OriginalFilenameLength, + #[strum(serialize = "files_file_extension_format")] + FileExtensionFormat, + #[strum(serialize = "files_mime_type_format")] + MimeTypeFormat, + #[strum(serialize = "files_tags_count_max")] + TagsCountMax, + + // File storage constraints + #[strum(serialize = "files_file_size_min")] + FileSizeMin, + #[strum(serialize = "files_storage_path_not_empty")] + StoragePathNotEmpty, + #[strum(serialize = "files_storage_bucket_not_empty")] + StorageBucketNotEmpty, + #[strum(serialize = "files_file_hash_sha256_length")] + FileHashSha256Length, + + // File metadata constraints + #[strum(serialize = "files_metadata_size")] + MetadataSize, + + // File version constraints + #[strum(serialize = "files_version_number_min")] + VersionNumberMin, + + // File chronological constraints + #[strum(serialize = "files_updated_after_created")] + UpdatedAfterCreated, + #[strum(serialize = "files_deleted_after_created")] + DeletedAfterCreated, + #[strum(serialize = "files_deleted_after_updated")] + DeletedAfterUpdated, +} + +impl FileConstraints { + /// Creates a new [`FileConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + FileConstraints::DisplayNameLength + | FileConstraints::OriginalFilenameLength + | FileConstraints::FileExtensionFormat + | FileConstraints::MimeTypeFormat + | FileConstraints::TagsCountMax + | FileConstraints::FileSizeMin + | FileConstraints::StoragePathNotEmpty + | FileConstraints::StorageBucketNotEmpty + | FileConstraints::FileHashSha256Length + | FileConstraints::MetadataSize + | FileConstraints::VersionNumberMin => ConstraintCategory::Validation, + + FileConstraints::UpdatedAfterCreated + | FileConstraints::DeletedAfterCreated + | FileConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: FileConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for FileConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/mod.rs b/crates/nvisy-postgres/src/types/constraint/mod.rs index a53c34e..edc1b98 100644 --- a/crates/nvisy-postgres/src/types/constraint/mod.rs +++ b/crates/nvisy-postgres/src/types/constraint/mod.rs @@ -4,51 +4,49 @@ //! organized into logical groups for better maintainability. 
// Account-related constraint modules -pub mod account_action_tokens; -pub mod account_api_tokens; -pub mod account_notifications; -pub mod accounts; +mod account_action_tokens; +mod account_api_tokens; +mod account_notifications; +mod accounts; // Workspace-related constraint modules -pub mod workspace_activities; -pub mod workspace_integrations; -pub mod workspace_invites; -pub mod workspace_members; -pub mod workspace_webhooks; -pub mod workspaces; - -// Document-related constraint modules -pub mod document_annotations; -pub mod document_chunks; -pub mod document_comments; -pub mod document_files; -pub mod document_versions; -pub mod documents; - -// Workspace run constraint modules -pub mod workspace_integration_runs; +mod workspace_activities; +mod workspace_integration_runs; +mod workspace_integrations; +mod workspace_invites; +mod workspace_members; +mod workspace_webhooks; +mod workspaces; + +// File-related constraint modules +mod file_annotations; +mod file_chunks; +mod files; + +// Pipeline-related constraint modules +mod pipeline_runs; +mod pipelines; use std::fmt; -pub use account_action_tokens::AccountActionTokenConstraints; -pub use account_api_tokens::AccountApiTokenConstraints; -// Re-export all constraint types for convenience -pub use account_notifications::AccountNotificationConstraints; -pub use accounts::AccountConstraints; -pub use document_annotations::DocumentAnnotationConstraints; -pub use document_chunks::DocumentChunkConstraints; -pub use document_comments::DocumentCommentConstraints; -pub use document_files::DocumentFileConstraints; -pub use document_versions::DocumentVersionConstraints; -pub use documents::DocumentConstraints; use serde::{Deserialize, Serialize}; -pub use workspace_activities::WorkspaceActivitiesConstraints; -pub use workspace_integration_runs::WorkspaceIntegrationRunConstraints; -pub use workspace_integrations::WorkspaceIntegrationConstraints; -pub use workspace_invites::WorkspaceInviteConstraints; -pub use workspace_members::WorkspaceMemberConstraints; -pub use workspace_webhooks::WorkspaceWebhookConstraints; -pub use workspaces::WorkspaceConstraints; + +pub use self::account_action_tokens::AccountActionTokenConstraints; +pub use self::account_api_tokens::AccountApiTokenConstraints; +pub use self::account_notifications::AccountNotificationConstraints; +pub use self::accounts::AccountConstraints; +pub use self::file_annotations::FileAnnotationConstraints; +pub use self::file_chunks::FileChunkConstraints; +pub use self::files::FileConstraints; +pub use self::pipeline_runs::PipelineRunConstraints; +pub use self::pipelines::PipelineConstraints; +pub use self::workspace_activities::WorkspaceActivitiesConstraints; +pub use self::workspace_integration_runs::WorkspaceIntegrationRunConstraints; +pub use self::workspace_integrations::WorkspaceIntegrationConstraints; +pub use self::workspace_invites::WorkspaceInviteConstraints; +pub use self::workspace_members::WorkspaceMemberConstraints; +pub use self::workspace_webhooks::WorkspaceWebhookConstraints; +pub use self::workspaces::WorkspaceConstraints; /// Unified constraint violation enum that can represent any database constraint. 
/// @@ -73,13 +71,14 @@ pub enum ConstraintViolation { WorkspaceWebhook(WorkspaceWebhookConstraints), WorkspaceIntegrationRun(WorkspaceIntegrationRunConstraints), - // Document-related constraints - Document(DocumentConstraints), - DocumentAnnotation(DocumentAnnotationConstraints), - DocumentChunk(DocumentChunkConstraints), - DocumentComment(DocumentCommentConstraints), - DocumentFile(DocumentFileConstraints), - DocumentVersion(DocumentVersionConstraints), + // File-related constraints + File(FileConstraints), + FileAnnotation(FileAnnotationConstraints), + FileChunk(FileChunkConstraints), + + // Pipeline-related constraints + Pipeline(PipelineConstraints), + PipelineRun(PipelineRunConstraints), } /// Categories of database constraint violations. @@ -148,14 +147,13 @@ impl ConstraintViolation { WorkspaceWebhookConstraints::new => WorkspaceWebhook, WorkspaceIntegrationRunConstraints::new => WorkspaceIntegrationRun, }, - "documents" => try_parse!(DocumentConstraints::new => Document), - "document" => try_parse! { - DocumentAnnotationConstraints::new => DocumentAnnotation, - DocumentChunkConstraints::new => DocumentChunk, - DocumentCommentConstraints::new => DocumentComment, - DocumentFileConstraints::new => DocumentFile, - DocumentVersionConstraints::new => DocumentVersion, + "files" => try_parse!(FileConstraints::new => File), + "file" => try_parse! { + FileAnnotationConstraints::new => FileAnnotation, + FileChunkConstraints::new => FileChunk, }, + "pipelines" => try_parse!(PipelineConstraints::new => Pipeline), + "pipeline" => try_parse!(PipelineRunConstraints::new => PipelineRun), _ => None, } } @@ -180,13 +178,14 @@ impl ConstraintViolation { ConstraintViolation::WorkspaceWebhook(_) => "workspace_webhooks", ConstraintViolation::WorkspaceIntegrationRun(_) => "workspace_integration_runs", - // Document-related tables - ConstraintViolation::Document(_) => "documents", - ConstraintViolation::DocumentAnnotation(_) => "document_annotations", - ConstraintViolation::DocumentChunk(_) => "document_chunks", - ConstraintViolation::DocumentComment(_) => "document_comments", - ConstraintViolation::DocumentFile(_) => "document_files", - ConstraintViolation::DocumentVersion(_) => "document_versions", + // File-related tables + ConstraintViolation::File(_) => "files", + ConstraintViolation::FileAnnotation(_) => "file_annotations", + ConstraintViolation::FileChunk(_) => "file_chunks", + + // Pipeline-related tables + ConstraintViolation::Pipeline(_) => "pipelines", + ConstraintViolation::PipelineRun(_) => "pipeline_runs", } } @@ -194,7 +193,6 @@ impl ConstraintViolation { /// /// This groups constraints by their business domain for higher-level categorization. pub fn functional_area(&self) -> &'static str { - // TODO: Implement functional area enumeration. 
match self { ConstraintViolation::Account(_) | ConstraintViolation::AccountNotification(_) @@ -209,12 +207,11 @@ impl ConstraintViolation { | ConstraintViolation::WorkspaceWebhook(_) | ConstraintViolation::WorkspaceIntegrationRun(_) => "workspaces", - ConstraintViolation::Document(_) - | ConstraintViolation::DocumentAnnotation(_) - | ConstraintViolation::DocumentChunk(_) - | ConstraintViolation::DocumentComment(_) - | ConstraintViolation::DocumentFile(_) - | ConstraintViolation::DocumentVersion(_) => "documents", + ConstraintViolation::File(_) + | ConstraintViolation::FileAnnotation(_) + | ConstraintViolation::FileChunk(_) => "files", + + ConstraintViolation::Pipeline(_) | ConstraintViolation::PipelineRun(_) => "pipelines", } } @@ -236,12 +233,12 @@ impl ConstraintViolation { ConstraintViolation::WorkspaceWebhook(c) => c.categorize(), ConstraintViolation::WorkspaceIntegrationRun(c) => c.categorize(), - ConstraintViolation::Document(c) => c.categorize(), - ConstraintViolation::DocumentAnnotation(c) => c.categorize(), - ConstraintViolation::DocumentChunk(c) => c.categorize(), - ConstraintViolation::DocumentComment(c) => c.categorize(), - ConstraintViolation::DocumentFile(c) => c.categorize(), - ConstraintViolation::DocumentVersion(c) => c.categorize(), + ConstraintViolation::File(c) => c.categorize(), + ConstraintViolation::FileAnnotation(c) => c.categorize(), + ConstraintViolation::FileChunk(c) => c.categorize(), + + ConstraintViolation::Pipeline(c) => c.categorize(), + ConstraintViolation::PipelineRun(c) => c.categorize(), } } @@ -268,12 +265,12 @@ impl fmt::Display for ConstraintViolation { ConstraintViolation::WorkspaceWebhook(c) => write!(f, "{}", c), ConstraintViolation::WorkspaceIntegrationRun(c) => write!(f, "{}", c), - ConstraintViolation::Document(c) => write!(f, "{}", c), - ConstraintViolation::DocumentAnnotation(c) => write!(f, "{}", c), - ConstraintViolation::DocumentChunk(c) => write!(f, "{}", c), - ConstraintViolation::DocumentComment(c) => write!(f, "{}", c), - ConstraintViolation::DocumentFile(c) => write!(f, "{}", c), - ConstraintViolation::DocumentVersion(c) => write!(f, "{}", c), + ConstraintViolation::File(c) => write!(f, "{}", c), + ConstraintViolation::FileAnnotation(c) => write!(f, "{}", c), + ConstraintViolation::FileChunk(c) => write!(f, "{}", c), + + ConstraintViolation::Pipeline(c) => write!(f, "{}", c), + ConstraintViolation::PipelineRun(c) => write!(f, "{}", c), } } } @@ -307,10 +304,8 @@ mod tests { ); assert_eq!( - ConstraintViolation::new("document_versions_version_number_min"), - Some(ConstraintViolation::DocumentVersion( - DocumentVersionConstraints::VersionNumberMin - )) + ConstraintViolation::new("files_version_number_min"), + Some(ConstraintViolation::File(FileConstraints::VersionNumberMin)) ); assert_eq!(ConstraintViolation::new("unknown_constraint"), None); @@ -324,9 +319,8 @@ mod tests { let violation = ConstraintViolation::Workspace(WorkspaceConstraints::DisplayNameLength); assert_eq!(violation.table_name(), "workspaces"); - let violation = - ConstraintViolation::DocumentFile(DocumentFileConstraints::StoragePathNotEmpty); - assert_eq!(violation.table_name(), "document_files"); + let violation = ConstraintViolation::File(FileConstraints::StoragePathNotEmpty); + assert_eq!(violation.table_name(), "files"); } #[test] @@ -342,9 +336,8 @@ mod tests { ConstraintViolation::WorkspaceMember(WorkspaceMemberConstraints::ShowOrderRange); assert_eq!(violation.functional_area(), "workspaces"); - let violation = - 
ConstraintViolation::DocumentVersion(DocumentVersionConstraints::VersionNumberMin);
-        assert_eq!(violation.functional_area(), "documents");
+        let violation = ConstraintViolation::File(FileConstraints::VersionNumberMin);
+        assert_eq!(violation.functional_area(), "files");
    }

    #[test]
diff --git a/crates/nvisy-postgres/src/types/constraint/pipeline_runs.rs b/crates/nvisy-postgres/src/types/constraint/pipeline_runs.rs
new file mode 100644
index 0000000..4d92803
--- /dev/null
+++ b/crates/nvisy-postgres/src/types/constraint/pipeline_runs.rs
@@ -0,0 +1,73 @@
+//! Pipeline runs table constraint violations.
+
+use serde::{Deserialize, Serialize};
+use strum::{Display, EnumIter, EnumString};
+
+use super::ConstraintCategory;
+
+/// Pipeline runs table constraint violations.
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)]
+#[serde(into = "String", try_from = "String")]
+pub enum PipelineRunConstraints {
+    // Pipeline run input/output config constraints
+    #[strum(serialize = "pipeline_runs_input_config_size")]
+    InputConfigSize,
+    #[strum(serialize = "pipeline_runs_output_config_size")]
+    OutputConfigSize,
+
+    // Pipeline run definition snapshot constraints
+    #[strum(serialize = "pipeline_runs_definition_snapshot_size")]
+    DefinitionSnapshotSize,
+
+    // Pipeline run error constraints
+    #[strum(serialize = "pipeline_runs_error_size")]
+    ErrorSize,
+
+    // Pipeline run metrics constraints
+    #[strum(serialize = "pipeline_runs_metrics_size")]
+    MetricsSize,
+
+    // Pipeline run chronological constraints
+    #[strum(serialize = "pipeline_runs_started_after_created")]
+    StartedAfterCreated,
+    #[strum(serialize = "pipeline_runs_completed_after_started")]
+    CompletedAfterStarted,
+}
+
+impl PipelineRunConstraints {
+    /// Creates a new [`PipelineRunConstraints`] from the constraint name.
+    pub fn new(constraint: &str) -> Option<Self> {
+        constraint.parse().ok()
+    }
+
+    /// Returns the category of this constraint violation.
+    pub fn categorize(&self) -> ConstraintCategory {
+        match self {
+            PipelineRunConstraints::InputConfigSize
+            | PipelineRunConstraints::OutputConfigSize
+            | PipelineRunConstraints::DefinitionSnapshotSize
+            | PipelineRunConstraints::ErrorSize
+            | PipelineRunConstraints::MetricsSize => ConstraintCategory::Validation,
+
+            PipelineRunConstraints::StartedAfterCreated
+            | PipelineRunConstraints::CompletedAfterStarted => ConstraintCategory::Chronological,
+        }
+    }
+}
+
+impl From<PipelineRunConstraints> for String {
+    #[inline]
+    fn from(val: PipelineRunConstraints) -> Self {
+        val.to_string()
+    }
+}
+
+impl TryFrom<String> for PipelineRunConstraints {
+    type Error = strum::ParseError;
+
+    #[inline]
+    fn try_from(value: String) -> Result<Self, Self::Error> {
+        value.parse()
+    }
+}
diff --git a/crates/nvisy-postgres/src/types/constraint/pipelines.rs b/crates/nvisy-postgres/src/types/constraint/pipelines.rs
new file mode 100644
index 0000000..a21f9de
--- /dev/null
+++ b/crates/nvisy-postgres/src/types/constraint/pipelines.rs
@@ -0,0 +1,71 @@
+//! Pipelines table constraint violations.
+
+use serde::{Deserialize, Serialize};
+use strum::{Display, EnumIter, EnumString};
+
+use super::ConstraintCategory;
+
+/// Pipelines table constraint violations.
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)]
+#[serde(into = "String", try_from = "String")]
+pub enum PipelineConstraints {
+    // Pipeline name validation constraints
+    #[strum(serialize = "pipelines_name_length")]
+    NameLength,
+
+    // Pipeline description validation constraints
+    #[strum(serialize = "pipelines_description_length")]
+    DescriptionLength,
+
+    // Pipeline definition constraints
+    #[strum(serialize = "pipelines_definition_size")]
+    DefinitionSize,
+
+    // Pipeline metadata constraints
+    #[strum(serialize = "pipelines_metadata_size")]
+    MetadataSize,
+
+    // Pipeline chronological constraints
+    #[strum(serialize = "pipelines_updated_after_created")]
+    UpdatedAfterCreated,
+    #[strum(serialize = "pipelines_deleted_after_created")]
+    DeletedAfterCreated,
+}
+
+impl PipelineConstraints {
+    /// Creates a new [`PipelineConstraints`] from the constraint name.
+    pub fn new(constraint: &str) -> Option<Self> {
+        constraint.parse().ok()
+    }
+
+    /// Returns the category of this constraint violation.
+    pub fn categorize(&self) -> ConstraintCategory {
+        match self {
+            PipelineConstraints::NameLength
+            | PipelineConstraints::DescriptionLength
+            | PipelineConstraints::DefinitionSize
+            | PipelineConstraints::MetadataSize => ConstraintCategory::Validation,
+
+            PipelineConstraints::UpdatedAfterCreated | PipelineConstraints::DeletedAfterCreated => {
+                ConstraintCategory::Chronological
+            }
+        }
+    }
+}
+
+impl From<PipelineConstraints> for String {
+    #[inline]
+    fn from(val: PipelineConstraints) -> Self {
+        val.to_string()
+    }
+}
+
+impl TryFrom<String> for PipelineConstraints {
+    type Error = strum::ParseError;
+
+    #[inline]
+    fn try_from(value: String) -> Result<Self, Self::Error> {
+        value.parse()
+    }
+}
diff --git a/crates/nvisy-postgres/src/types/enums/content_segmentation.rs b/crates/nvisy-postgres/src/types/enums/content_segmentation.rs
deleted file mode 100644
index 7019d0e..0000000
--- a/crates/nvisy-postgres/src/types/enums/content_segmentation.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-//! Content segmentation enumeration for knowledge extraction.
-
-use diesel_derive_enum::DbEnum;
-#[cfg(feature = "schema")]
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
-use strum::{Display, EnumIter, EnumString};
-
-/// Defines the content segmentation strategy for document processing.
-///
-/// This enumeration corresponds to the `CONTENT_SEGMENTATION` PostgreSQL enum and is used
-/// to specify how document content should be segmented for knowledge extraction.
-#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
-#[cfg_attr(feature = "schema", derive(JsonSchema))]
-#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)]
-#[ExistingTypePath = "crate::schema::sql_types::ContentSegmentation"]
-pub enum ContentSegmentation {
-    /// No segmentation applied - process content as a whole
-    #[db_rename = "none"]
-    #[serde(rename = "none")]
-    None,
-
-    /// Semantic-based segmentation - split by meaning and context
-    #[db_rename = "semantic"]
-    #[serde(rename = "semantic")]
-    #[default]
-    Semantic,
-
-    /// Fixed-size segmentation - split by character or token count
-    #[db_rename = "fixed"]
-    #[serde(rename = "fixed")]
-    Fixed,
-}
-
-impl ContentSegmentation {
-    /// Returns whether segmentation is disabled.
-    #[inline]
-    pub fn is_disabled(self) -> bool {
-        matches!(self, ContentSegmentation::None)
-    }
-
-    /// Returns whether this strategy uses semantic analysis.
- #[inline] - pub fn is_semantic(self) -> bool { - matches!(self, ContentSegmentation::Semantic) - } - - /// Returns whether this strategy uses fixed-size chunks. - #[inline] - pub fn is_fixed(self) -> bool { - matches!(self, ContentSegmentation::Fixed) - } - - /// Returns whether this strategy preserves context between segments. - #[inline] - pub fn preserves_context(self) -> bool { - self.is_semantic() - } -} diff --git a/crates/nvisy-postgres/src/types/enums/file_source.rs b/crates/nvisy-postgres/src/types/enums/file_source.rs new file mode 100644 index 0000000..3c33723 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/file_source.rs @@ -0,0 +1,66 @@ +//! File source enumeration indicating how a file was created. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines how a file was created in the system. +/// +/// This enumeration corresponds to the `FILE_SOURCE` PostgreSQL enum and is used +/// to track the origin of files - whether they were uploaded by users, imported +/// from external sources, or generated by the system. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::FileSource"] +pub enum FileSource { + /// File was manually uploaded by a user + #[db_rename = "uploaded"] + #[serde(rename = "uploaded")] + #[default] + Uploaded, + + /// File was imported from an external source + #[db_rename = "imported"] + #[serde(rename = "imported")] + Imported, + + /// File was generated by the system + #[db_rename = "generated"] + #[serde(rename = "generated")] + Generated, +} + +impl FileSource { + /// Returns whether the file was uploaded by a user. + #[inline] + pub fn is_uploaded(self) -> bool { + matches!(self, FileSource::Uploaded) + } + + /// Returns whether the file was imported from an external source. + #[inline] + pub fn is_imported(self) -> bool { + matches!(self, FileSource::Imported) + } + + /// Returns whether the file was generated by the system. + #[inline] + pub fn is_generated(self) -> bool { + matches!(self, FileSource::Generated) + } + + /// Returns whether the file was created by a user action (uploaded or imported). + #[inline] + pub fn is_user_created(self) -> bool { + matches!(self, FileSource::Uploaded | FileSource::Imported) + } + + /// Returns whether the file was created automatically (generated). 
+ #[inline] + pub fn is_system_created(self) -> bool { + matches!(self, FileSource::Generated) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/mod.rs b/crates/nvisy-postgres/src/types/enums/mod.rs index 2308fd9..b4d867e 100644 --- a/crates/nvisy-postgres/src/types/enums/mod.rs +++ b/crates/nvisy-postgres/src/types/enums/mod.rs @@ -20,23 +20,27 @@ pub mod webhook_status; pub mod webhook_type; pub mod workspace_role; -// Document-related enumerations +// File-related enumerations pub mod annotation_type; -pub mod content_segmentation; -pub mod processing_status; -pub mod require_mode; +pub mod file_source; + +// Pipeline-related enumerations +pub mod pipeline_run_status; +pub mod pipeline_status; +pub mod pipeline_trigger_type; pub use action_token_type::ActionTokenType; pub use activity_type::{ActivityCategory, ActivityType}; pub use annotation_type::AnnotationType; pub use api_token_type::ApiTokenType; -pub use content_segmentation::ContentSegmentation; +pub use file_source::FileSource; pub use integration_status::IntegrationStatus; pub use integration_type::IntegrationType; pub use invite_status::InviteStatus; pub use notification_event::NotificationEvent; -pub use processing_status::ProcessingStatus; -pub use require_mode::RequireMode; +pub use pipeline_run_status::PipelineRunStatus; +pub use pipeline_status::PipelineStatus; +pub use pipeline_trigger_type::PipelineTriggerType; pub use run_type::RunType; pub use webhook_event::WebhookEvent; pub use webhook_status::WebhookStatus; diff --git a/crates/nvisy-postgres/src/types/enums/pipeline_run_status.rs b/crates/nvisy-postgres/src/types/enums/pipeline_run_status.rs new file mode 100644 index 0000000..06aaee2 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/pipeline_run_status.rs @@ -0,0 +1,105 @@ +//! Pipeline run status enumeration indicating the execution state of a pipeline run. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines the execution status of a pipeline run. +/// +/// This enumeration corresponds to the `PIPELINE_RUN_STATUS` PostgreSQL enum and is used +/// to track the current state of a pipeline execution. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::PipelineRunStatus"] +pub enum PipelineRunStatus { + /// Run is waiting to start + #[db_rename = "queued"] + #[serde(rename = "queued")] + #[default] + Queued, + + /// Run is in progress + #[db_rename = "running"] + #[serde(rename = "running")] + Running, + + /// Run finished successfully + #[db_rename = "completed"] + #[serde(rename = "completed")] + Completed, + + /// Run failed with error + #[db_rename = "failed"] + #[serde(rename = "failed")] + Failed, + + /// Run was cancelled by user + #[db_rename = "cancelled"] + #[serde(rename = "cancelled")] + Cancelled, +} + +impl PipelineRunStatus { + /// Returns whether the run is queued. + #[inline] + pub fn is_queued(self) -> bool { + matches!(self, PipelineRunStatus::Queued) + } + + /// Returns whether the run is currently running. + #[inline] + pub fn is_running(self) -> bool { + matches!(self, PipelineRunStatus::Running) + } + + /// Returns whether the run completed successfully. 
+ #[inline] + pub fn is_completed(self) -> bool { + matches!(self, PipelineRunStatus::Completed) + } + + /// Returns whether the run failed. + #[inline] + pub fn is_failed(self) -> bool { + matches!(self, PipelineRunStatus::Failed) + } + + /// Returns whether the run was cancelled. + #[inline] + pub fn is_cancelled(self) -> bool { + matches!(self, PipelineRunStatus::Cancelled) + } + + /// Returns whether the run is still active (queued or running). + #[inline] + pub fn is_active(self) -> bool { + matches!(self, PipelineRunStatus::Queued | PipelineRunStatus::Running) + } + + /// Returns whether the run has finished (completed, failed, or cancelled). + #[inline] + pub fn is_finished(self) -> bool { + matches!( + self, + PipelineRunStatus::Completed | PipelineRunStatus::Failed | PipelineRunStatus::Cancelled + ) + } + + /// Returns whether the run finished with a terminal error state. + #[inline] + pub fn is_terminal_error(self) -> bool { + matches!(self, PipelineRunStatus::Failed) + } + + /// Returns whether the run can be retried. + #[inline] + pub fn is_retriable(self) -> bool { + matches!( + self, + PipelineRunStatus::Failed | PipelineRunStatus::Cancelled + ) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/pipeline_status.rs b/crates/nvisy-postgres/src/types/enums/pipeline_status.rs new file mode 100644 index 0000000..d41d586 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/pipeline_status.rs @@ -0,0 +1,53 @@ +//! Pipeline status enumeration indicating the lifecycle state of a pipeline. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines the lifecycle status of a pipeline definition. +/// +/// This enumeration corresponds to the `PIPELINE_STATUS` PostgreSQL enum and is used +/// to track whether a pipeline is being configured, enabled and ready to run, or disabled. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::PipelineStatus"] +pub enum PipelineStatus { + /// Pipeline is being configured + #[db_rename = "draft"] + #[serde(rename = "draft")] + #[default] + Draft, + + /// Pipeline is ready to run + #[db_rename = "enabled"] + #[serde(rename = "enabled")] + Enabled, + + /// Pipeline is disabled + #[db_rename = "disabled"] + #[serde(rename = "disabled")] + Disabled, +} + +impl PipelineStatus { + /// Returns whether the pipeline is in draft status. + #[inline] + pub fn is_draft(self) -> bool { + matches!(self, PipelineStatus::Draft) + } + + /// Returns whether the pipeline is enabled. + #[inline] + pub fn is_enabled(self) -> bool { + matches!(self, PipelineStatus::Enabled) + } + + /// Returns whether the pipeline is disabled. + #[inline] + pub fn is_disabled(self) -> bool { + matches!(self, PipelineStatus::Disabled) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/pipeline_trigger_type.rs b/crates/nvisy-postgres/src/types/enums/pipeline_trigger_type.rs new file mode 100644 index 0000000..b071977 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/pipeline_trigger_type.rs @@ -0,0 +1,68 @@ +//! Pipeline trigger type enumeration indicating how a pipeline run was initiated. 
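The `PipelineRunStatus` predicates above are meant to compose into scheduling decisions rather than be matched on directly. A small illustrative sketch (the retry policy itself is an assumption, not part of this diff):

```rust
use nvisy_postgres::types::PipelineRunStatus;

// Hypothetical retry gate: only finished runs that are retriable
// (failed or cancelled) qualify; queued, running, and completed runs do not.
fn eligible_for_retry(status: PipelineRunStatus) -> bool {
    status.is_finished() && status.is_retriable()
}
```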
+ +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines how a pipeline run was initiated. +/// +/// This enumeration corresponds to the `PIPELINE_TRIGGER_TYPE` PostgreSQL enum and is used +/// to track whether a run was manually triggered, triggered by a source connector, or scheduled. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::PipelineTriggerType"] +pub enum PipelineTriggerType { + /// Manually triggered by user + #[db_rename = "manual"] + #[serde(rename = "manual")] + #[default] + Manual, + + /// Triggered by source connector (upload, webhook, etc.) + #[db_rename = "source"] + #[serde(rename = "source")] + Source, + + /// Triggered by schedule + #[db_rename = "scheduled"] + #[serde(rename = "scheduled")] + Scheduled, +} + +impl PipelineTriggerType { + /// Returns whether the run was manually triggered. + #[inline] + pub fn is_manual(self) -> bool { + matches!(self, PipelineTriggerType::Manual) + } + + /// Returns whether the run was triggered by a source connector. + #[inline] + pub fn is_source(self) -> bool { + matches!(self, PipelineTriggerType::Source) + } + + /// Returns whether the run was scheduled. + #[inline] + pub fn is_scheduled(self) -> bool { + matches!(self, PipelineTriggerType::Scheduled) + } + + /// Returns whether the run was triggered automatically (source or scheduled). + #[inline] + pub fn is_automatic(self) -> bool { + matches!( + self, + PipelineTriggerType::Source | PipelineTriggerType::Scheduled + ) + } + + /// Returns whether the run was triggered by user action. + #[inline] + pub fn is_user_initiated(self) -> bool { + matches!(self, PipelineTriggerType::Manual) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/processing_status.rs b/crates/nvisy-postgres/src/types/enums/processing_status.rs deleted file mode 100644 index 14fd140..0000000 --- a/crates/nvisy-postgres/src/types/enums/processing_status.rs +++ /dev/null @@ -1,111 +0,0 @@ -//! Processing status enumeration for document and file processing operations. - -use diesel_derive_enum::DbEnum; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -/// Defines the current processing status of a file in the processing pipeline. -/// -/// This enumeration corresponds to the `PROCESSING_STATUS` PostgreSQL enum and is used -/// to track the state of files as they progress through various processing stages -/// such as text extraction, OCR, transcription, and analysis. 
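For the `PipelineTriggerType` added above, `is_user_initiated` and `is_automatic` partition the variants. A short sketch of how the predicates might be consumed (the audit-label mapping is illustrative only):

```rust
use nvisy_postgres::types::PipelineTriggerType;

// Hypothetical audit label derived from how the run was started.
fn trigger_label(trigger: PipelineTriggerType) -> &'static str {
    if trigger.is_user_initiated() {
        "manual"
    } else if trigger.is_scheduled() {
        "scheduled"
    } else {
        // Remaining automatic case: triggered by a source connector.
        "source"
    }
}
```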
-#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::ProcessingStatus"] -pub enum ProcessingStatus { - /// File is queued for processing and waiting to be picked up - #[db_rename = "pending"] - #[serde(rename = "pending")] - #[default] - Pending, - - /// File is currently being processed by the system - #[db_rename = "processing"] - #[serde(rename = "processing")] - Processing, - - /// Processing completed, file is ready for use - #[db_rename = "ready"] - #[serde(rename = "ready")] - Ready, - - /// Processing was canceled by user or system - #[db_rename = "canceled"] - #[serde(rename = "canceled")] - Canceled, -} - -impl ProcessingStatus { - /// Returns whether the file is in a state that allows processing. - #[inline] - pub fn can_be_processed(self) -> bool { - matches!(self, ProcessingStatus::Pending) - } - - /// Returns whether the file is currently being processed. - #[inline] - pub fn is_processing(self) -> bool { - matches!(self, ProcessingStatus::Processing) - } - - /// Returns whether the processing is in a final state. - #[inline] - pub fn is_final(self) -> bool { - matches!(self, ProcessingStatus::Ready | ProcessingStatus::Canceled) - } - - /// Returns whether the file is ready for use. - #[inline] - pub fn is_ready(self) -> bool { - matches!(self, ProcessingStatus::Ready) - } - - /// Returns whether the processing was canceled. - #[inline] - pub fn is_canceled(self) -> bool { - matches!(self, ProcessingStatus::Canceled) - } - - /// Returns whether the processing is pending (waiting to start). - #[inline] - pub fn is_pending(self) -> bool { - matches!(self, ProcessingStatus::Pending) - } - - /// Returns whether the processing can be retried. - #[inline] - pub fn can_be_retried(self) -> bool { - matches!(self, ProcessingStatus::Ready | ProcessingStatus::Canceled) - } - - /// Returns whether the processing can be canceled. - #[inline] - pub fn can_be_canceled(self) -> bool { - matches!( - self, - ProcessingStatus::Pending | ProcessingStatus::Processing - ) - } - - /// Returns whether this status represents an active processing operation. - #[inline] - pub fn is_active(self) -> bool { - matches!( - self, - ProcessingStatus::Pending | ProcessingStatus::Processing - ) - } - - /// Returns processing statuses that are considered active (not final). - pub fn active_statuses() -> &'static [ProcessingStatus] { - &[ProcessingStatus::Pending, ProcessingStatus::Processing] - } - - /// Returns processing statuses that represent final states. - pub fn final_statuses() -> &'static [ProcessingStatus] { - &[ProcessingStatus::Ready, ProcessingStatus::Canceled] - } -} diff --git a/crates/nvisy-postgres/src/types/enums/require_mode.rs b/crates/nvisy-postgres/src/types/enums/require_mode.rs deleted file mode 100644 index e62d09b..0000000 --- a/crates/nvisy-postgres/src/types/enums/require_mode.rs +++ /dev/null @@ -1,129 +0,0 @@ -//! Require mode enumeration for file processing requirements. - -use diesel_derive_enum::DbEnum; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -/// Defines the processing requirements for input files. -/// -/// This enumeration corresponds to the `REQUIRE_MODE` PostgreSQL enum and is used -/// to specify what type of processing is needed to extract content from uploaded files. 
-#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::RequireMode"] -pub enum RequireMode { - /// No special processing required. - #[db_rename = "none"] - #[serde(rename = "none")] - #[default] - None, - - /// Requires Optical Character Recognition (OCR). - #[db_rename = "optical"] - #[serde(rename = "optical")] - Optical, - - /// Requires Vision Language Model (VLM). - #[db_rename = "language"] - #[serde(rename = "language")] - Language, - - /// Requires both OCR and VLM processing. - #[db_rename = "both"] - #[serde(rename = "both")] - Both, -} - -impl RequireMode { - /// Returns whether this mode requires OCR processing. - #[inline] - pub fn requires_ocr(self) -> bool { - matches!(self, RequireMode::Optical | RequireMode::Both) - } - - /// Returns whether this mode requires VLM processing. - #[inline] - pub fn requires_vlm(self) -> bool { - matches!(self, RequireMode::Language | RequireMode::Both) - } - - /// Returns whether this mode requires any special processing. - #[inline] - pub fn requires_processing(self) -> bool { - !matches!(self, RequireMode::None) - } - - /// Returns whether this mode involves multiple processing types. - #[inline] - pub fn is_complex(self) -> bool { - matches!(self, RequireMode::Both) - } - - /// Returns whether this mode is ready for immediate analysis. - #[inline] - pub fn is_ready_for_analysis(self) -> bool { - matches!(self, RequireMode::None) - } - - /// Returns whether this mode requires external processing services. - #[inline] - pub fn requires_external_services(self) -> bool { - matches!( - self, - RequireMode::Optical | RequireMode::Language | RequireMode::Both - ) - } - - /// Returns whether this mode typically has higher processing costs. - #[inline] - pub fn is_expensive_to_process(self) -> bool { - matches!( - self, - RequireMode::Optical | RequireMode::Language | RequireMode::Both - ) - } - - /// Returns the estimated processing complexity (1 = simple, 5 = very complex). - #[inline] - pub fn processing_complexity(self) -> u8 { - match self { - RequireMode::None => 1, - RequireMode::Optical => 3, - RequireMode::Language => 4, - RequireMode::Both => 5, - } - } - - /// Returns the estimated processing time factor (multiplier for base time). - #[inline] - pub fn processing_time_factor(self) -> f32 { - match self { - RequireMode::None => 1.0, - RequireMode::Optical => 3.0, - RequireMode::Language => 5.0, - RequireMode::Both => 8.0, - } - } - - /// Returns the types of processing that this mode typically involves. - pub fn processing_types(self) -> &'static [&'static str] { - match self { - RequireMode::None => &[], - RequireMode::Optical => &["optical_character_recognition"], - RequireMode::Language => &["vision_language_model"], - RequireMode::Both => &["optical_character_recognition", "vision_language_model"], - } - } - - /// Returns require modes that need external processing. 
- pub fn external_processing_modes() -> &'static [RequireMode] { - &[ - RequireMode::Optical, - RequireMode::Language, - RequireMode::Both, - ] - } -} diff --git a/crates/nvisy-postgres/src/types/enums/webhook_event.rs b/crates/nvisy-postgres/src/types/enums/webhook_event.rs index cb23b9a..ec684a4 100644 --- a/crates/nvisy-postgres/src/types/enums/webhook_event.rs +++ b/crates/nvisy-postgres/src/types/enums/webhook_event.rs @@ -152,4 +152,26 @@ impl WebhookEvent { | WebhookEvent::IntegrationDesynced => "integration", } } + + /// Returns the event as a subject string for NATS routing. + /// + /// Format: `{category}.{action}` (e.g., "file.created", "member.deleted") + pub fn as_subject(&self) -> &'static str { + match self { + WebhookEvent::DocumentCreated => "document.created", + WebhookEvent::DocumentUpdated => "document.updated", + WebhookEvent::DocumentDeleted => "document.deleted", + WebhookEvent::FileCreated => "file.created", + WebhookEvent::FileUpdated => "file.updated", + WebhookEvent::FileDeleted => "file.deleted", + WebhookEvent::MemberAdded => "member.added", + WebhookEvent::MemberDeleted => "member.deleted", + WebhookEvent::MemberUpdated => "member.updated", + WebhookEvent::IntegrationCreated => "integration.created", + WebhookEvent::IntegrationUpdated => "integration.updated", + WebhookEvent::IntegrationDeleted => "integration.deleted", + WebhookEvent::IntegrationSynced => "integration.synced", + WebhookEvent::IntegrationDesynced => "integration.desynced", + } + } } diff --git a/crates/nvisy-postgres/src/types/mod.rs b/crates/nvisy-postgres/src/types/mod.rs index 77d614f..bd40af2 100644 --- a/crates/nvisy-postgres/src/types/mod.rs +++ b/crates/nvisy-postgres/src/types/mod.rs @@ -1,6 +1,6 @@ //! Contains constraints, enumerations and other custom types. 
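The new `WebhookEvent::as_subject` above yields only the `{category}.{action}` pair; a sketch of how it might be combined with a workspace scope to form a full NATS subject (the `workspace.{id}` prefix and the surrounding publisher are assumptions, not part of this diff):

```rust
use nvisy_postgres::types::WebhookEvent;

// Illustrative subject construction for NATS routing.
fn webhook_subject(workspace_id: &str, event: WebhookEvent) -> String {
    format!("workspace.{}.{}", workspace_id, event.as_subject())
}

// webhook_subject("abc123", WebhookEvent::FileCreated) => "workspace.abc123.file.created"
```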
-pub mod constants; +mod constants; mod constraint; mod enums; mod filtering; @@ -8,19 +8,23 @@ mod pagination; mod sorting; mod utilities; +pub use constants::{ + DEFAULT_RETENTION_DAYS, EDIT_GRACE_PERIOD_SECONDS, EMBEDDING_DIMENSIONS, + EXPIRY_WARNING_MINUTES, LONG_LIVED_THRESHOLD_HOURS, RECENTLY_SENT_HOURS, + RECENTLY_UPLOADED_HOURS, +}; pub use constraint::{ AccountActionTokenConstraints, AccountApiTokenConstraints, AccountConstraints, AccountNotificationConstraints, ConstraintCategory, ConstraintViolation, - DocumentAnnotationConstraints, DocumentChunkConstraints, DocumentCommentConstraints, - DocumentConstraints, DocumentFileConstraints, DocumentVersionConstraints, - WorkspaceActivitiesConstraints, WorkspaceConstraints, WorkspaceIntegrationConstraints, - WorkspaceIntegrationRunConstraints, WorkspaceInviteConstraints, WorkspaceMemberConstraints, - WorkspaceWebhookConstraints, + FileAnnotationConstraints, FileChunkConstraints, FileConstraints, PipelineConstraints, + PipelineRunConstraints, WorkspaceActivitiesConstraints, WorkspaceConstraints, + WorkspaceIntegrationConstraints, WorkspaceIntegrationRunConstraints, + WorkspaceInviteConstraints, WorkspaceMemberConstraints, WorkspaceWebhookConstraints, }; pub use enums::{ - ActionTokenType, ActivityCategory, ActivityType, AnnotationType, ApiTokenType, - ContentSegmentation, IntegrationStatus, IntegrationType, InviteStatus, NotificationEvent, - ProcessingStatus, RequireMode, RunType, WebhookEvent, WebhookStatus, WebhookType, + ActionTokenType, ActivityCategory, ActivityType, AnnotationType, ApiTokenType, FileSource, + IntegrationStatus, IntegrationType, InviteStatus, NotificationEvent, PipelineRunStatus, + PipelineStatus, PipelineTriggerType, RunType, WebhookEvent, WebhookStatus, WebhookType, WorkspaceRole, }; pub use filtering::{FileFilter, FileFormat, InviteFilter, MemberFilter}; diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index dc85011..fe80dd4 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -1,29 +1,35 @@ [package] name = "nvisy-rig" +description = "Rig AI framework integration for nvisy" +readme = "./README.md" +keywords = ["rig", "llm", "ai", "client", "rag"] +categories = ["api-bindings", "web-programming::http-client"] + version = { workspace = true } -edition = { workspace = true } rust-version = { workspace = true } +edition = { workspace = true } license = { workspace = true } +publish = { workspace = true } + authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } documentation = { workspace = true } -publish = { workspace = true } - -description = "Rig AI framework integration for nvisy" -keywords = ["rig", "llm", "ai", "client", "rag"] -categories = ["api-bindings", "web-programming::http-client"] [features] -## Default feature set (none for minimal dependencies) -default = [] +## Default feature set includes Ollama for local development +default = ["ollama"] ## CLI configuration support: enables clap derives for config types ## This allows config types to be used with command-line argument parsing config = ["dep:clap"] +## Ollama local model support +ollama = [] + [dependencies] # Internal crates +nvisy-core = { workspace = true } nvisy-nats = { workspace = true } nvisy-postgres = { workspace = true } @@ -39,12 +45,17 @@ async-trait = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +# Schema generation & validation +schemars = { workspace = true } +jsonschema = { workspace = true } + # Error 
handling
thiserror = { workspace = true }

# Derive macros
derive_builder = { workspace = true }
derive_more = { workspace = true }
+strum = { workspace = true }

# Observability
tracing = { workspace = true }
diff --git a/crates/nvisy-rig/README.md b/crates/nvisy-rig/README.md
index ab5e2cc..2c194d5 100644
--- a/crates/nvisy-rig/README.md
+++ b/crates/nvisy-rig/README.md
@@ -1,5 +1,7 @@
# nvisy-rig

+[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml)
+
Agent-centric AI framework for document processing, built on [Rig](https://github.com/0xPlaygrounds/rig).

## Overview
@@ -44,6 +46,17 @@
let service = RigService::new(providers, tools, sessions);

- `edit` - Proposed edit operations
- `service` - High-level service API

+## Changelog
+
+See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history.
+
## License

-MIT
+Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt)
+
+## Support
+
+- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com)
+- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues)
+- **Email**: [support@nvisy.com](mailto:support@nvisy.com)
+- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev)
diff --git a/crates/nvisy-rig/src/agent/memory/history.rs b/crates/nvisy-rig/src/agent/memory/history.rs
new file mode 100644
index 0000000..25169a7
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/memory/history.rs
@@ -0,0 +1,204 @@
+//! Chat history with automatic compaction strategies.
+//!
+//! Provides conversation history management with configurable compaction
+//! strategies to handle context window limits.
+
+use rig::message::Message;
+
+/// Strategy for compacting chat history when capacity is exceeded.
+#[derive(Debug, Clone, Default)]
+pub enum CompactionStrategy {
+    /// Truncate oldest messages, keeping the most recent ones.
+    #[default]
+    Truncate,
+
+    /// Summarize older messages into a context string.
+    Summarize {
+        /// Summary of compacted messages.
+        summary: String,
+    },
+}
+
+/// Chat history with automatic compaction.
+///
+/// Manages conversation history with a configurable capacity limit and
+/// compaction strategy for handling context window constraints.
+#[derive(Debug, Clone)]
+pub struct ChatHistory {
+    /// Messages in the conversation.
+    messages: Vec<Message>,
+
+    /// Maximum number of messages before compaction.
+    capacity: usize,
+
+    /// Strategy for handling overflow.
+    strategy: CompactionStrategy,
+}
+
+impl ChatHistory {
+    /// Creates a new chat history with the given capacity.
+    ///
+    /// Uses truncation as the default compaction strategy.
+    pub fn new(capacity: usize) -> Self {
+        Self {
+            messages: Vec::with_capacity(capacity),
+            capacity,
+            strategy: CompactionStrategy::Truncate,
+        }
+    }
+
+    /// Creates a chat history with a custom compaction strategy.
+    pub fn with_strategy(capacity: usize, strategy: CompactionStrategy) -> Self {
+        Self {
+            messages: Vec::with_capacity(capacity),
+            capacity,
+            strategy,
+        }
+    }
+
+    /// Adds a message to the history, compacting if necessary.
+    pub fn push(&mut self, message: Message) {
+        self.messages.push(message);
+
+        if self.messages.len() > self.capacity {
+            self.compact();
+        }
+    }
+
+    /// Adds multiple messages to the history.
+    pub fn extend(&mut self, messages: impl IntoIterator<Item = Message>) {
+        for message in messages {
+            self.push(message);
+        }
+    }
+
+    /// Returns the current messages.
+ pub fn messages(&self) -> &[Message] { + &self.messages + } + + /// Returns the number of messages currently stored. + pub fn len(&self) -> usize { + self.messages.len() + } + + /// Returns true if the history is empty. + pub fn is_empty(&self) -> bool { + self.messages.is_empty() + } + + /// Clears all messages and resets the summary. + pub fn clear(&mut self) { + self.messages.clear(); + self.strategy = CompactionStrategy::Truncate; + } + + /// Sets a new compaction strategy. + pub fn set_strategy(&mut self, strategy: CompactionStrategy) { + self.strategy = strategy; + } + + /// Updates the summary for summarize strategy. + /// + /// This should be called with an LLM-generated summary of the + /// compacted messages. + pub fn set_summary(&mut self, summary: String) { + self.strategy = CompactionStrategy::Summarize { summary }; + } + + /// Returns the current summary if using summarize strategy. + pub fn summary(&self) -> Option<&str> { + match &self.strategy { + CompactionStrategy::Summarize { summary } => Some(summary), + CompactionStrategy::Truncate => None, + } + } + + /// Compacts the history according to the current strategy. + fn compact(&mut self) { + let keep_count = self.capacity / 2; + let remove_count = self.messages.len().saturating_sub(keep_count); + + if remove_count == 0 { + return; + } + + match &mut self.strategy { + CompactionStrategy::Truncate => { + // Simply remove oldest messages + self.messages.drain(0..remove_count); + } + CompactionStrategy::Summarize { .. } => { + // Remove oldest messages (caller should update summary separately) + self.messages.drain(0..remove_count); + } + } + } +} + +impl Default for ChatHistory { + fn default() -> Self { + Self::new(100) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_history_is_empty() { + let history = ChatHistory::new(10); + assert!(history.is_empty()); + assert_eq!(history.len(), 0); + } + + #[test] + fn push_adds_messages() { + let mut history = ChatHistory::new(10); + history.push(Message::user("Hello")); + history.push(Message::assistant("Hi!")); + + assert_eq!(history.len(), 2); + } + + #[test] + fn truncate_compacts_when_over_capacity() { + let mut history = ChatHistory::new(4); + + for i in 0..6 { + history.push(Message::user(format!("Message {}", i))); + } + + // Should have compacted, keeping capacity/2 = 2 messages + assert!(history.len() <= 4); + } + + #[test] + fn summarize_strategy_stores_summary() { + let mut history = ChatHistory::with_strategy( + 10, + CompactionStrategy::Summarize { + summary: String::new(), + }, + ); + + history.push(Message::user("Hello")); + history.set_summary("User greeted the assistant.".to_string()); + + assert_eq!(history.summary(), Some("User greeted the assistant.")); + assert_eq!(history.len(), 1); + } + + #[test] + fn clear_resets_history() { + let mut history = ChatHistory::new(10); + history.push(Message::user("Test")); + history.set_summary("Summary".to_string()); + + history.clear(); + + assert!(history.is_empty()); + assert!(history.summary().is_none()); + } +} diff --git a/crates/nvisy-rig/src/agent/memory/mod.rs b/crates/nvisy-rig/src/agent/memory/mod.rs new file mode 100644 index 0000000..971b9e7 --- /dev/null +++ b/crates/nvisy-rig/src/agent/memory/mod.rs @@ -0,0 +1,13 @@ +//! Memory module for agent conversation history and context management. +//! +//! This module provides: +//! +//! - [`ChatHistory`] - Conversation history with automatic compaction +//! 
- [`CompactionStrategy`] - Strategy for handling history overflow (truncate or summarize)
+//! - [`WorkingMemory`] - Key-value store for agent working context
+
+mod history;
+mod working;
+
+pub use history::{ChatHistory, CompactionStrategy};
+pub use working::WorkingMemory;
diff --git a/crates/nvisy-rig/src/agent/memory/working.rs b/crates/nvisy-rig/src/agent/memory/working.rs
new file mode 100644
index 0000000..fce5e3a
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/memory/working.rs
@@ -0,0 +1,172 @@
+//! Working memory for agent context management.
+//!
+//! Provides a key-value store for agent working context that persists
+//! across turns within a conversation.
+
+use std::collections::HashMap;
+
+/// Working memory for storing agent context between turns.
+///
+/// This provides a simple key-value store for agents to maintain
+/// context information like extracted entities, intermediate results,
+/// or user preferences during a conversation.
+#[derive(Debug, Clone, Default)]
+pub struct WorkingMemory {
+    /// Key-value storage for context data.
+    entries: HashMap<String, String>,
+
+    /// Maximum number of entries to store.
+    capacity: usize,
+}
+
+impl WorkingMemory {
+    /// Creates a new working memory with the given capacity.
+    pub fn new(capacity: usize) -> Self {
+        Self {
+            entries: HashMap::with_capacity(capacity),
+            capacity,
+        }
+    }
+
+    /// Stores a value in working memory.
+    ///
+    /// If the key already exists, the value is updated.
+    /// If capacity is exceeded, the oldest entry is removed.
+    pub fn set(&mut self, key: impl Into<String>, value: impl Into<String>) {
+        use std::collections::hash_map::Entry;
+
+        let key = key.into();
+        let value = value.into();
+
+        // Check if we need to make room before borrowing via entry()
+        let needs_eviction =
+            !self.entries.contains_key(&key) && self.entries.len() >= self.capacity;
+
+        if needs_eviction && let Some(remove_key) = self.entries.keys().next().cloned() {
+            self.entries.remove(&remove_key);
+        }
+
+        match self.entries.entry(key) {
+            Entry::Occupied(mut e) => {
+                e.insert(value);
+            }
+            Entry::Vacant(e) => {
+                e.insert(value);
+            }
+        }
+    }
+
+    /// Retrieves a value from working memory.
+    pub fn get(&self, key: &str) -> Option<&str> {
+        self.entries.get(key).map(|s| s.as_str())
+    }
+
+    /// Removes a value from working memory.
+    pub fn remove(&mut self, key: &str) -> Option<String> {
+        self.entries.remove(key)
+    }
+
+    /// Checks if a key exists in working memory.
+    pub fn contains(&self, key: &str) -> bool {
+        self.entries.contains_key(key)
+    }
+
+    /// Returns all keys in working memory.
+    pub fn keys(&self) -> impl Iterator<Item = &str> {
+        self.entries.keys().map(|s| s.as_str())
+    }
+
+    /// Returns the number of entries in working memory.
+    pub fn len(&self) -> usize {
+        self.entries.len()
+    }
+
+    /// Returns true if working memory is empty.
+    pub fn is_empty(&self) -> bool {
+        self.entries.is_empty()
+    }
+
+    /// Clears all entries from working memory.
+    pub fn clear(&mut self) {
+        self.entries.clear();
+    }
+
+    /// Formats working memory as a context string for prompts.
+ pub fn to_context_string(&self) -> String { + if self.entries.is_empty() { + return String::new(); + } + + let mut context = String::from("Working Memory:\n"); + for (key, value) in &self.entries { + context.push_str(&format!("- {}: {}\n", key, value)); + } + context + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_memory_is_empty() { + let memory = WorkingMemory::new(10); + assert!(memory.is_empty()); + assert_eq!(memory.len(), 0); + } + + #[test] + fn set_and_get() { + let mut memory = WorkingMemory::new(10); + memory.set("user_name", "Alice"); + + assert_eq!(memory.get("user_name"), Some("Alice")); + } + + #[test] + fn update_existing_key() { + let mut memory = WorkingMemory::new(10); + memory.set("count", "1"); + memory.set("count", "2"); + + assert_eq!(memory.get("count"), Some("2")); + assert_eq!(memory.len(), 1); + } + + #[test] + fn remove_entry() { + let mut memory = WorkingMemory::new(10); + memory.set("key", "value"); + + let removed = memory.remove("key"); + assert_eq!(removed, Some("value".to_string())); + assert!(memory.is_empty()); + } + + #[test] + fn respects_capacity() { + let mut memory = WorkingMemory::new(2); + memory.set("a", "1"); + memory.set("b", "2"); + memory.set("c", "3"); + + assert_eq!(memory.len(), 2); + } + + #[test] + fn context_string_format() { + let mut memory = WorkingMemory::new(10); + memory.set("task", "summarize"); + + let context = memory.to_context_string(); + assert!(context.contains("Working Memory:")); + assert!(context.contains("task: summarize")); + } + + #[test] + fn empty_context_string() { + let memory = WorkingMemory::new(10); + assert!(memory.to_context_string().is_empty()); + } +} diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs new file mode 100644 index 0000000..15736de --- /dev/null +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -0,0 +1,39 @@ +//! Agent module for LLM-powered document processing tasks. +//! +//! This module provides specialized agents for different types of tasks: +//! +//! - [`VisionAgent`] - VLM tasks (image description, OCR, object detection) +//! - [`TableAgent`] - Table processing (descriptions, format conversion) +//! - [`TextAnalysisAgent`] - Text analysis (NER, keywords, classification, sentiment) +//! - [`TextGenerationAgent`] - Text generation (summarization, titles) +//! - [`StructuredOutputAgent`] - JSON conversion (structured extraction) +//! +//! # Tool Support +//! +//! Each agent can optionally be created with tools enabled via the `with_tools` +//! parameter. When enabled, agents have access to relevant tools: +//! +//! | Agent | Tools | +//! |-------|-------| +//! | `VisionAgent` | `ScratchpadTool` | +//! | `TextAnalysisAgent` | `ScratchpadTool`, `JsonSchemaTool` | +//! | `TextGenerationAgent` | `ScratchpadTool` | +//! | `TableAgent` | `ScratchpadTool`, `JsonSchemaTool` | +//! 
| `StructuredOutputAgent` | `ScratchpadTool`, `JsonSchemaTool` |
+
+pub mod memory;
+mod tool;
+
+mod structured_output;
+mod table;
+mod text_analysis;
+mod text_generation;
+mod vision;
+
+pub use structured_output::{StructuredOutput, StructuredOutputAgent};
+pub use table::{ColumnDescription, TableAgent};
+pub use text_analysis::{
+    Classification, Entity, Relationship, Sentiment, TextAnalysisAgent, TextAnalysisOutput,
+};
+pub use text_generation::TextGenerationAgent;
+pub use vision::VisionAgent;
diff --git a/crates/nvisy-rig/src/agent/structured_output.rs b/crates/nvisy-rig/src/agent/structured_output.rs
new file mode 100644
index 0000000..81bbb68
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/structured_output.rs
@@ -0,0 +1,112 @@
+//! Structured output agent for JSON conversion tasks.
+
+use rig::agent::{Agent, AgentBuilder};
+use rig::completion::Prompt;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+use super::tool::{JsonResponse, JsonSchemaTool, ScratchpadTool};
+use crate::Result;
+use crate::provider::CompletionProvider;
+
+const NAME: &str = "StructuredOutputAgent";
+const DESCRIPTION: &str =
+    "Agent for converting unstructured text to structured JSON with optional schema validation";
+
+const PREAMBLE: &str = "\
+You are a data extraction assistant specialized in converting unstructured text to structured JSON.
+Your task is to identify and extract relevant information and format it as valid JSON.
+When a schema is provided, strictly adhere to it. Use null for fields that cannot be determined.
+Always output valid JSON, no explanations or markdown formatting.";
+
+const PROMPT_TO_JSON: &str = "\
+Convert the following text to a well-structured JSON object.
+Identify the key information and organize it logically.
+Only output valid JSON, no explanation.";
+
+const PROMPT_TO_STRUCTURED_JSON: &str = "\
+Extract information from the following text and format it as JSON matching this schema:
+
+Schema:
+{}
+
+Only output valid JSON that conforms to the schema, no explanation.
+If a field cannot be determined from the text, use null.";
+
+/// Generic structured output schema for validation.
+///
+/// This is a flexible schema that accepts any valid JSON structure.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct StructuredOutput {
+    /// The extracted data as a JSON value.
+    #[serde(flatten)]
+    pub data: Value,
+}
+
+/// Agent for structured output tasks.
+///
+/// Handles tasks that convert text to structured JSON:
+/// - Free-form JSON conversion
+/// - Schema-based structured extraction
+///
+/// When `with_tools` is enabled, the agent has access to:
+/// - `ScratchpadTool` - For drafting complex extractions iteratively
+/// - `JsonSchemaTool` - For validating output against schemas
+pub struct StructuredOutputAgent {
+    agent: Agent<CompletionProvider>,
+    model_name: String,
+}
+
+impl StructuredOutputAgent {
+    /// Creates a new structured output agent with the given completion provider.
+    ///
+    /// # Arguments
+    /// * `provider` - The completion provider to use
+    /// * `with_tools` - Whether to enable tool usage (scratchpad, schema validation)
+    pub fn new(provider: CompletionProvider, with_tools: bool) -> Self {
+        let model_name = provider.model_name().to_string();
+        let builder = AgentBuilder::new(provider)
+            .name(NAME)
+            .description(DESCRIPTION)
+            .preamble(PREAMBLE);
+
+        let agent = if with_tools {
+            builder
+                .tool(ScratchpadTool::new())
+                .tool(JsonSchemaTool::<StructuredOutput>::new())
+                .build()
+        } else {
+            builder.build()
+        };
+
+        Self { agent, model_name }
+    }
+
+    /// Converts text to JSON format.
+    ///
+    /// Attempts to extract structured information from free-form text
+    /// and represent it as JSON.
+    #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))]
+    pub async fn to_json(&self, text: &str) -> Result<Value> {
+        let prompt = format!("{}\n\nText:\n{}", PROMPT_TO_JSON, text);
+        let response = self.agent.prompt(&prompt).await?;
+        let value: Value = JsonResponse::parse(&response)?;
+        tracing::debug!("to_json completed");
+        Ok(value)
+    }
+
+    /// Converts text to JSON matching a specific schema.
+    ///
+    /// Extracts information from text and structures it according to
+    /// the provided JSON schema.
+    #[tracing::instrument(skip(self, text, schema), fields(agent = NAME, model = %self.model_name, text_len = text.len(), schema_len = schema.len()))]
+    pub async fn to_structured_json(&self, text: &str, schema: &str) -> Result<Value> {
+        let base_prompt = PROMPT_TO_STRUCTURED_JSON.replace("{}", schema);
+        let prompt = format!("{}\n\nText:\n{}", base_prompt, text);
+        let response = self.agent.prompt(&prompt).await?;
+        let value: Value = JsonResponse::parse(&response)?;
+        tracing::debug!("to_structured_json completed");
+        Ok(value)
+    }
+}
diff --git a/crates/nvisy-rig/src/agent/table.rs b/crates/nvisy-rig/src/agent/table.rs
new file mode 100644
index 0000000..cfce88c
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/table.rs
@@ -0,0 +1,161 @@
+//! Table agent for table processing tasks.
+
+use rig::agent::{Agent, AgentBuilder};
+use rig::completion::Prompt;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use super::tool::{JsonResponse, JsonSchemaTool, ScratchpadTool};
+use crate::Result;
+use crate::provider::CompletionProvider;
+
+const NAME: &str = "TableAgent";
+const DESCRIPTION: &str = "Agent for table processing including description and format conversion (HTML, Markdown, CSV, JSON)";
+
+const PREAMBLE: &str = "\
+You are a table processing assistant specialized in understanding and transforming tabular data.
+Your task is to analyze tables and either describe their contents or convert them to different formats.
+Preserve data accuracy and structure during conversions.
+When outputting structured data, use valid JSON format.";
+
+const PROMPT_DESCRIBE: &str = "\
+Describe this table concisely. Include:
+- What data the table contains
+- Number of rows and columns
+- Key insights or patterns";
+
+const PROMPT_DESCRIBE_COLUMNS: &str = "\
+For each column in this table, provide:
+- Column name
+- Data type (text, number, date, etc.)
+- Brief description of what the column contains
+
+Format as a JSON array with objects containing 'name', 'type', and 'description' fields.";
+
+const PROMPT_TO_HTML: &str = "\
+Convert this table to clean, semantic HTML.
+Use <table>, <thead>, <tbody>, <tr>, <th>, and <td> tags appropriately.
, and tags appropriately. +Do not include any CSS or styling. Only output the HTML, no explanation."; + +const PROMPT_TO_MARKDOWN: &str = "\ +Convert this table to Markdown format. +Use proper Markdown table syntax with | separators and header dividers. +Only output the Markdown table, no explanation."; + +const PROMPT_TO_CSV: &str = "\ +Convert this table to CSV format. +Use commas as delimiters and quote fields containing commas or newlines. +Only output the CSV, no explanation."; + +const PROMPT_TO_JSON: &str = "\ +Convert this table to a JSON array of objects. +Each row should be an object with column names as keys. +Only output valid JSON, no explanation."; + +/// Column description for table schema validation. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct ColumnDescription { + /// Column name. + pub name: String, + /// Data type (text, number, date, etc.). + #[serde(rename = "type")] + pub data_type: String, + /// Brief description of what the column contains. + pub description: String, +} + +/// Agent for table processing tasks. +/// +/// Handles tasks that involve understanding and transforming tables: +/// - Table description +/// - Column descriptions +/// - Format conversion (HTML, Markdown, CSV, JSON) +/// +/// When `with_tools` is enabled, the agent has access to: +/// - `ScratchpadTool` - For working on format conversions iteratively +/// - `JsonSchemaTool` - For validating JSON output +pub struct TableAgent { + agent: Agent, + model_name: String, +} + +impl TableAgent { + /// Creates a new table agent with the given completion provider. + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad, schema validation) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE); + + let agent = if with_tools { + builder + .tool(ScratchpadTool::new()) + .tool(JsonSchemaTool::>::new()) + .build() + } else { + builder.build() + }; + + Self { agent, model_name } + } + + /// Generates a description of a table. + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] + pub async fn describe(&self, table_content: &str) -> Result { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_DESCRIBE, table_content); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "describe completed"); + Ok(response) + } + + /// Generates descriptions for each column in a table. + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] + pub async fn describe_columns(&self, table_content: &str) -> Result> { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_DESCRIBE_COLUMNS, table_content); + let response = self.agent.prompt(&prompt).await?; + let columns: Vec = JsonResponse::parse(&response)?; + tracing::debug!(column_count = columns.len(), "describe_columns completed"); + Ok(columns) + } + + /// Converts a table to HTML format. 
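+    ///
+    /// Illustrative sketch (assumes an async context and a constructed agent):
+    ///
+    /// ```ignore
+    /// let html = agent.to_html("name,age\nAlice,30").await?;
+    /// ```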
+    #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))]
+    pub async fn to_html(&self, table_content: &str) -> Result<String> {
+        let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_HTML, table_content);
+        let response = self.agent.prompt(&prompt).await?;
+        tracing::debug!(response_len = response.len(), "to_html completed");
+        Ok(response)
+    }
+
+    /// Converts a table to Markdown format.
+    #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))]
+    pub async fn to_markdown(&self, table_content: &str) -> Result<String> {
+        let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_MARKDOWN, table_content);
+        let response = self.agent.prompt(&prompt).await?;
+        tracing::debug!(response_len = response.len(), "to_markdown completed");
+        Ok(response)
+    }
+
+    /// Converts a table to CSV format.
+    #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))]
+    pub async fn to_csv(&self, table_content: &str) -> Result<String> {
+        let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_CSV, table_content);
+        let response = self.agent.prompt(&prompt).await?;
+        tracing::debug!(response_len = response.len(), "to_csv completed");
+        Ok(response)
+    }
+
+    /// Converts a table to JSON format.
+    #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))]
+    pub async fn to_json(&self, table_content: &str) -> Result<String> {
+        let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_JSON, table_content);
+        let response = self.agent.prompt(&prompt).await?;
+        tracing::debug!(response_len = response.len(), "to_json completed");
+        Ok(response)
+    }
+}
diff --git a/crates/nvisy-rig/src/agent/text_analysis.rs b/crates/nvisy-rig/src/agent/text_analysis.rs
new file mode 100644
index 0000000..fe5a954
--- /dev/null
+++ b/crates/nvisy-rig/src/agent/text_analysis.rs
@@ -0,0 +1,219 @@
+//! Text analysis agent for extracting structured information.
+
+use std::collections::HashMap;
+
+use rig::agent::{Agent, AgentBuilder};
+use rig::completion::Prompt;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use super::tool::{JsonResponse, JsonSchemaTool, ScratchpadTool};
+use crate::Result;
+use crate::provider::CompletionProvider;
+
+/// A named entity extracted from text.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct Entity {
+    /// The text of the entity.
+    pub text: String,
+    /// The type of entity (e.g., "person", "organization", "location").
+    #[serde(rename = "type")]
+    pub entity_type: String,
+    /// The starting character index in the source text.
+    #[serde(default)]
+    pub start_index: Option<usize>,
+}
+
+/// Classification result with labels and confidence scores.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct Classification {
+    /// The matched category labels.
+    pub labels: Vec<String>,
+    /// Confidence scores for each label (0.0 to 1.0).
+    pub confidence: HashMap<String, f64>,
+}
+
+/// Sentiment analysis result.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct Sentiment {
+    /// The overall sentiment: "positive", "negative", "neutral", or "mixed".
+    pub sentiment: String,
+    /// Confidence score (0.0 to 1.0).
+    pub confidence: f64,
+    /// Brief explanation of the sentiment.
+    #[serde(default)]
+    pub explanation: Option<String>,
+}
+
+/// A relationship between two entities.
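+///
+/// Illustrative example of the JSON shape this struct deserializes from:
+///
+/// ```ignore
+/// let relationship: Relationship = serde_json::from_str(
+///     r#"{"subject": "Alice", "predicate": "works_at", "object": "Acme"}"#,
+/// )?;
+/// ```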
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct Relationship {
+    /// The first entity in the relationship.
+    pub subject: String,
+    /// The type of relationship.
+    pub predicate: String,
+    /// The second entity in the relationship.
+    pub object: String,
+}
+
+/// Combined schema for text analysis outputs.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct TextAnalysisOutput {
+    /// Extracted entities.
+    #[serde(default)]
+    pub entities: Option<Vec<Entity>>,
+    /// Extracted keywords.
+    #[serde(default)]
+    pub keywords: Option<Vec<String>>,
+    /// Classification result.
+    #[serde(default)]
+    pub classification: Option<Classification>,
+    /// Sentiment analysis result.
+    #[serde(default)]
+    pub sentiment: Option<Sentiment>,
+    /// Extracted relationships.
+    #[serde(default)]
+    pub relationships: Option<Vec<Relationship>>,
+}
+
+const NAME: &str = "TextAnalysisAgent";
+const DESCRIPTION: &str = "Agent for text analysis including entity extraction, keyword extraction, classification, and sentiment analysis";
+
+const PREAMBLE: &str = "\
+You are a text analysis assistant specialized in extracting structured information from text.
+Your task is to identify entities, relationships, sentiment, and other structured data from unstructured text.
+Be precise and comprehensive in your extractions.
+Always output valid JSON format matching the requested structure.";
+
+const PROMPT_EXTRACT_ENTITIES: &str = "\
+Extract all named entities from the following text.
+Identify: people, organizations, locations, dates, monetary values, and other notable entities.
+
+Format as a JSON array with objects containing 'text', 'type', and 'start_index' fields.";
+
+const PROMPT_EXTRACT_KEYWORDS: &str = "\
+Extract the most important keywords and key phrases from the following text.
+Return 5-15 keywords ordered by relevance.
+
+Format as a JSON array of strings.";
+
+const PROMPT_CLASSIFY: &str = "\
+Classify the following text into one or more of these categories: {}
+
+Format as a JSON object with 'labels' (array of matching categories) \
+and 'confidence' (object mapping each label to a confidence score 0-1).";
+
+const PROMPT_ANALYZE_SENTIMENT: &str = "\
+Analyze the sentiment of the following text.
+
+Format as a JSON object with:
+- 'sentiment': one of 'positive', 'negative', 'neutral', or 'mixed'
+- 'confidence': confidence score 0-1
+- 'explanation': brief explanation of the sentiment";
+
+const PROMPT_EXTRACT_RELATIONSHIPS: &str = "\
+Extract relationships between entities in the following text.
+Identify how people, organizations, and other entities are connected.
+
+Format as a JSON array with objects containing:
+- 'subject': the first entity
+- 'predicate': the relationship type
+- 'object': the second entity";
+
+/// Agent for text analysis tasks.
+///
+/// Handles tasks that extract structured information from text:
+/// - Named entity recognition (NER)
+/// - Keyword extraction
+/// - Classification
+/// - Sentiment analysis
+/// - Relationship extraction
+///
+/// When `with_tools` is enabled, the agent has access to:
+/// - `ScratchpadTool` - For drafting and refining extractions
+/// - `JsonSchemaTool` - For validating output against schemas
+pub struct TextAnalysisAgent {
+    agent: Agent<CompletionProvider>,
+    model_name: String,
+}
+
+impl TextAnalysisAgent {
+    /// Creates a new text analysis agent with the given completion provider.
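+    ///
+    /// Illustrative construction sketch (assumes a configured `CompletionProvider`):
+    ///
+    /// ```ignore
+    /// let agent = TextAnalysisAgent::new(provider, true);
+    /// ```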
+    ///
+    /// # Arguments
+    /// * `provider` - The completion provider to use
+    /// * `with_tools` - Whether to enable tool usage (scratchpad, schema validation)
+    pub fn new(provider: CompletionProvider, with_tools: bool) -> Self {
+        let model_name = provider.model_name().to_string();
+        let builder = AgentBuilder::new(provider)
+            .name(NAME)
+            .description(DESCRIPTION)
+            .preamble(PREAMBLE);
+
+        let agent = if with_tools {
+            builder
+                .tool(ScratchpadTool::new())
+                .tool(JsonSchemaTool::<TextAnalysisOutput>::new())
+                .build()
+        } else {
+            builder.build()
+        };
+
+        Self { agent, model_name }
+    }
+
+    /// Extracts named entities from text.
+    #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))]
+    pub async fn extract_entities(&self, text: &str) -> Result<Vec<Entity>> {
+        let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_ENTITIES, text);
+        let response = self.agent.prompt(&prompt).await?;
+        let entities: Vec<Entity> = JsonResponse::parse(&response)?;
+        tracing::debug!(entity_count = entities.len(), "extract_entities completed");
+        Ok(entities)
+    }
+
+    /// Extracts keywords from text.
+    #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))]
+    pub async fn extract_keywords(&self, text: &str) -> Result<Vec<String>> {
+        let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_KEYWORDS, text);
+        let response = self.agent.prompt(&prompt).await?;
+        let keywords: Vec<String> = JsonResponse::parse(&response)?;
+        tracing::debug!(keyword_count = keywords.len(), "extract_keywords completed");
+        Ok(keywords)
+    }
+
+    /// Classifies text into provided categories.
+    #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len(), label_count = labels.len()))]
+    pub async fn classify(&self, text: &str, labels: &[String]) -> Result<Classification> {
+        let labels_str = labels.join(", ");
+        let base_prompt = PROMPT_CLASSIFY.replace("{}", &labels_str);
+        let prompt = format!("{}\n\nText:\n{}", base_prompt, text);
+        let response = self.agent.prompt(&prompt).await?;
+        let classification: Classification = JsonResponse::parse(&response)?;
+        tracing::debug!(matched_labels = ?classification.labels, "classify completed");
+        Ok(classification)
+    }
+
+    /// Analyzes sentiment of text.
+    #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))]
+    pub async fn analyze_sentiment(&self, text: &str) -> Result<Sentiment> {
+        let prompt = format!("{}\n\nText:\n{}", PROMPT_ANALYZE_SENTIMENT, text);
+        let response = self.agent.prompt(&prompt).await?;
+        let sentiment: Sentiment = JsonResponse::parse(&response)?;
+        tracing::debug!(sentiment = %sentiment.sentiment, confidence = %sentiment.confidence, "analyze_sentiment completed");
+        Ok(sentiment)
+    }
+
+    /// Extracts relationships between entities in text.
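+    ///
+    /// Illustrative sketch (assumes an async context):
+    ///
+    /// ```ignore
+    /// let relationships = agent
+    ///     .extract_relationships("Alice founded Acme Corp in Berlin.")
+    ///     .await?;
+    /// ```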
+ #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] + pub async fn extract_relationships(&self, text: &str) -> Result> { + let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_RELATIONSHIPS, text); + let response = self.agent.prompt(&prompt).await?; + let relationships: Vec = JsonResponse::parse(&response)?; + tracing::debug!( + relationship_count = relationships.len(), + "extract_relationships completed" + ); + Ok(relationships) + } +} diff --git a/crates/nvisy-rig/src/agent/text_generation.rs b/crates/nvisy-rig/src/agent/text_generation.rs new file mode 100644 index 0000000..3faddd2 --- /dev/null +++ b/crates/nvisy-rig/src/agent/text_generation.rs @@ -0,0 +1,112 @@ +//! Text generation agent for creating new text content. + +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::Prompt; + +use super::tool::ScratchpadTool; +use crate::Result; +use crate::provider::CompletionProvider; + +const NAME: &str = "TextGenerationAgent"; +const DESCRIPTION: &str = + "Agent for text generation including summarization, title generation, and contextual chunking"; + +const PREAMBLE: &str = "\ +You are a text generation assistant specialized in creating concise, high-quality content. +Your task is to generate summaries, titles, and contextual information based on input text. +Maintain accuracy while being concise. Preserve the key information and main points."; + +const PROMPT_SUMMARIZE: &str = "\ +Summarize the following text concisely while preserving the key information and main points. +The summary should be about 20-30% of the original length."; + +const PROMPT_GENERATE_TITLE: &str = "\ +Generate a concise, descriptive title for the following text. +The title should capture the main topic and be no more than 10 words. + +Only output the title, no explanation or quotes."; + +const PROMPT_GENERATE_CHUNK_CONTEXT: &str = "\ +Given the following document summary and a specific chunk from that document, \ +generate a brief context statement (1-2 sentences) that situates this chunk \ +within the broader document. This context will be prepended to the chunk \ +to improve retrieval quality. + +Only output the context statement, no explanation."; + +/// Agent for text generation tasks. +/// +/// Handles tasks that generate new text content: +/// - Summarization +/// - Title generation +/// - Contextual chunking (adding context to chunks) +/// +/// When `with_tools` is enabled, the agent has access to: +/// - `ScratchpadTool` - For drafting and refining content iteratively +pub struct TextGenerationAgent { + agent: Agent, + model_name: String, +} + +impl TextGenerationAgent { + /// Creates a new text generation agent with the given completion provider. + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad for drafting) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE); + + let agent = if with_tools { + builder.tool(ScratchpadTool::new()).build() + } else { + builder.build() + }; + + Self { agent, model_name } + } + + /// Generates a summary of the text. 
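+    ///
+    /// Illustrative sketch (assumes an async context and a constructed agent):
+    ///
+    /// ```ignore
+    /// let summary = agent.summarize("The quarterly report shows ...").await?;
+    /// ```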
+ #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] + pub async fn summarize(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_SUMMARIZE, text); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "summarize completed"); + Ok(response) + } + + /// Generates a title for the text. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] + pub async fn generate_title(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_GENERATE_TITLE, text); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(title = %response, "generate_title completed"); + Ok(response) + } + + /// Generates contextual information for a chunk. + /// + /// This is used for contextual chunking, where each chunk is enriched + /// with context about how it fits into the larger document. + #[tracing::instrument(skip(self, chunk, document_summary), fields(agent = NAME, model = %self.model_name, chunk_len = chunk.len(), summary_len = document_summary.len()))] + pub async fn generate_chunk_context( + &self, + chunk: &str, + document_summary: &str, + ) -> Result { + let prompt = format!( + "{}\n\nDocument Summary:\n{}\n\nChunk:\n{}", + PROMPT_GENERATE_CHUNK_CONTEXT, document_summary, chunk + ); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!( + response_len = response.len(), + "generate_chunk_context completed" + ); + Ok(response) + } +} diff --git a/crates/nvisy-rig/src/agent/tool/document_fetch.rs b/crates/nvisy-rig/src/agent/tool/document_fetch.rs new file mode 100644 index 0000000..2daa407 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/document_fetch.rs @@ -0,0 +1,121 @@ +//! Document fetch tool for retrieving documents by ID. + +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +/// A fetched document. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Document { + /// The document ID. + pub id: String, + /// The document content. + pub content: String, + /// Document title if available. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub title: Option, + /// Document metadata. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metadata: Option, +} + +/// Trait for document fetch implementations. +#[async_trait] +pub trait DocumentFetcher: Send + Sync { + /// Fetch a document by ID. + async fn fetch(&self, id: &str) -> Result, DocumentFetchError>; + + /// Fetch multiple documents by IDs. + async fn fetch_many(&self, ids: &[String]) -> Result, DocumentFetchError>; +} + +/// Error type for document fetch operations. +#[derive(Debug, thiserror::Error)] +pub enum DocumentFetchError { + #[error("document not found: {0}")] + NotFound(String), + #[error("fetch failed: {0}")] + Fetch(String), + #[error("connection error: {0}")] + Connection(String), +} + +/// Arguments for document fetch. +#[derive(Debug, Deserialize)] +pub struct DocumentFetchArgs { + /// The document ID to fetch. + #[serde(default)] + pub id: Option, + /// Multiple document IDs to fetch. + #[serde(default)] + pub ids: Option>, +} + +/// Tool for fetching documents by ID. +pub struct DocumentFetchTool { + fetcher: Arc, +} + +impl DocumentFetchTool { + /// Creates a new document fetch tool. 
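+    ///
+    /// Illustrative sketch (assumes `MyFetcher` is a hypothetical type implementing
+    /// [`DocumentFetcher`]):
+    ///
+    /// ```ignore
+    /// let tool = DocumentFetchTool::new(MyFetcher::default());
+    /// ```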
+ pub fn new(fetcher: F) -> Self { + Self { + fetcher: Arc::new(fetcher), + } + } + + /// Creates a new document fetch tool from an Arc. + pub fn from_arc(fetcher: Arc) -> Self { + Self { fetcher } + } +} + +impl Tool for DocumentFetchTool { + type Args = DocumentFetchArgs; + type Error = DocumentFetchError; + type Output = Vec; + + const NAME: &'static str = "document_fetch"; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Fetch one or more documents by their IDs. Use this to retrieve the full content of documents you've found through search.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "A single document ID to fetch" + }, + "ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Multiple document IDs to fetch" + } + } + }), + } + } + + #[tracing::instrument(skip(self), fields(tool = Self::NAME, id = ?args.id, ids_count = args.ids.as_ref().map(|v| v.len())))] + async fn call(&self, args: Self::Args) -> Result { + let results = match (args.id, args.ids) { + (Some(id), _) => { + let doc = self + .fetcher + .fetch(&id) + .await? + .ok_or(DocumentFetchError::NotFound(id))?; + vec![doc] + } + (None, Some(ids)) => self.fetcher.fetch_many(&ids).await?, + (None, None) => vec![], + }; + tracing::debug!(result_count = results.len(), "document_fetch completed"); + Ok(results) + } +} diff --git a/crates/nvisy-rig/src/agent/tool/json_schema.rs b/crates/nvisy-rig/src/agent/tool/json_schema.rs new file mode 100644 index 0000000..f9fcdf5 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/json_schema.rs @@ -0,0 +1,324 @@ +//! JSON schema validation and response parsing. +//! +//! This module provides: +//! - Schema generation from Rust types via `schemars` +//! - JSON validation against schemas via `jsonschema` +//! - LLM response parsing (handles markdown code blocks, etc.) + +use std::marker::PhantomData; + +use jsonschema::Validator; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::JsonSchema; +use schemars::generate::SchemaSettings; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::{Error, Result}; + +/// Error type for JSON schema operations. +#[derive(Debug, thiserror::Error)] +#[error("json schema error")] +pub struct JsonSchemaError; + +/// Arguments for JSON schema validation. +/// +/// Generic over `T` which defines the expected schema via `schemars::JsonSchema`. +#[derive(Debug, Deserialize)] +pub struct JsonSchemaArgs { + /// The JSON data to validate. + pub data: Value, + #[serde(skip)] + _marker: PhantomData, +} + +/// Result of JSON schema validation. +#[derive(Debug, Serialize)] +pub struct JsonSchemaResult { + /// Whether the data is valid. + pub valid: bool, + /// Validation errors if any. + #[serde(skip_serializing_if = "Vec::is_empty")] + pub errors: Vec, +} + +/// Tool for validating JSON against a schema derived from a Rust type. +/// +/// Uses `schemars` to generate the JSON schema from the type parameter `T`, +/// and `jsonschema` for validation. +pub struct JsonSchemaTool { + validator: Validator, + _marker: PhantomData, +} + +impl JsonSchemaTool { + /// Creates a new JSON schema tool for type `T`. 
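+    ///
+    /// Illustrative sketch (assumes `Person` is a hypothetical type deriving
+    /// `schemars::JsonSchema`):
+    ///
+    /// ```ignore
+    /// let tool = JsonSchemaTool::<Person>::new();
+    /// ```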
+ pub fn new() -> Self { + let mut generator = SchemaSettings::draft07().into_generator(); + let schema = generator.root_schema_for::(); + let schema_value = serde_json::to_value(&schema).expect("schema serialization cannot fail"); + let validator = Validator::new(&schema_value).expect("valid schema"); + + Self { + validator, + _marker: PhantomData, + } + } + + /// Validates JSON data against the schema. + fn validate_data(&self, data: &Value) -> Vec { + self.validator + .iter_errors(data) + .map(|e| e.to_string()) + .collect() + } +} + +impl Default for JsonSchemaTool { + fn default() -> Self { + Self::new() + } +} + +impl Tool for JsonSchemaTool { + type Args = JsonSchemaArgs; + type Error = JsonSchemaError; + type Output = JsonSchemaResult; + + const NAME: &'static str = "json_schema"; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Validate JSON data against a JSON Schema. Use this to verify that structured data conforms to expected format.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "data": { + "description": "The JSON data to validate" + } + }, + "required": ["data"] + }), + } + } + + #[tracing::instrument(skip(self, args), fields(tool = Self::NAME))] + async fn call(&self, args: Self::Args) -> std::result::Result { + let errors = self.validate_data(&args.data); + let valid = errors.is_empty(); + + tracing::debug!(valid, error_count = errors.len(), "json_schema completed"); + + Ok(JsonSchemaResult { valid, errors }) + } +} + +/// Parser for extracting and validating JSON from LLM responses. +/// +/// Handles common LLM output patterns: +/// - Plain JSON +/// - JSON wrapped in markdown code blocks (```json ... ```) +/// - JSON wrapped in generic code blocks (``` ... ```) +/// - JSON with surrounding explanatory text +/// +/// # Example +/// +/// ```ignore +/// use nvisy_rig::agent::tool::JsonResponse; +/// use serde::Deserialize; +/// +/// #[derive(Deserialize)] +/// struct UserInfo { +/// name: String, +/// age: u32, +/// } +/// +/// let response = r#"Here's the extracted data: +/// ```json +/// {"name": "Alice", "age": 30} +/// ```"#; +/// +/// let info: UserInfo = JsonResponse::parse(response)?; +/// ``` +pub struct JsonResponse; + +impl JsonResponse { + /// Extracts JSON content from a response, stripping markdown formatting. 
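+    ///
+    /// For example (mirrors the behavior exercised in the tests below):
+    ///
+    /// ```ignore
+    /// let raw = "The result is: {\"key\": \"value\"} as requested.";
+    /// assert_eq!(JsonResponse::extract(raw), "{\"key\": \"value\"}");
+    /// ```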
+ pub fn extract(response: &str) -> &str { + // Try ```json block first + if let Some(start) = response.find("```json") { + let after_marker = &response[start + 7..]; + if let Some(end) = after_marker.find("```") { + return after_marker[..end].trim(); + } + } + + // Try generic ``` block + if let Some(start) = response.find("```") { + let after_marker = &response[start + 3..]; + // Skip language identifier if on same line + let content_start = after_marker.find('\n').map(|i| i + 1).unwrap_or(0); + let after_newline = &after_marker[content_start..]; + if let Some(end) = after_newline.find("```") { + return after_newline[..end].trim(); + } + } + + // Try to find JSON object or array boundaries + let trimmed = response.trim(); + if (trimmed.starts_with('{') && trimmed.ends_with('}')) + || (trimmed.starts_with('[') && trimmed.ends_with(']')) + { + return trimmed; + } + + // Find first { or [ and last } or ] + let start = trimmed.find(['{', '[']).unwrap_or(0); + let end = trimmed + .rfind(['}', ']']) + .map(|i| i + 1) + .unwrap_or(trimmed.len()); + + if start < end { + &trimmed[start..end] + } else { + trimmed + } + } + + /// Parses JSON from an LLM response into the specified type. + /// + /// Automatically strips markdown code blocks and surrounding text. + pub fn parse(response: &str) -> Result { + let json_str = Self::extract(response); + serde_json::from_str(json_str).map_err(|e| Error::parse(format!("invalid JSON: {e}"))) + } +} + +#[cfg(test)] +mod tests { + use schemars::JsonSchema; + use serde::Deserialize; + use serde_json::json; + + use super::*; + + #[derive(Debug, Deserialize, JsonSchema, PartialEq)] + struct TestPerson { + name: String, + age: u32, + } + + #[tokio::test] + async fn test_valid_object() { + let tool = JsonSchemaTool::::new(); + let result = tool + .call(JsonSchemaArgs { + data: json!({ + "name": "Alice", + "age": 30 + }), + _marker: PhantomData, + }) + .await + .unwrap(); + + assert!(result.valid); + assert!(result.errors.is_empty()); + } + + #[tokio::test] + async fn test_missing_required() { + let tool = JsonSchemaTool::::new(); + let result = tool + .call(JsonSchemaArgs { + data: json!({}), + _marker: PhantomData, + }) + .await + .unwrap(); + + assert!(!result.valid); + assert!(!result.errors.is_empty()); + } + + #[tokio::test] + async fn test_type_mismatch() { + let tool = JsonSchemaTool::::new(); + let result = tool + .call(JsonSchemaArgs { + data: json!({ + "name": 123, + "age": 30 + }), + _marker: PhantomData, + }) + .await + .unwrap(); + + assert!(!result.valid); + assert!(!result.errors.is_empty()); + } + + // JsonResponse tests + + #[derive(Debug, Deserialize, PartialEq)] + struct TestData { + key: String, + } + + #[test] + fn parse_plain_json() { + let response = r#"{"key": "value"}"#; + let result: TestData = JsonResponse::parse(response).unwrap(); + assert_eq!(result.key, "value"); + } + + #[test] + fn parse_json_with_markdown_block() { + let response = r#"Here's the JSON: +```json +{"key": "value"} +```"#; + let result: TestData = JsonResponse::parse(response).unwrap(); + assert_eq!(result.key, "value"); + } + + #[test] + fn parse_json_with_generic_code_block() { + let response = r#"``` +{"key": "value"} +```"#; + let result: TestData = JsonResponse::parse(response).unwrap(); + assert_eq!(result.key, "value"); + } + + #[test] + fn parse_json_with_surrounding_text() { + let response = r#"The result is: {"key": "value"} as requested."#; + let result: TestData = JsonResponse::parse(response).unwrap(); + assert_eq!(result.key, "value"); + } + + #[test] + 
fn parse_array() { + let response = r#"[{"key": "a"}, {"key": "b"}]"#; + let result: Vec = JsonResponse::parse(response).unwrap(); + assert_eq!(result.len(), 2); + assert_eq!(result[0].key, "a"); + assert_eq!(result[1].key, "b"); + } + + #[test] + fn extract_returns_json_content() { + let extracted = JsonResponse::extract( + r#"```json +{"key": "value"} +```"#, + ); + assert_eq!(extracted, r#"{"key": "value"}"#); + } +} diff --git a/crates/nvisy-rig/src/agent/tool/metadata_query.rs b/crates/nvisy-rig/src/agent/tool/metadata_query.rs new file mode 100644 index 0000000..c0919f8 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/metadata_query.rs @@ -0,0 +1,182 @@ +//! Metadata query tool for filtering documents by metadata. + +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +/// A metadata filter condition. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetadataFilter { + /// The field name to filter on. + pub field: String, + /// The operator to use. + pub operator: FilterOperator, + /// The value to compare against. + pub value: serde_json::Value, +} + +/// Filter operators for metadata queries. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FilterOperator { + /// Equals. + Eq, + /// Not equals. + Ne, + /// Greater than. + Gt, + /// Greater than or equal. + Gte, + /// Less than. + Lt, + /// Less than or equal. + Lte, + /// Contains (for arrays or strings). + Contains, + /// Starts with (for strings). + StartsWith, + /// Ends with (for strings). + EndsWith, + /// In (value is in array). + In, + /// Not in (value is not in array). + NotIn, + /// Exists (field exists). + Exists, +} + +/// Result from a metadata query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QueryResult { + /// The document ID. + pub id: String, + /// The document content (may be truncated). + pub content: String, + /// The matched metadata fields. + pub metadata: serde_json::Value, +} + +/// Trait for metadata query implementations. +#[async_trait] +pub trait MetadataQuerier: Send + Sync { + /// Query documents by metadata filters. + async fn query( + &self, + filters: &[MetadataFilter], + limit: usize, + offset: usize, + ) -> Result, MetadataQueryError>; +} + +/// Error type for metadata query operations. +#[derive(Debug, thiserror::Error)] +pub enum MetadataQueryError { + #[error("invalid filter: {0}")] + InvalidFilter(String), + #[error("query failed: {0}")] + Query(String), + #[error("connection error: {0}")] + Connection(String), +} + +/// Arguments for metadata query. +#[derive(Debug, Deserialize)] +pub struct MetadataQueryArgs { + /// The filters to apply. + pub filters: Vec, + /// Maximum number of results to return. + #[serde(default = "default_limit")] + pub limit: usize, + /// Number of results to skip. + #[serde(default)] + pub offset: usize, +} + +fn default_limit() -> usize { + 10 +} + +/// Tool for querying documents by metadata. +pub struct MetadataQueryTool { + querier: Arc, +} + +impl MetadataQueryTool { + /// Creates a new metadata query tool. + pub fn new(querier: Q) -> Self { + Self { + querier: Arc::new(querier), + } + } + + /// Creates a new metadata query tool from an Arc. 
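+    ///
+    /// Illustrative sketch (assumes `querier` is an already-shared `Arc` of a
+    /// [`MetadataQuerier`] implementation):
+    ///
+    /// ```ignore
+    /// let tool = MetadataQueryTool::from_arc(querier.clone());
+    /// ```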
+ pub fn from_arc(querier: Arc) -> Self { + Self { querier } + } +} + +impl Tool for MetadataQueryTool { + type Args = MetadataQueryArgs; + type Error = MetadataQueryError; + type Output = Vec; + + const NAME: &'static str = "metadata_query"; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Query documents by their metadata fields. Use this to filter documents by specific attributes like date, author, type, tags, etc.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "filters": { + "type": "array", + "items": { + "type": "object", + "properties": { + "field": { + "type": "string", + "description": "The metadata field name" + }, + "operator": { + "type": "string", + "enum": ["eq", "ne", "gt", "gte", "lt", "lte", "contains", "starts_with", "ends_with", "in", "not_in", "exists"], + "description": "The comparison operator" + }, + "value": { + "description": "The value to compare against" + } + }, + "required": ["field", "operator", "value"] + }, + "description": "The filter conditions to apply" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results (default: 10)", + "default": 10 + }, + "offset": { + "type": "integer", + "description": "Number of results to skip for pagination", + "default": 0 + } + }, + "required": ["filters"] + }), + } + } + + #[tracing::instrument(skip(self), fields(tool = Self::NAME, filter_count = args.filters.len(), limit = args.limit, offset = args.offset))] + async fn call(&self, args: Self::Args) -> Result { + let results = self + .querier + .query(&args.filters, args.limit, args.offset) + .await?; + tracing::debug!(result_count = results.len(), "metadata_query completed"); + Ok(results) + } +} diff --git a/crates/nvisy-rig/src/agent/tool/mod.rs b/crates/nvisy-rig/src/agent/tool/mod.rs new file mode 100644 index 0000000..37f2d07 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/mod.rs @@ -0,0 +1,13 @@ +//! Tools module for agent function calling capabilities. +//! +//! This module provides tools used internally by agents: +//! +//! - [`ScratchpadTool`] - Temporary working storage for drafting +//! - [`JsonSchemaTool`] - Validate JSON against schema (generic over `T: JsonSchema`) +//! - [`JsonResponse`] - Parse JSON from LLM responses (handles markdown blocks, etc.) + +mod json_schema; +mod scratchpad; + +pub use json_schema::{JsonResponse, JsonSchemaTool}; +pub use scratchpad::ScratchpadTool; diff --git a/crates/nvisy-rig/src/agent/tool/scratchpad.rs b/crates/nvisy-rig/src/agent/tool/scratchpad.rs new file mode 100644 index 0000000..d7bfe8f --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/scratchpad.rs @@ -0,0 +1,271 @@ +//! Scratchpad tool for temporary working storage. + +use std::collections::HashMap; +use std::sync::Arc; + +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; +use tokio::sync::RwLock; + +/// Error type for scratchpad operations. +#[derive(Debug, thiserror::Error)] +#[error("scratchpad error")] +pub struct ScratchpadError; + +/// The operation to perform on the scratchpad. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ScratchpadOperation { + /// Write content (replaces existing). + Write { content: String }, + /// Append content. + Append { content: String }, + /// Read all content. + Read, + /// Clear all content. + Clear, + /// Get a named section. + GetSection { name: String }, + /// Set a named section. 
+ SetSection { name: String, content: String }, +} + +/// Arguments for scratchpad operations. +#[derive(Debug, Deserialize)] +pub struct ScratchpadArgs { + /// The operation to perform. + pub operation: ScratchpadOperation, +} + +/// Result of a scratchpad operation. +#[derive(Debug, Serialize)] +pub struct ScratchpadResult { + /// Whether the operation succeeded. + pub success: bool, + /// The content (for read operations). + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option, + /// Optional message. + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, +} + +/// In-memory scratchpad storage. +struct InMemoryScratchpad { + content: RwLock, + sections: RwLock>, +} + +impl InMemoryScratchpad { + fn new() -> Self { + Self { + content: RwLock::new(String::new()), + sections: RwLock::new(HashMap::new()), + } + } + + async fn write(&self, content: &str) { + let mut guard = self.content.write().await; + *guard = content.to_string(); + } + + async fn append(&self, content: &str) { + let mut guard = self.content.write().await; + guard.push_str(content); + } + + async fn read(&self) -> String { + let guard = self.content.read().await; + guard.clone() + } + + async fn clear(&self) { + let mut guard = self.content.write().await; + guard.clear(); + let mut sections = self.sections.write().await; + sections.clear(); + } + + async fn get_section(&self, name: &str) -> Option { + let guard = self.sections.read().await; + guard.get(name).cloned() + } + + async fn set_section(&self, name: &str, content: &str) { + let mut guard = self.sections.write().await; + guard.insert(name.to_string(), content.to_string()); + } +} + +/// Tool for temporary working storage. +/// +/// Provides a scratchpad for agents to draft, edit, and organize content +/// during multi-step reasoning tasks. +pub struct ScratchpadTool { + scratchpad: Arc, +} + +impl ScratchpadTool { + /// Creates a new scratchpad tool with in-memory storage. + pub fn new() -> Self { + Self { + scratchpad: Arc::new(InMemoryScratchpad::new()), + } + } +} + +impl Default for ScratchpadTool { + fn default() -> Self { + Self::new() + } +} + +impl Tool for ScratchpadTool { + type Args = ScratchpadArgs; + type Error = ScratchpadError; + type Output = ScratchpadResult; + + const NAME: &'static str = "scratchpad"; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "A temporary workspace for drafting, editing, and organizing content. 
Use this to work on intermediate results before producing final output.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "operation": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "write": { + "type": "object", + "properties": { + "content": { "type": "string" } + }, + "required": ["content"] + } + } + }, + { + "type": "object", + "properties": { + "append": { + "type": "object", + "properties": { + "content": { "type": "string" } + }, + "required": ["content"] + } + } + }, + { + "type": "object", + "properties": { + "read": { "type": "object" } + } + }, + { + "type": "object", + "properties": { + "clear": { "type": "object" } + } + }, + { + "type": "object", + "properties": { + "get_section": { + "type": "object", + "properties": { + "name": { "type": "string" } + }, + "required": ["name"] + } + } + }, + { + "type": "object", + "properties": { + "set_section": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "content": { "type": "string" } + }, + "required": ["name", "content"] + } + } + } + ], + "description": "The operation: write, append, read, clear, get_section, or set_section" + } + }, + "required": ["operation"] + }), + } + } + + #[tracing::instrument(skip(self, args), fields(tool = Self::NAME, operation = ?std::mem::discriminant(&args.operation)))] + async fn call(&self, args: Self::Args) -> Result { + let result = match args.operation { + ScratchpadOperation::Write { content } => { + tracing::debug!(content_len = content.len(), "scratchpad write"); + self.scratchpad.write(&content).await; + ScratchpadResult { + success: true, + content: None, + message: Some("Content written to scratchpad".to_string()), + } + } + ScratchpadOperation::Append { content } => { + tracing::debug!(content_len = content.len(), "scratchpad append"); + self.scratchpad.append(&content).await; + ScratchpadResult { + success: true, + content: None, + message: Some("Content appended to scratchpad".to_string()), + } + } + ScratchpadOperation::Read => { + let content = self.scratchpad.read().await; + tracing::debug!(content_len = content.len(), "scratchpad read"); + ScratchpadResult { + success: true, + content: Some(content), + message: None, + } + } + ScratchpadOperation::Clear => { + tracing::debug!("scratchpad clear"); + self.scratchpad.clear().await; + ScratchpadResult { + success: true, + content: None, + message: Some("Scratchpad cleared".to_string()), + } + } + ScratchpadOperation::GetSection { name } => { + let content = self.scratchpad.get_section(&name).await; + tracing::debug!(section = %name, found = content.is_some(), "scratchpad get_section"); + ScratchpadResult { + success: content.is_some(), + content, + message: None, + } + } + ScratchpadOperation::SetSection { name, content } => { + tracing::debug!(section = %name, content_len = content.len(), "scratchpad set_section"); + self.scratchpad.set_section(&name, &content).await; + ScratchpadResult { + success: true, + content: None, + message: Some(format!("Section '{name}' updated")), + } + } + }; + Ok(result) + } +} diff --git a/crates/nvisy-rig/src/agent/tool/vector_search.rs b/crates/nvisy-rig/src/agent/tool/vector_search.rs new file mode 100644 index 0000000..d92fea6 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/vector_search.rs @@ -0,0 +1,127 @@ +//! Vector search tool for semantic similarity search. 
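+//!
+//! Illustrative usage sketch (assumes `MySearcher` is a hypothetical [`VectorSearcher`]
+//! implementation):
+//!
+//! ```ignore
+//! let tool = VectorSearchTool::new(MySearcher::default());
+//! // The model invokes the tool with arguments such as:
+//! // { "query": "quarterly revenue figures", "limit": 5, "threshold": 0.7 }
+//! ```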
+ +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +/// Result from a vector search query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResult { + /// The document/chunk ID. + pub id: String, + /// The text content. + pub content: String, + /// Similarity score (0.0 to 1.0). + pub score: f64, + /// Optional metadata. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metadata: Option, +} + +/// Trait for vector search implementations. +#[async_trait] +pub trait VectorSearcher: Send + Sync { + /// Search for similar documents. + async fn search( + &self, + query: &str, + limit: usize, + threshold: Option, + ) -> Result, VectorSearchError>; +} + +/// Error type for vector search operations. +#[derive(Debug, thiserror::Error)] +pub enum VectorSearchError { + #[error("embedding failed: {0}")] + Embedding(String), + #[error("search failed: {0}")] + Search(String), + #[error("connection error: {0}")] + Connection(String), +} + +/// Arguments for vector search. +#[derive(Debug, Deserialize)] +pub struct VectorSearchArgs { + /// The search query text. + pub query: String, + /// Maximum number of results to return. + #[serde(default = "default_limit")] + pub limit: usize, + /// Minimum similarity threshold (0.0 to 1.0). + #[serde(default)] + pub threshold: Option, +} + +fn default_limit() -> usize { + 5 +} + +/// Tool for searching vector stores. +pub struct VectorSearchTool { + searcher: Arc, +} + +impl VectorSearchTool { + /// Creates a new vector search tool. + pub fn new(searcher: S) -> Self { + Self { + searcher: Arc::new(searcher), + } + } + + /// Creates a new vector search tool from an Arc. + pub fn from_arc(searcher: Arc) -> Self { + Self { searcher } + } +} + +impl Tool for VectorSearchTool { + type Args = VectorSearchArgs; + type Error = VectorSearchError; + type Output = Vec; + + const NAME: &'static str = "vector_search"; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Search for semantically similar documents or chunks using vector embeddings. Returns the most relevant results based on meaning, not just keywords.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query text to find similar documents" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results to return (default: 5)", + "default": 5 + }, + "threshold": { + "type": "number", + "description": "Minimum similarity score threshold (0.0 to 1.0)", + "minimum": 0.0, + "maximum": 1.0 + } + }, + "required": ["query"] + }), + } + } + + #[tracing::instrument(skip(self), fields(tool = Self::NAME, query_len = args.query.len(), limit = args.limit, threshold = ?args.threshold))] + async fn call(&self, args: Self::Args) -> Result { + let results = self + .searcher + .search(&args.query, args.limit, args.threshold) + .await?; + tracing::debug!(result_count = results.len(), "vector_search completed"); + Ok(results) + } +} diff --git a/crates/nvisy-rig/src/agent/vision.rs b/crates/nvisy-rig/src/agent/vision.rs new file mode 100644 index 0000000..a74519f --- /dev/null +++ b/crates/nvisy-rig/src/agent/vision.rs @@ -0,0 +1,115 @@ +//! Vision agent for VLM-powered tasks. 
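+//!
+//! Illustrative usage sketch (assumes a configured [`CompletionProvider`] and an async
+//! context):
+//!
+//! ```ignore
+//! let agent = VisionAgent::new(provider, false);
+//! let description = agent.describe(&image_base64).await?;
+//! ```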
+ +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::Prompt; + +use super::tool::ScratchpadTool; +use crate::Result; +use crate::provider::CompletionProvider; + +const NAME: &str = "VisionAgent"; +const DESCRIPTION: &str = + "Agent for vision-language model tasks including image description, OCR, and object detection"; + +const PREAMBLE: &str = "\ +You are a vision analysis assistant specialized in understanding and describing visual content. +Your task is to analyze images and provide accurate, detailed information based on what you observe. +Always be precise and factual in your descriptions. If you cannot determine something with certainty, say so. +When outputting structured data, use valid JSON format."; + +const PROMPT_DESCRIBE: &str = "Describe this image concisely in 1-2 sentences."; + +const PROMPT_DESCRIBE_DETAILED: &str = "\ +Provide a detailed description of this image, including: +- Main subjects and objects +- Text visible in the image +- Colors and visual style +- Layout and composition"; + +const PROMPT_EXTRACT_TEXT: &str = "\ +Extract all text visible in this image. +Preserve the original formatting and structure as much as possible. +If no text is visible, respond with 'No text detected.'"; + +const PROMPT_DETECT_OBJECTS: &str = "\ +List all objects and entities visible in this image. +For each object, provide: +- Object type/name +- Brief description +- Approximate location (e.g., top-left, center, bottom-right) + +Format as a JSON array."; + +/// Agent for vision-language model tasks. +/// +/// Handles tasks that require understanding visual content: +/// - Image description (brief and detailed) +/// - Generative OCR (text extraction from images) +/// - Object detection +/// - VLM-based document partitioning +/// +/// When `with_tools` is enabled, the agent has access to: +/// - `ScratchpadTool` - For drafting and refining descriptions iteratively +pub struct VisionAgent { + agent: Agent, + model_name: String, +} + +impl VisionAgent { + /// Creates a new vision agent with the given completion provider. + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad for drafting) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE); + + let agent = if with_tools { + builder.tool(ScratchpadTool::new()).build() + } else { + builder.build() + }; + + Self { agent, model_name } + } + + /// Generates a brief description of an image. + #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] + pub async fn describe(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_DESCRIBE, image_base64); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "describe completed"); + Ok(response) + } + + /// Generates a detailed description of an image. 
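+    ///
+    /// Illustrative sketch (assumes `image_base64` holds base64-encoded image data):
+    ///
+    /// ```ignore
+    /// let details = agent.describe_detailed(&image_base64).await?;
+    /// ```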
+ #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] + pub async fn describe_detailed(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_DESCRIBE_DETAILED, image_base64); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "describe_detailed completed"); + Ok(response) + } + + /// Extracts text from an image using generative OCR. + #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] + pub async fn extract_text(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_EXTRACT_TEXT, image_base64); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "extract_text completed"); + Ok(response) + } + + /// Detects and lists objects in an image. + #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] + pub async fn detect_objects(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_DETECT_OBJECTS, image_base64); + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "detect_objects completed"); + Ok(response) + } +} diff --git a/crates/nvisy-rig/src/chat/agent/context.rs b/crates/nvisy-rig/src/chat/agent/context.rs deleted file mode 100644 index d7468e4..0000000 --- a/crates/nvisy-rig/src/chat/agent/context.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! Agent context for a single request. - -use crate::rag::{RetrievedChunk, estimate_tokens}; -use crate::session::Session; - -/// Context for an agent request. -#[derive(Debug, Clone)] -pub struct AgentContext { - /// The session this request belongs to. - session: Session, - - /// The user's message. - message: String, - - /// Retrieved document chunks for RAG. - retrieved_chunks: Vec, -} - -impl AgentContext { - /// Creates a new agent context. - pub fn new(session: Session, message: String, retrieved_chunks: Vec) -> Self { - Self { - session, - message, - retrieved_chunks, - } - } - - /// Returns the session. - pub fn session(&self) -> &Session { - &self.session - } - - /// Returns the user's message. - pub fn message(&self) -> &str { - &self.message - } - - /// Returns the retrieved chunks. - pub fn retrieved_chunks(&self) -> &[RetrievedChunk] { - &self.retrieved_chunks - } - - /// Returns whether there are any retrieved chunks. - pub fn has_context(&self) -> bool { - !self.retrieved_chunks.is_empty() - } - - /// Returns the total token count of retrieved chunks (estimated). - pub fn context_tokens(&self) -> u32 { - self.retrieved_chunks - .iter() - .filter_map(|c| c.content.as_deref()) - .map(estimate_tokens) - .sum() - } -} - -#[cfg(test)] -mod tests { - use uuid::Uuid; - - use super::*; - use crate::session::CreateSession; - - #[test] - fn context_without_chunks() { - let session = Session::new(CreateSession::new( - Uuid::now_v7(), - Uuid::now_v7(), - Uuid::now_v7(), - )); - let context = AgentContext::new(session, "Hello".to_string(), Vec::new()); - - assert!(!context.has_context()); - assert_eq!(context.context_tokens(), 0); - } -} diff --git a/crates/nvisy-rig/src/chat/agent/executor.rs b/crates/nvisy-rig/src/chat/agent/executor.rs deleted file mode 100644 index 02572fd..0000000 --- a/crates/nvisy-rig/src/chat/agent/executor.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! 
Agent executor that runs the conversation loop. - -use std::sync::Arc; - -use futures::StreamExt; -use futures::stream::BoxStream; - -use super::{AgentConfig, AgentContext, ChatEvent}; -use crate::Result; -use crate::provider::{ModelRef, ProviderRegistry}; -use crate::tool::ToolRegistry; - -/// Executor for running the agent loop. -pub struct AgentExecutor { - config: AgentConfig, - providers: Arc, - tools: Arc, - context: AgentContext, - model_override: Option, -} - -impl AgentExecutor { - /// Creates a new executor. - pub fn new( - config: AgentConfig, - providers: Arc, - tools: Arc, - context: AgentContext, - model_override: Option, - ) -> Self { - Self { - config, - providers, - tools, - context, - model_override, - } - } - - /// Runs the agent loop and returns a stream of events. - pub async fn run(self) -> Result>> { - // TODO: Implement the actual agent loop: - // 1. Build the prompt with system message, context, and history - // 2. Stream completion from the provider - // 3. Parse tool calls from the response - // 4. Execute tools and collect results - // 5. If tools were called, loop back to step 2 - // 6. Extract proposed edits from tool results - // 7. Apply auto-apply policies - // 8. Emit final Done event - - let _ = ( - &self.config, - &self.providers, - &self.tools, - &self.context, - &self.model_override, - ); - - // For now, return an empty stream - let stream = futures::stream::empty(); - Ok(stream.boxed()) - } -} diff --git a/crates/nvisy-rig/src/chat/agent/mod.rs b/crates/nvisy-rig/src/chat/agent/mod.rs deleted file mode 100644 index e0277e1..0000000 --- a/crates/nvisy-rig/src/chat/agent/mod.rs +++ /dev/null @@ -1,141 +0,0 @@ -//! Agent module for orchestrating AI-powered document processing. -//! -//! The agent is responsible for: -//! - Managing the conversation loop with the LLM -//! - Executing tool calls -//! - Proposing and applying edits -//! - Streaming responses back to the client - -mod context; -mod executor; -mod prompt; - -use std::sync::Arc; - -pub use context::AgentContext; -pub use executor::AgentExecutor; -use futures::stream::BoxStream; -pub use prompt::PromptBuilder; -use uuid::Uuid; - -use super::ChatEvent; -use crate::Result; -use crate::provider::{ModelRef, ProviderRegistry}; -use crate::rag::RetrievedChunk; -use crate::session::Session; -use crate::tool::ToolRegistry; -use crate::tool::edit::ProposedEdit; - -/// Configuration for the agent. -#[derive(Debug, Clone)] -pub struct AgentConfig { - /// Maximum number of tool call iterations. - pub max_iterations: usize, - - /// Maximum tokens for completion. - pub max_tokens: u32, - - /// Temperature for generation. - pub temperature: f32, - - /// Whether to include thinking in output. - pub include_thinking: bool, -} - -impl Default for AgentConfig { - fn default() -> Self { - Self { - max_iterations: 10, - max_tokens: 4096, - temperature: 0.7, - include_thinking: false, - } - } -} - -/// The core agent that processes chat messages. -pub struct Agent { - config: AgentConfig, - providers: Arc, - tools: Arc, -} - -impl Agent { - /// Creates a new agent. - pub fn new( - config: AgentConfig, - providers: Arc, - tools: Arc, - ) -> Self { - Self { - config, - providers, - tools, - } - } - - /// Processes a chat message and returns a stream of events. - /// - /// The `retrieved_chunks` should be pre-fetched using the RAG system. 
- pub async fn process( - &self, - session: &Session, - message: &str, - retrieved_chunks: Vec, - model_override: Option<&ModelRef>, - ) -> Result>> { - // Build context for this request - let context = AgentContext::new(session.clone(), message.to_string(), retrieved_chunks); - - // Create executor - let executor = AgentExecutor::new( - self.config.clone(), - self.providers.clone(), - self.tools.clone(), - context, - model_override.cloned(), - ); - - // Run the agent loop - executor.run().await - } - - /// Returns proposed edits from an agent run. - pub fn extract_edits(&self, _events: &[ChatEvent]) -> Vec { - // Extract proposed edits from the event stream - // This is called after processing to collect all edits - Vec::new() - } -} - -/// Result of an agent run. -#[derive(Debug, Clone)] -pub struct AgentResult { - /// The final response text. - pub response: String, - - /// Message ID. - pub message_id: Uuid, - - /// Proposed edits. - pub proposed_edits: Vec, - - /// Edits that were auto-applied. - pub applied_edits: Vec, - - /// Total tokens used. - pub total_tokens: u32, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn agent_config_defaults() { - let config = AgentConfig::default(); - assert_eq!(config.max_iterations, 10); - assert_eq!(config.max_tokens, 4096); - assert!(!config.include_thinking); - } -} diff --git a/crates/nvisy-rig/src/chat/agent/prompt.rs b/crates/nvisy-rig/src/chat/agent/prompt.rs deleted file mode 100644 index 232ca7e..0000000 --- a/crates/nvisy-rig/src/chat/agent/prompt.rs +++ /dev/null @@ -1,181 +0,0 @@ -//! Prompt building for the agent. - -use crate::rag::RetrievedChunk; -use crate::session::{Message, Session}; -use crate::tool::ToolDefinition; - -/// Builder for constructing agent prompts. -#[derive(Debug, Clone)] -pub struct PromptBuilder { - system_prompt: String, - tools: Vec, - context_chunks: Vec, - history: Vec, - user_message: String, -} - -impl PromptBuilder { - /// Creates a new prompt builder with the default system prompt. - pub fn new() -> Self { - Self { - system_prompt: default_system_prompt(), - tools: Vec::new(), - context_chunks: Vec::new(), - history: Vec::new(), - user_message: String::new(), - } - } - - /// Sets a custom system prompt. - pub fn with_system_prompt(mut self, prompt: impl Into) -> Self { - self.system_prompt = prompt.into(); - self - } - - /// Adds available tools. - pub fn with_tools(mut self, tools: Vec) -> Self { - self.tools = tools; - self - } - - /// Adds retrieved context chunks. - pub fn with_context(mut self, chunks: Vec) -> Self { - self.context_chunks = chunks; - self - } - - /// Adds conversation history from session. - pub fn with_session(mut self, session: &Session) -> Self { - self.history = session.messages().to_vec(); - if let Some(custom_prompt) = session.system_prompt() { - self.system_prompt = custom_prompt.to_string(); - } - self - } - - /// Sets the user message. - pub fn with_user_message(mut self, message: impl Into) -> Self { - self.user_message = message.into(); - self - } - - /// Builds the system prompt with context. 
- pub fn build_system_prompt(&self) -> String { - let mut prompt = self.system_prompt.clone(); - - // Add tool descriptions - if !self.tools.is_empty() { - prompt.push_str("\n\n## Available Tools\n\n"); - for tool in &self.tools { - prompt.push_str(&format!("### {}\n{}\n\n", tool.name(), tool.description())); - } - } - - // Add context chunks - if !self.context_chunks.is_empty() { - prompt.push_str("\n\n## Document Context\n\n"); - for (i, chunk) in self.context_chunks.iter().enumerate() { - let content = chunk.content_or_placeholder(); - prompt.push_str(&format!( - "### Chunk {} (relevance: {:.2})\n```\n{}\n```\n\n", - i + 1, - chunk.score, - content - )); - } - } - - prompt - } - - /// Builds the complete message list for the API call. - pub fn build_messages(&self) -> Vec { - let mut messages = Vec::new(); - - // Add system message - messages.push(Message::system(self.build_system_prompt())); - - // Add history - messages.extend(self.history.clone()); - - // Add current user message - if !self.user_message.is_empty() { - messages.push(Message::user(&self.user_message)); - } - - messages - } -} - -impl Default for PromptBuilder { - fn default() -> Self { - Self::new() - } -} - -/// Default system prompt for document processing. -fn default_system_prompt() -> String { - r#"You are an AI assistant specialized in document processing and editing. Your role is to help users understand, analyze, and modify their documents. - -## Capabilities - -You can: -- Extract specific content (tables, sections, figures) -- Redact sensitive information (PII, confidential data) -- Summarize or restructure content -- Answer questions about the document -- Make precise edits as requested - -## Guidelines - -1. **Be precise**: When making edits, be specific about locations and changes. -2. **Preserve structure**: Maintain the document's formatting and organization. -3. **Confirm before destructive changes**: For irreversible operations, confirm with the user first. -4. **Reference accurately**: When citing content, use exact quotes or page/section references. -5. **Respect confidentiality**: Handle sensitive content appropriately. - -## Tool Usage - -Use the available tools to: -- Read document content -- Make edits -- Extract specific elements -- Search within the document - -Always explain what you're doing and why."# - .to_string() -} - -#[cfg(test)] -mod tests { - use uuid::Uuid; - - use super::*; - use crate::rag::ChunkMetadata; - - #[test] - fn prompt_builder_default() { - let builder = PromptBuilder::new(); - let system = builder.build_system_prompt(); - - assert!(system.contains("document processing")); - assert!(system.contains("Capabilities")); - } - - #[test] - fn prompt_builder_with_context() { - let chunk = RetrievedChunk::new( - Uuid::nil(), - Uuid::nil(), - 0.95, - ChunkMetadata::new(0, 0, 100), - ) - .with_content("test content".to_string()); - - let builder = PromptBuilder::new().with_context(vec![chunk]); - - let system = builder.build_system_prompt(); - assert!(system.contains("Document Context")); - assert!(system.contains("test content")); - } -} diff --git a/crates/nvisy-rig/src/chat/event.rs b/crates/nvisy-rig/src/chat/event.rs deleted file mode 100644 index 486a0d0..0000000 --- a/crates/nvisy-rig/src/chat/event.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! Chat events emitted during streaming. 
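For reference, the removed PromptBuilder was meant to be chained roughly as sketched below; this is illustrative only, assumes a `session` and pre-retrieved `chunks` supplied by the caller, and is not part of this patch.

// Illustrative sketch of the removed builder API (not part of this patch).
fn build_chat_messages(
    session: &Session,
    chunks: Vec<RetrievedChunk>,
    user_message: &str,
) -> Vec<Message> {
    PromptBuilder::new()
        .with_session(session)           // copies history and any custom system prompt
        .with_context(chunks)            // RAG chunks rendered into the system prompt
        .with_user_message(user_message)
        .build_messages()                // [system, ...history, user]
}
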
- -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::ChatResponse; -use crate::tool::edit::ProposedEdit; -use crate::tool::{ToolCall, ToolResult}; - -/// Events emitted during chat processing. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ChatEvent { - /// Agent is thinking/planning. - Thinking { content: String }, - - /// Text delta from the model. - TextDelta { delta: String }, - - /// Agent is calling a tool. - ToolCall { call: ToolCall }, - - /// Tool execution completed. - ToolResult { result: ToolResult }, - - /// Agent proposes an edit to the document. - ProposedEdit { edit: ProposedEdit }, - - /// Edit was auto-applied based on policy. - EditApplied { edit_id: Uuid }, - - /// Chat response completed. - Done { response: ChatResponse }, - - /// Error occurred during processing. - Error { message: String }, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn chat_event_serialization() { - let event = ChatEvent::TextDelta { - delta: "Hello".to_string(), - }; - - let json = serde_json::to_string(&event).expect("ChatEvent should serialize to JSON"); - assert!(json.contains("text_delta")); - assert!(json.contains("Hello")); - } -} diff --git a/crates/nvisy-rig/src/chat/mod.rs b/crates/nvisy-rig/src/chat/mod.rs deleted file mode 100644 index 9766167..0000000 --- a/crates/nvisy-rig/src/chat/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Chat service for AI-powered document conversations. -//! -//! This module provides: -//! - [`ChatService`] - Main entry point for chat functionality -//! - [`ChatStream`] - Streaming chat response -//! - [`ChatEvent`] - Events emitted during chat -//! - [`ChatResponse`] - Complete response after stream ends -//! - [`UsageStats`] - Token usage statistics -//! - [`agent`] - Agent execution for processing chat messages - -pub mod agent; -mod event; -mod response; -mod service; -mod stream; -mod usage; - -pub use agent::{Agent, AgentConfig, AgentContext, AgentExecutor, PromptBuilder}; -pub use event::ChatEvent; -pub use response::ChatResponse; -pub use service::ChatService; -pub use stream::ChatStream; -pub use usage::UsageStats; diff --git a/crates/nvisy-rig/src/chat/response.rs b/crates/nvisy-rig/src/chat/response.rs deleted file mode 100644 index dc3f463..0000000 --- a/crates/nvisy-rig/src/chat/response.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Chat response types. - -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::UsageStats; -use crate::tool::edit::ProposedEdit; - -/// Complete chat response after stream ends. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChatResponse { - /// Unique message ID. - pub id: Uuid, - - /// Complete response text. - pub content: String, - - /// Model used for completion. - pub model: String, - - /// Token usage statistics. - pub usage: UsageStats, - - /// Proposed edits from this response. - pub proposed_edits: Vec, - - /// Edits that were auto-applied. - pub applied_edits: Vec, -} - -impl ChatResponse { - /// Creates a new chat response. - pub fn new(content: String, model: String, usage: UsageStats) -> Self { - Self { - id: Uuid::now_v7(), - content, - model, - usage, - proposed_edits: Vec::new(), - applied_edits: Vec::new(), - } - } - - /// Adds proposed edits to the response. - pub fn with_proposed_edits(mut self, edits: Vec) -> Self { - self.proposed_edits = edits; - self - } - - /// Adds applied edits to the response. 
- pub fn with_applied_edits(mut self, edit_ids: Vec) -> Self { - self.applied_edits = edit_ids; - self - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn chat_response_builder() { - let response = ChatResponse::new( - "Test content".to_string(), - "gpt-4".to_string(), - UsageStats::default(), - ); - - assert!(!response.id.is_nil()); - assert_eq!(response.content, "Test content"); - assert_eq!(response.model, "gpt-4"); - } -} diff --git a/crates/nvisy-rig/src/chat/service.rs b/crates/nvisy-rig/src/chat/service.rs deleted file mode 100644 index 698ad1a..0000000 --- a/crates/nvisy-rig/src/chat/service.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Chat service for managing sessions and conversations. - -use std::sync::Arc; - -use nvisy_nats::NatsClient; -use uuid::Uuid; - -use super::ChatStream; -use crate::Result; -use crate::provider::{ModelRef, ProviderRegistry}; -use crate::session::{CreateSession, Session, SessionStore}; -use crate::tool::ToolRegistry; -use crate::tool::edit::ApplyResult; - -/// Inner state for [`ChatService`]. -struct ChatServiceInner { - providers: ProviderRegistry, - tools: ToolRegistry, - sessions: SessionStore, -} - -/// Chat service for AI-powered document conversations. -/// -/// This type is cheap to clone and can be shared across threads. -/// -/// Provides a high-level API for: -/// - Creating and managing chat sessions -/// - Streaming chat responses with tool use -/// - Approving and applying document edits -#[derive(Clone)] -pub struct ChatService { - inner: Arc, -} - -impl ChatService { - /// Creates a new ChatService with automatic ToolRegistry and SessionStore. - pub async fn new(providers: ProviderRegistry, nats: NatsClient) -> Result { - let tools = ToolRegistry::with_defaults(); - let sessions = SessionStore::new(nats).await?; - - Ok(Self { - inner: Arc::new(ChatServiceInner { - providers, - tools, - sessions, - }), - }) - } - - /// Creates a new ChatService with custom tools and session store. - pub fn with_components( - providers: ProviderRegistry, - tools: ToolRegistry, - sessions: SessionStore, - ) -> Self { - Self { - inner: Arc::new(ChatServiceInner { - providers, - tools, - sessions, - }), - } - } - - /// Creates a new chat session for a document. - pub async fn create_session(&self, request: CreateSession) -> Result { - let session = Session::new(request); - self.inner.sessions.create(&session).await?; - Ok(session) - } - - /// Retrieves an existing session. - pub async fn get_session(&self, session_id: Uuid) -> Result> { - self.inner.sessions.get(session_id).await - } - - /// Sends a chat message and returns a streaming response. - /// - /// The stream emits [`ChatEvent`](super::ChatEvent)s as the agent processes the request, - /// including thinking, tool calls, proposed edits, and text deltas. - pub async fn chat(&self, session_id: Uuid, message: &str) -> Result { - // Touch session to reset TTL - self.inner.sessions.touch(session_id).await?; - - // Get session - let session = self - .inner - .sessions - .get(session_id) - .await? - .ok_or_else(|| crate::Error::session("session not found"))?; - - // Create chat stream - ChatStream::new(session, message.to_string(), self.clone()).await - } - - /// Sends a chat message with a specific model override. - pub async fn chat_with_model( - &self, - session_id: Uuid, - message: &str, - model: ModelRef, - ) -> Result { - // Touch session to reset TTL - self.inner.sessions.touch(session_id).await?; - - // Get session - let session = self - .inner - .sessions - .get(session_id) - .await? 
- .ok_or_else(|| crate::Error::session("session not found"))?; - - // Create chat stream with model override - ChatStream::with_model(session, message.to_string(), Some(model), self.clone()).await - } - - /// Approves and applies pending edits. - pub async fn apply_edits(&self, session_id: Uuid, edit_ids: &[Uuid]) -> Result { - let mut session = self - .inner - .sessions - .get(session_id) - .await? - .ok_or_else(|| crate::Error::session("session not found"))?; - - let result = session.apply_edits(edit_ids)?; - self.inner.sessions.update(&session).await?; - - Ok(result) - } - - /// Rejects pending edits. - pub async fn reject_edits(&self, session_id: Uuid, edit_ids: &[Uuid]) -> Result<()> { - let mut session = self - .inner - .sessions - .get(session_id) - .await? - .ok_or_else(|| crate::Error::session("session not found"))?; - - session.reject_edits(edit_ids); - self.inner.sessions.update(&session).await?; - - Ok(()) - } - - /// Ends a session and cleans up all pending edits. - pub async fn end_session(&self, session_id: Uuid) -> Result<()> { - self.inner.sessions.delete(session_id).await - } - - /// Generates embeddings for text. - /// - /// Used for indexing documents into the vector store. - pub async fn embed(&self, text: &str, model: Option<&ModelRef>) -> Result> { - let (_provider, _model_name) = self.inner.providers.resolve_embedding(model)?; - - // TODO: Implement using rig-core embedding - let _ = text; - Err(crate::Error::provider( - "rig", - "embedding not yet implemented", - )) - } - - /// Returns a reference to the provider registry. - pub fn providers(&self) -> &ProviderRegistry { - &self.inner.providers - } - - /// Returns a reference to the tool registry. - pub fn tools(&self) -> &ToolRegistry { - &self.inner.tools - } - - /// Returns a reference to the session store. - pub fn sessions(&self) -> &SessionStore { - &self.inner.sessions - } -} diff --git a/crates/nvisy-rig/src/chat/stream.rs b/crates/nvisy-rig/src/chat/stream.rs deleted file mode 100644 index bff367b..0000000 --- a/crates/nvisy-rig/src/chat/stream.rs +++ /dev/null @@ -1,128 +0,0 @@ -//! Streaming chat response. - -use std::pin::Pin; -use std::task::{Context, Poll}; - -use futures::Stream; -use uuid::Uuid; - -use super::{ChatEvent, ChatResponse, ChatService, UsageStats}; -use crate::Result; -use crate::provider::ModelRef; -use crate::session::Session; -use crate::tool::edit::ProposedEdit; - -/// Streaming chat response. -/// -/// Implements `Stream>` for async iteration. -pub struct ChatStream { - session: Session, - message: String, - model_override: Option, - service: ChatService, - - // State - started: bool, - finished: bool, - accumulated_content: String, - proposed_edits: Vec, - applied_edits: Vec, -} - -impl ChatStream { - /// Creates a new chat stream. - pub async fn new(session: Session, message: String, service: ChatService) -> Result { - Ok(Self { - session, - message, - model_override: None, - service, - started: false, - finished: false, - accumulated_content: String::new(), - proposed_edits: Vec::new(), - applied_edits: Vec::new(), - }) - } - - /// Creates a new chat stream with a model override. - pub async fn with_model( - session: Session, - message: String, - model_override: Option, - service: ChatService, - ) -> Result { - Ok(Self { - session, - message, - model_override, - service, - started: false, - finished: false, - accumulated_content: String::new(), - proposed_edits: Vec::new(), - applied_edits: Vec::new(), - }) - } - - /// Returns the session ID. 
- pub fn session_id(&self) -> Uuid { - self.session.id() - } - - /// Returns the document ID being processed. - pub fn document_id(&self) -> Uuid { - self.session.document_id() - } - - /// Polls the underlying agent for the next event. - fn poll_next_event(&mut self, _cx: &mut Context<'_>) -> Poll>> { - if self.finished { - return Poll::Ready(None); - } - - if !self.started { - self.started = true; - - // TODO: Start the actual agent pipeline: - // 1. Retrieve relevant context via RAG - // 2. Build prompt with tools, context, and history - // 3. Stream completion from provider - // 4. Handle tool calls and proposed edits - // 5. Apply auto-apply policies - - // For now, emit a placeholder response - // These references silence unused warnings until the pipeline is implemented - let _ = (&self.message, &self.service, &self.accumulated_content); - - // Emit done event with placeholder - self.finished = true; - - let model = self - .model_override - .as_ref() - .map(|m| m.to_string()) - .unwrap_or_else(|| "default".to_string()); - - let response = ChatResponse::new( - "Agent pipeline not yet implemented".to_string(), - model, - UsageStats::default(), - ) - .with_proposed_edits(self.proposed_edits.clone()) - .with_applied_edits(self.applied_edits.clone()); - - return Poll::Ready(Some(Ok(ChatEvent::Done { response }))); - } - - Poll::Ready(None) - } -} - -impl Stream for ChatStream { - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.poll_next_event(cx) - } -} diff --git a/crates/nvisy-rig/src/chat/usage.rs b/crates/nvisy-rig/src/chat/usage.rs deleted file mode 100644 index b79f634..0000000 --- a/crates/nvisy-rig/src/chat/usage.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! Token usage statistics. - -use serde::{Deserialize, Serialize}; - -/// Token usage statistics for a chat completion. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct UsageStats { - /// Number of input tokens (prompt). - pub input_tokens: u32, - - /// Number of output tokens (completion). - pub output_tokens: u32, - - /// Number of tokens used for reasoning/thinking. - pub reasoning_tokens: u32, - - /// Total tokens (input + output). - pub total_tokens: u32, - - /// Estimated cost in USD (if available). - pub estimated_cost_usd: Option, -} - -impl UsageStats { - /// Creates new usage stats. - pub fn new(input_tokens: u32, output_tokens: u32) -> Self { - Self { - input_tokens, - output_tokens, - reasoning_tokens: 0, - total_tokens: input_tokens + output_tokens, - estimated_cost_usd: None, - } - } - - /// Adds reasoning tokens. - pub fn with_reasoning_tokens(mut self, reasoning_tokens: u32) -> Self { - self.reasoning_tokens = reasoning_tokens; - self - } - - /// Sets the estimated cost. - pub fn with_cost(mut self, cost_usd: f64) -> Self { - self.estimated_cost_usd = Some(cost_usd); - self - } - - /// Accumulates usage from another stats instance. 
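As a reader aid, the removed chat surface was designed to be consumed roughly as sketched below; the snippet is illustrative, assumes an already-created session, and is not part of this patch.

// Illustrative sketch of the removed ChatService/ChatStream flow (not part of this patch).
use futures::StreamExt;

async fn run_chat(service: &ChatService, session_id: Uuid) -> Result<()> {
    let mut stream = service.chat(session_id, "Summarize section 2").await?;
    while let Some(event) = stream.next().await {
        match event? {
            ChatEvent::TextDelta { delta } => print!("{delta}"),
            ChatEvent::ProposedEdit { edit } => println!("proposed edit: {edit:?}"),
            ChatEvent::Done { response } => {
                println!("\ntokens used: {}", response.usage.total_tokens);
            }
            _ => {}
        }
    }
    Ok(())
}
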
- pub fn accumulate(&mut self, other: &UsageStats) { - self.input_tokens += other.input_tokens; - self.output_tokens += other.output_tokens; - self.reasoning_tokens += other.reasoning_tokens; - self.total_tokens += other.total_tokens; - - if let Some(other_cost) = other.estimated_cost_usd { - self.estimated_cost_usd = Some(self.estimated_cost_usd.unwrap_or(0.0) + other_cost); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn usage_stats_new() { - let stats = UsageStats::new(100, 50); - assert_eq!(stats.input_tokens, 100); - assert_eq!(stats.output_tokens, 50); - assert_eq!(stats.total_tokens, 150); - } - - #[test] - fn usage_stats_accumulate() { - let mut stats = UsageStats::new(100, 50); - let other = UsageStats::new(200, 100).with_cost(0.01); - - stats.accumulate(&other); - - assert_eq!(stats.input_tokens, 300); - assert_eq!(stats.output_tokens, 150); - // 150 (original) + 300 (other) = 450 - assert_eq!(stats.total_tokens, 450); - assert_eq!(stats.estimated_cost_usd, Some(0.01)); - } -} diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs index 263cb51..926545f 100644 --- a/crates/nvisy-rig/src/error.rs +++ b/crates/nvisy-rig/src/error.rs @@ -2,6 +2,9 @@ use std::fmt; +use rig::completion::{CompletionError, PromptError}; +use rig::embeddings::EmbeddingError; + /// Result type alias for rig operations. pub type Result = std::result::Result; @@ -12,41 +15,29 @@ pub enum Error { #[error("provider error: {provider}: {message}")] Provider { provider: String, message: String }, - /// Session error (not found, expired, etc.) - #[error("session error: {0}")] - Session(String), - - /// Agent execution error. - #[error("agent error: {0}")] - Agent(String), - - /// Tool execution error. - #[error("tool error: {tool}: {message}")] - Tool { tool: String, message: String }, - /// RAG retrieval error. #[error("retrieval error: {0}")] Retrieval(String), /// Embedding error. #[error("embedding error: {0}")] - Embedding(String), + Embedding(#[from] EmbeddingError), + + /// Completion error. + #[error("completion error: {0}")] + Completion(#[from] CompletionError), - /// Edit error. - #[error("edit error: {0}")] - Edit(String), + /// Prompt error. + #[error("prompt error: {0}")] + Prompt(#[from] PromptError), /// Configuration error. #[error("configuration error: {0}")] Config(String), - /// Serialization error. - #[error("serialization error: {0}")] - Serialization(#[from] serde_json::Error), - - /// I/O error. - #[error("io error: {0}")] - Io(#[from] std::io::Error), + /// Parse error (JSON parsing, etc.) + #[error("parse error: {0}")] + Parse(String), } impl Error { @@ -58,46 +49,44 @@ impl Error { } } - /// Creates a session error. - pub fn session(message: impl fmt::Display) -> Self { - Self::Session(message.to_string()) - } - - /// Creates an agent error. - pub fn agent(message: impl fmt::Display) -> Self { - Self::Agent(message.to_string()) - } - - /// Creates a tool error. - pub fn tool(tool: impl fmt::Display, message: impl fmt::Display) -> Self { - Self::Tool { - tool: tool.to_string(), - message: message.to_string(), - } - } - /// Creates a retrieval error. pub fn retrieval(message: impl fmt::Display) -> Self { Self::Retrieval(message.to_string()) } - /// Creates an embedding error. - pub fn embedding(message: impl fmt::Display) -> Self { - Self::Embedding(message.to_string()) - } - - /// Creates an edit error. - pub fn edit(message: impl fmt::Display) -> Self { - Self::Edit(message.to_string()) - } - /// Creates a configuration error. 
pub fn config(message: impl fmt::Display) -> Self { Self::Config(message.to_string()) } + /// Creates a parse error. + pub fn parse(message: impl fmt::Display) -> Self { + Self::Parse(message.to_string()) + } + /// Returns true if this error is retryable. pub fn is_retryable(&self) -> bool { - matches!(self, Self::Provider { .. } | Self::Io(_)) + matches!(self, Self::Provider { .. }) + } +} + +impl From for nvisy_core::Error { + fn from(err: Error) -> Self { + let (kind, message) = match &err { + Error::Provider { provider, message } => ( + nvisy_core::ErrorKind::ExternalError, + format!("{}: {}", provider, message), + ), + Error::Retrieval(msg) => (nvisy_core::ErrorKind::ExternalError, msg.clone()), + Error::Embedding(_) => (nvisy_core::ErrorKind::ExternalError, err.to_string()), + Error::Completion(_) => (nvisy_core::ErrorKind::ExternalError, err.to_string()), + Error::Prompt(_) => (nvisy_core::ErrorKind::InvalidInput, err.to_string()), + Error::Config(msg) => (nvisy_core::ErrorKind::Configuration, msg.clone()), + Error::Parse(msg) => (nvisy_core::ErrorKind::Serialization, msg.clone()), + }; + + nvisy_core::Error::new(kind) + .with_message(message) + .with_source(err) } } diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index ace76bf..38416f0 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -2,16 +2,12 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -pub mod chat; +pub mod agent; mod error; pub mod provider; pub mod rag; -mod service; -mod session; -mod tool; pub use error::{Error, Result}; -pub use service::{RigConfig, RigService}; /// Tracing target for the main library. pub const TRACING_TARGET: &str = "nvisy_rig"; diff --git a/crates/nvisy-rig/src/provider/completion/credentials.rs b/crates/nvisy-rig/src/provider/completion/credentials.rs new file mode 100644 index 0000000..7b0bcbd --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/credentials.rs @@ -0,0 +1,32 @@ +//! Completion provider credentials. + +use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; + +pub use super::super::credentials::{ApiKeyCredentials, OllamaCredentials}; + +/// Credentials for completion providers. +#[derive(Debug, Clone, Serialize, Deserialize, IntoStaticStr)] +#[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum CompletionCredentials { + /// OpenAI credentials. + OpenAi(ApiKeyCredentials), + /// Anthropic credentials. + Anthropic(ApiKeyCredentials), + /// Cohere credentials. + Cohere(ApiKeyCredentials), + /// Google Gemini credentials. + Gemini(ApiKeyCredentials), + /// Perplexity credentials. + Perplexity(ApiKeyCredentials), + /// Ollama credentials (local, no API key required). + Ollama(OllamaCredentials), +} + +impl CompletionCredentials { + /// Returns the provider kind as a string. + pub fn kind(&self) -> &'static str { + self.into() + } +} diff --git a/crates/nvisy-rig/src/provider/completion/mod.rs b/crates/nvisy-rig/src/provider/completion/mod.rs new file mode 100644 index 0000000..bb505a4 --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/mod.rs @@ -0,0 +1,17 @@ +//! Completion models and providers. 
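With the new `From<Error> for nvisy_core::Error` impl, rig errors can be converted with `.into()` or propagated with `?` from core-facing code; a minimal sketch with a hypothetical caller:

// Minimal sketch (hypothetical caller, not part of this patch): the conversion
// above maps Error::Config to nvisy_core::ErrorKind::Configuration.
fn require_api_key(key: &str) -> nvisy_core::Result<()> {
    if key.trim().is_empty() {
        return Err(Error::config("missing API key").into());
    }
    Ok(())
}
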
+ +mod credentials; +mod model; +mod provider; +mod response; +mod rig_impl; + +pub use credentials::CompletionCredentials; +pub use model::{ + AnthropicModel, CohereCompletionModel, CompletionModel, GeminiCompletionModel, + OpenAiCompletionModel, PerplexityModel, +}; +pub use provider::CompletionProvider; +// Response types are part of the public API for CompletionModel trait consumers +#[allow(unused_imports)] +pub use response::{ProviderResponse, ProviderStreamingResponse}; diff --git a/crates/nvisy-rig/src/provider/completion/model.rs b/crates/nvisy-rig/src/provider/completion/model.rs new file mode 100644 index 0000000..b2f59bb --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/model.rs @@ -0,0 +1,135 @@ +//! Type-safe completion model references. + +use serde::{Deserialize, Serialize}; +use strum::{AsRefStr, Display, EnumString}; + +/// Reference to a completion/chat model. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "provider", content = "model", rename_all = "snake_case")] +pub enum CompletionModel { + /// OpenAI completion models. + OpenAi(OpenAiCompletionModel), + /// Anthropic models. + Anthropic(AnthropicModel), + /// Cohere completion models. + Cohere(CohereCompletionModel), + /// Google Gemini completion models. + Gemini(GeminiCompletionModel), + /// Perplexity models. + Perplexity(PerplexityModel), + /// Ollama local models (model name as string). + #[cfg(feature = "ollama")] + Ollama(String), +} + +/// OpenAI completion models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum OpenAiCompletionModel { + /// GPT-4o (multimodal flagship) + #[strum(serialize = "gpt-4o")] + Gpt4o, + /// GPT-4o mini (fast, affordable) + #[strum(serialize = "gpt-4o-mini")] + Gpt4oMini, + /// GPT-4 Turbo + #[strum(serialize = "gpt-4-turbo")] + Gpt4Turbo, + /// o1 (reasoning) + #[strum(serialize = "o1")] + O1, + /// o1 mini (fast reasoning) + #[strum(serialize = "o1-mini")] + O1Mini, + /// o3 mini (latest reasoning) + #[strum(serialize = "o3-mini")] + O3Mini, +} + +/// Anthropic models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum AnthropicModel { + /// Claude Opus 4 (most capable) + #[strum(serialize = "claude-opus-4-20250514")] + ClaudeOpus4, + /// Claude Sonnet 4 (balanced) + #[strum(serialize = "claude-sonnet-4-20250514")] + ClaudeSonnet4, + /// Claude Haiku 3.5 (fast) + #[strum(serialize = "claude-3-5-haiku-20241022")] + ClaudeHaiku35, +} + +/// Cohere completion models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum CohereCompletionModel { + /// Command R+ (most capable) + #[strum(serialize = "command-r-plus")] + CommandRPlus, + /// Command R (balanced) + #[strum(serialize = "command-r")] + CommandR, + /// Command (legacy) + #[strum(serialize = "command")] + Command, +} + +/// Google Gemini completion models. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum GeminiCompletionModel { + /// Gemini 2.0 Flash (fast, multimodal) + #[strum(serialize = "gemini-2.0-flash")] + Gemini20Flash, + /// Gemini 2.0 Flash Thinking (reasoning) + #[strum(serialize = "gemini-2.0-flash-thinking-exp")] + Gemini20FlashThinking, + /// Gemini 1.5 Pro (long context) + #[strum(serialize = "gemini-1.5-pro")] + Gemini15Pro, + /// Gemini 1.5 Flash (fast) + #[strum(serialize = "gemini-1.5-flash")] + Gemini15Flash, +} + +/// Perplexity models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum PerplexityModel { + /// Sonar (online, search-augmented) + #[strum(serialize = "sonar")] + Sonar, + /// Sonar Pro (online, more capable) + #[strum(serialize = "sonar-pro")] + SonarPro, + /// Sonar Reasoning (online, reasoning) + #[strum(serialize = "sonar-reasoning")] + SonarReasoning, +} + +impl CompletionModel { + /// Returns the model identifier string. + pub fn as_str(&self) -> &str { + match self { + Self::OpenAi(m) => m.as_ref(), + Self::Anthropic(m) => m.as_ref(), + Self::Cohere(m) => m.as_ref(), + Self::Gemini(m) => m.as_ref(), + Self::Perplexity(m) => m.as_ref(), + #[cfg(feature = "ollama")] + Self::Ollama(m) => m.as_str(), + } + } +} diff --git a/crates/nvisy-rig/src/provider/completion/provider.rs b/crates/nvisy-rig/src/provider/completion/provider.rs new file mode 100644 index 0000000..946e25d --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/provider.rs @@ -0,0 +1,284 @@ +//! Completion provider abstraction. + +use std::sync::Arc; + +use nvisy_core::Provider; +#[cfg(feature = "ollama")] +use rig::client::Nothing; +use rig::completion::{AssistantContent, CompletionError, CompletionModel as RigCompletionModel}; +use rig::message::Message; +use rig::one_or_many::OneOrMany; +use rig::prelude::CompletionClient; +#[cfg(feature = "ollama")] +use rig::providers::ollama; +use rig::providers::{anthropic, cohere, gemini, openai, perplexity}; + +use super::credentials::CompletionCredentials; +use super::model::{AnthropicModel, CompletionModel}; +use crate::Error; + +/// Completion provider that wraps different rig completion model implementations. +/// +/// This is a cheaply cloneable wrapper around an `Arc`. 
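A small sketch of how the typed model enums are intended to be used; the strum `serialize` strings above double as the wire identifiers returned by `as_str()`:

// Sketch only: constructing a typed model and reading its wire identifier.
fn default_anthropic_model() -> CompletionModel {
    let model = CompletionModel::Anthropic(AnthropicModel::ClaudeSonnet4);
    debug_assert_eq!(model.as_str(), "claude-sonnet-4-20250514");
    model
}
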
+#[derive(Clone)] +pub struct CompletionProvider(Arc); + +pub(crate) enum CompletionService { + OpenAi { + model: openai::CompletionModel, + model_name: String, + }, + Anthropic { + model: anthropic::completion::CompletionModel, + model_name: String, + }, + Cohere { + model: cohere::CompletionModel, + model_name: String, + }, + Gemini { + model: gemini::completion::CompletionModel, + model_name: String, + }, + Perplexity { + model: perplexity::CompletionModel, + model_name: String, + }, + #[cfg(feature = "ollama")] + Ollama { + client: ollama::Client, + model_name: String, + }, +} + +#[async_trait::async_trait] +impl Provider for CompletionProvider { + type Credentials = CompletionCredentials; + type Params = CompletionModel; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let inner = match (credentials, params) { + (CompletionCredentials::OpenAi(c), CompletionModel::OpenAi(m)) => { + let client = openai::Client::new(&c.api_key) + .map_err(|e| Error::provider("openai", e.to_string()))? + .completions_api(); + CompletionService::OpenAi { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Anthropic(c), CompletionModel::Anthropic(m)) => { + let client = anthropic::Client::new(&c.api_key) + .map_err(|e| Error::provider("anthropic", e.to_string()))?; + CompletionService::Anthropic { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Cohere(c), CompletionModel::Cohere(m)) => { + let client = cohere::Client::new(&c.api_key) + .map_err(|e| Error::provider("cohere", e.to_string()))?; + CompletionService::Cohere { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Gemini(c), CompletionModel::Gemini(m)) => { + let client = gemini::Client::new(&c.api_key) + .map_err(|e| Error::provider("gemini", e.to_string()))?; + CompletionService::Gemini { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Perplexity(c), CompletionModel::Perplexity(m)) => { + let client = perplexity::Client::new(&c.api_key) + .map_err(|e| Error::provider("perplexity", e.to_string()))?; + CompletionService::Perplexity { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + #[cfg(feature = "ollama")] + (CompletionCredentials::Ollama(c), CompletionModel::Ollama(model_name)) => { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(&c.base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + CompletionService::Ollama { + client, + model_name: model_name.clone(), + } + } + #[allow(unreachable_patterns)] + _ => return Err(Error::config("mismatched credentials and model provider").into()), + }; + Ok(Self(Arc::new(inner))) + } +} + +impl CompletionProvider { + /// Returns a reference to the inner provider. + pub(crate) fn inner(&self) -> &CompletionService { + &self.0 + } + + /// Creates an Ollama completion provider (convenience for local development). 
+ #[cfg(feature = "ollama")] + pub fn ollama(base_url: &str, model_name: &str) -> nvisy_core::Result { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + Ok(Self(Arc::new(CompletionService::Ollama { + client, + model_name: model_name.to_string(), + }))) + } + + /// Creates an Anthropic completion provider with a specific model. + pub fn anthropic(api_key: &str, model: AnthropicModel) -> nvisy_core::Result { + let client = anthropic::Client::new(api_key) + .map_err(|e| Error::provider("anthropic", e.to_string()))?; + Ok(Self(Arc::new(CompletionService::Anthropic { + model: client.completion_model(model.as_ref()), + model_name: model.as_ref().to_string(), + }))) + } + + /// Returns the model name. + pub fn model_name(&self) -> &str { + match self.0.as_ref() { + CompletionService::OpenAi { model_name, .. } => model_name, + CompletionService::Anthropic { model_name, .. } => model_name, + CompletionService::Cohere { model_name, .. } => model_name, + CompletionService::Gemini { model_name, .. } => model_name, + CompletionService::Perplexity { model_name, .. } => model_name, + #[cfg(feature = "ollama")] + CompletionService::Ollama { model_name, .. } => model_name, + } + } + + /// Returns the provider name. + pub fn provider_name(&self) -> &'static str { + match self.0.as_ref() { + CompletionService::OpenAi { .. } => "openai", + CompletionService::Anthropic { .. } => "anthropic", + CompletionService::Cohere { .. } => "cohere", + CompletionService::Gemini { .. } => "gemini", + CompletionService::Perplexity { .. } => "perplexity", + #[cfg(feature = "ollama")] + CompletionService::Ollama { .. } => "ollama", + } + } + + /// Sends a completion request with the given prompt and chat history. + pub async fn complete( + &self, + prompt: &str, + chat_history: Vec, + ) -> nvisy_core::Result { + let model_name = self.model_name().to_string(); + let map_err = |e: CompletionError| { + nvisy_core::Error::from(Error::provider(&model_name, e.to_string())) + }; + + match self.0.as_ref() { + CompletionService::OpenAi { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Anthropic { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Cohere { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Gemini { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Perplexity { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + #[cfg(feature = "ollama")] + CompletionService::Ollama { client, model_name } => { + let model = client.completion_model(model_name); + model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err) + } + } + } +} + +/// Extracts text content from assistant content choices. 
+fn extract_text_content(choice: &OneOrMany) -> String { + choice + .iter() + .filter_map(|content| match content { + AssistantContent::Text(text) => Some(text.text()), + _ => None, + }) + .collect::>() + .join("") +} + +impl std::fmt::Debug for CompletionProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.0.as_ref() { + CompletionService::OpenAi { model_name, .. } => f + .debug_struct("CompletionProvider::OpenAi") + .field("model", model_name) + .finish(), + CompletionService::Anthropic { model_name, .. } => f + .debug_struct("CompletionProvider::Anthropic") + .field("model", model_name) + .finish(), + CompletionService::Cohere { model_name, .. } => f + .debug_struct("CompletionProvider::Cohere") + .field("model", model_name) + .finish(), + CompletionService::Gemini { model_name, .. } => f + .debug_struct("CompletionProvider::Gemini") + .field("model", model_name) + .finish(), + CompletionService::Perplexity { model_name, .. } => f + .debug_struct("CompletionProvider::Perplexity") + .field("model", model_name) + .finish(), + #[cfg(feature = "ollama")] + CompletionService::Ollama { model_name, .. } => f + .debug_struct("CompletionProvider::Ollama") + .field("model", model_name) + .finish(), + } + } +} diff --git a/crates/nvisy-rig/src/provider/completion/response.rs b/crates/nvisy-rig/src/provider/completion/response.rs new file mode 100644 index 0000000..8d82b4f --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/response.rs @@ -0,0 +1,38 @@ +//! Response types for completion provider. + +use rig::completion::{GetTokenUsage, Usage}; +use serde::{Deserialize, Serialize}; + +/// Unified raw response type for CompletionProvider. +/// +/// This type normalizes responses from different providers into a common format. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProviderResponse { + /// The provider name. + pub provider: String, + /// The model name used. + pub model: String, +} + +impl GetTokenUsage for ProviderResponse { + fn token_usage(&self) -> Option { + None + } +} + +/// Streaming response placeholder for CompletionProvider. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProviderStreamingResponse { + /// The provider name. + pub provider: String, + /// The model name used. + pub model: String, + /// Token usage if available. + pub usage: Option, +} + +impl GetTokenUsage for ProviderStreamingResponse { + fn token_usage(&self) -> Option { + self.usage + } +} diff --git a/crates/nvisy-rig/src/provider/completion/rig_impl.rs b/crates/nvisy-rig/src/provider/completion/rig_impl.rs new file mode 100644 index 0000000..2da6597 --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/rig_impl.rs @@ -0,0 +1,153 @@ +//! rig-core trait implementations for CompletionProvider. 
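For orientation, connecting and calling the provider looks roughly like the sketch below; the `Provider` trait from nvisy_core must be in scope, and the key value is a placeholder, not part of this patch.

// Sketch only (placeholder key, not part of this patch).
use nvisy_core::Provider as _;

async fn one_shot(api_key: &str) -> nvisy_core::Result<String> {
    let provider = CompletionProvider::connect(
        CompletionModel::Anthropic(AnthropicModel::ClaudeSonnet4),
        CompletionCredentials::Anthropic(ApiKeyCredentials {
            api_key: api_key.to_owned(),
        }),
    )
    .await?;

    // No prior chat history for a one-shot prompt.
    provider
        .complete("List the section headings of the attached document.", Vec::new())
        .await
}
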
+ +use rig::completion::{ + CompletionError, CompletionModel as RigCompletionModel, CompletionRequest, CompletionResponse, +}; +use rig::message::Message; +use rig::one_or_many::OneOrMany; +#[cfg(feature = "ollama")] +use rig::prelude::CompletionClient; +use rig::streaming::StreamingCompletionResponse; + +use super::provider::{CompletionProvider, CompletionService}; +use super::response::{ProviderResponse, ProviderStreamingResponse}; + +impl RigCompletionModel for CompletionProvider { + type Client = (); + type Response = ProviderResponse; + type StreamingResponse = ProviderStreamingResponse; + + fn make(_client: &Self::Client, _model: impl Into) -> Self { + // This is a no-op since CompletionProvider is constructed via its own methods + panic!("CompletionProvider should be constructed via CompletionProvider::new()") + } + + async fn completion( + &self, + request: CompletionRequest, + ) -> std::result::Result, CompletionError> { + // Extract the prompt from the request's chat history (last message) + let last_message = request.chat_history.last(); + let prompt = match last_message { + Message::User { content } => content + .iter() + .filter_map(|c| match c { + rig::message::UserContent::Text(t) => Some(t.text()), + _ => None, + }) + .collect::>() + .join(""), + _ => String::new(), + }; + + // Get chat history without the last message (which is the prompt) + let chat_history: Vec = if request.chat_history.len() > 1 { + request + .chat_history + .iter() + .take(request.chat_history.len() - 1) + .cloned() + .collect() + } else { + vec![] + }; + + // Build the full prompt with preamble if present + let full_prompt = match &request.preamble { + Some(preamble) => format!("{}\n\n{}", preamble, prompt), + None => prompt, + }; + + // Delegate to the underlying model based on variant + let (choice, usage) = match self.inner() { + CompletionService::OpenAi { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Anthropic { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Cohere { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Gemini { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Perplexity { model, .. 
} => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + #[cfg(feature = "ollama")] + CompletionService::Ollama { client, model_name } => { + let model = client.completion_model(model_name); + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + }; + + Ok(CompletionResponse { + choice, + usage, + raw_response: ProviderResponse { + provider: self.provider_name().to_string(), + model: self.model_name().to_string(), + }, + }) + } + + async fn stream( + &self, + request: CompletionRequest, + ) -> std::result::Result, CompletionError> + { + // For now, streaming is not fully implemented - we'd need to unify the streaming types + // This is a placeholder that returns an error + let _ = request; + Err(CompletionError::RequestError( + "Streaming not yet implemented for CompletionProvider".into(), + )) + } +} + +/// Builds a completion request for delegation to underlying models. +fn build_request( + prompt: &str, + chat_history: &[Message], + original: &CompletionRequest, +) -> CompletionRequest { + CompletionRequest { + preamble: None, // Already incorporated into prompt + chat_history: { + let mut history = chat_history.to_vec(); + history.push(Message::User { + content: OneOrMany::one(rig::message::UserContent::text(prompt)), + }); + OneOrMany::many(history).unwrap_or_else(|_| { + OneOrMany::one(Message::User { + content: OneOrMany::one(rig::message::UserContent::text(prompt)), + }) + }) + }, + documents: original.documents.clone(), + tools: original.tools.clone(), + temperature: original.temperature, + max_tokens: original.max_tokens, + tool_choice: original.tool_choice.clone(), + additional_params: original.additional_params.clone(), + } +} diff --git a/crates/nvisy-rig/src/provider/config.rs b/crates/nvisy-rig/src/provider/config.rs deleted file mode 100644 index 1eb6e89..0000000 --- a/crates/nvisy-rig/src/provider/config.rs +++ /dev/null @@ -1,155 +0,0 @@ -//! Provider configuration types. - -use serde::{Deserialize, Serialize}; - -/// Supported AI providers. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum ProviderKind { - /// OpenAI (GPT-4, embeddings, etc.) - OpenAi, - /// Anthropic (Claude models) - Anthropic, - /// Cohere (Command, embeddings) - Cohere, - /// Google Gemini - Gemini, - /// Perplexity - Perplexity, -} - -impl ProviderKind { - /// Returns the provider name as a string. - pub fn as_str(&self) -> &'static str { - match self { - Self::OpenAi => "openai", - Self::Anthropic => "anthropic", - Self::Cohere => "cohere", - Self::Gemini => "gemini", - Self::Perplexity => "perplexity", - } - } - - /// Default completion model for this provider. - pub fn default_completion_model(&self) -> &'static str { - match self { - Self::OpenAi => "gpt-4o", - Self::Anthropic => "claude-sonnet-4-20250514", - Self::Cohere => "command-r-plus", - Self::Gemini => "gemini-2.0-flash", - Self::Perplexity => "sonar", - } - } - - /// Default embedding model for this provider. 
- pub fn default_embedding_model(&self) -> &'static str { - match self { - Self::OpenAi => "text-embedding-3-small", - Self::Anthropic => "text-embedding-3-small", // Uses OpenAI - Self::Cohere => "embed-english-v3.0", - Self::Gemini => "text-embedding-004", - Self::Perplexity => "text-embedding-3-small", // Uses OpenAI - } - } -} - -impl std::fmt::Display for ProviderKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -/// Configuration for a single provider. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProviderConfig { - /// Unique identifier for this provider instance. - pub id: String, - - /// The provider type. - pub kind: ProviderKind, - - /// API key for authentication. - pub api_key: String, - - /// Optional base URL override. - #[serde(default)] - pub base_url: Option, - - /// Model configuration. - #[serde(default)] - pub models: ModelConfig, -} - -impl ProviderConfig { - /// Creates a new provider configuration. - pub fn new(id: impl Into, kind: ProviderKind, api_key: impl Into) -> Self { - Self { - id: id.into(), - kind, - api_key: api_key.into(), - base_url: None, - models: ModelConfig::default_for(kind), - } - } - - /// Sets the base URL. - pub fn with_base_url(mut self, url: impl Into) -> Self { - self.base_url = Some(url.into()); - self - } - - /// Sets the model configuration. - pub fn with_models(mut self, models: ModelConfig) -> Self { - self.models = models; - self - } -} - -/// Model configuration for a provider. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ModelConfig { - /// Model for completions/chat. - pub completion: String, - - /// Model for embeddings. - pub embedding: String, - - /// Model for vision tasks. - #[serde(default)] - pub vision: Option, - - /// Maximum tokens for completions. - #[serde(default = "default_max_tokens")] - pub max_tokens: usize, - - /// Temperature for completions (0.0 - 2.0). - #[serde(default = "default_temperature")] - pub temperature: f32, -} - -fn default_max_tokens() -> usize { - 4096 -} - -fn default_temperature() -> f32 { - 0.7 -} - -impl ModelConfig { - /// Creates default model config for a provider. - pub fn default_for(kind: ProviderKind) -> Self { - Self { - completion: kind.default_completion_model().to_string(), - embedding: kind.default_embedding_model().to_string(), - vision: None, - max_tokens: default_max_tokens(), - temperature: default_temperature(), - } - } -} - -impl Default for ModelConfig { - fn default() -> Self { - Self::default_for(ProviderKind::OpenAi) - } -} diff --git a/crates/nvisy-rig/src/provider/credentials.rs b/crates/nvisy-rig/src/provider/credentials.rs new file mode 100644 index 0000000..a5b0ca8 --- /dev/null +++ b/crates/nvisy-rig/src/provider/credentials.rs @@ -0,0 +1,17 @@ +//! Shared credential types for AI providers. + +use serde::{Deserialize, Serialize}; + +/// API key credentials for AI providers. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ApiKeyCredentials { + /// API key. + pub api_key: String, +} + +/// Ollama credentials (local deployment, no API key required). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OllamaCredentials { + /// Base URL for the Ollama server. + pub base_url: String, +} diff --git a/crates/nvisy-rig/src/provider/embedding.rs b/crates/nvisy-rig/src/provider/embedding.rs deleted file mode 100644 index 418f226..0000000 --- a/crates/nvisy-rig/src/provider/embedding.rs +++ /dev/null @@ -1,71 +0,0 @@ -//! Embedding provider abstraction. -//! 
-//! Wraps different embedding model providers into a unified enum, -//! eliminating the need for generic parameters throughout the codebase. - -use rig::embeddings::{Embedding, EmbeddingError, EmbeddingModel}; -use rig::providers::ollama; - -/// Embedding provider that wraps different model implementations. -/// -/// This enum provides a concrete type for embedding operations, -/// removing the need for generic `M: EmbeddingModel` parameters. -/// -/// Implements [`EmbeddingModel`] so it can be used directly with rig's -/// APIs like `VectorStoreIndex` and `EmbeddingsBuilder`. -#[derive(Clone)] -pub enum EmbeddingProvider { - /// Ollama embedding model. - Ollama(ollama::EmbeddingModel), -} - -impl EmbeddingProvider { - /// Creates a new Ollama embedding provider. - pub fn ollama(base_url: &str, model: &str) -> Self { - let client = ollama::Client::from_url(base_url); - Self::Ollama(client.embedding_model(model)) - } - - /// Creates a new Ollama embedding provider with custom dimensions. - pub fn ollama_with_ndims(base_url: &str, model: &str, ndims: usize) -> Self { - let client = ollama::Client::from_url(base_url); - Self::Ollama(client.embedding_model_with_ndims(model, ndims)) - } - - /// Returns the model name. - pub fn model_name(&self) -> &str { - match self { - Self::Ollama(model) => &model.model, - } - } -} - -impl EmbeddingModel for EmbeddingProvider { - const MAX_DOCUMENTS: usize = 1024; - - fn ndims(&self) -> usize { - match self { - Self::Ollama(model) => model.ndims(), - } - } - - async fn embed_texts( - &self, - texts: impl IntoIterator + Send, - ) -> Result, EmbeddingError> { - match self { - Self::Ollama(model) => model.embed_texts(texts).await, - } - } -} - -impl std::fmt::Debug for EmbeddingProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Ollama(model) => f - .debug_struct("EmbeddingProvider::Ollama") - .field("model", &model.model) - .finish(), - } - } -} diff --git a/crates/nvisy-rig/src/provider/embedding/credentials.rs b/crates/nvisy-rig/src/provider/embedding/credentials.rs new file mode 100644 index 0000000..5c3c41a --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/credentials.rs @@ -0,0 +1,29 @@ +//! Embedding provider credentials. + +use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; + +pub use super::super::credentials::{ApiKeyCredentials, OllamaCredentials}; + +/// Credentials for embedding providers. +#[derive(Debug, Clone, Serialize, Deserialize, IntoStaticStr)] +#[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum EmbeddingCredentials { + /// OpenAI credentials. + OpenAi(ApiKeyCredentials), + /// Cohere credentials. + Cohere(ApiKeyCredentials), + /// Google Gemini credentials. + Gemini(ApiKeyCredentials), + /// Ollama credentials. + #[cfg(feature = "ollama")] + Ollama(OllamaCredentials), +} + +impl EmbeddingCredentials { + /// Returns the provider kind as a string. + pub fn kind(&self) -> &'static str { + self.into() + } +} diff --git a/crates/nvisy-rig/src/provider/embedding/mod.rs b/crates/nvisy-rig/src/provider/embedding/mod.rs new file mode 100644 index 0000000..4f970a8 --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/mod.rs @@ -0,0 +1,12 @@ +//! Embedding models and providers. 
+ +mod credentials; +mod model; +mod provider; +mod rig_impl; + +pub use credentials::EmbeddingCredentials; +#[cfg(feature = "ollama")] +pub use model::OllamaEmbeddingModel; +pub use model::{CohereEmbeddingModel, EmbeddingModel, GeminiEmbeddingModel, OpenAiEmbeddingModel}; +pub use provider::EmbeddingProvider; diff --git a/crates/nvisy-rig/src/provider/embedding/model.rs b/crates/nvisy-rig/src/provider/embedding/model.rs new file mode 100644 index 0000000..3a63713 --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/model.rs @@ -0,0 +1,155 @@ +//! Type-safe embedding model references. + +use serde::{Deserialize, Serialize}; +use strum::{AsRefStr, Display, EnumString}; + +/// Reference to an embedding model. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "provider", content = "model", rename_all = "snake_case")] +pub enum EmbeddingModel { + /// OpenAI embedding models. + OpenAi(OpenAiEmbeddingModel), + /// Cohere embedding models. + Cohere(CohereEmbeddingModel), + /// Google Gemini embedding models. + Gemini(GeminiEmbeddingModel), + /// Ollama local models. + #[cfg(feature = "ollama")] + Ollama(OllamaEmbeddingModel), +} + +/// OpenAI embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum OpenAiEmbeddingModel { + /// text-embedding-3-small (1536 dimensions) + #[strum(serialize = "text-embedding-3-small")] + TextEmbedding3Small, + /// text-embedding-3-large (3072 dimensions) + #[strum(serialize = "text-embedding-3-large")] + TextEmbedding3Large, + /// text-embedding-ada-002 (legacy, 1536 dimensions) + #[strum(serialize = "text-embedding-ada-002")] + TextEmbeddingAda002, +} + +impl OpenAiEmbeddingModel { + /// Returns the embedding dimensions for this model. + pub fn dimensions(&self) -> usize { + match self { + Self::TextEmbedding3Small => 1536, + Self::TextEmbedding3Large => 3072, + Self::TextEmbeddingAda002 => 1536, + } + } +} + +/// Cohere embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum CohereEmbeddingModel { + /// embed-english-v3.0 (1024 dimensions) + #[strum(serialize = "embed-english-v3.0")] + EmbedEnglishV3, + /// embed-multilingual-v3.0 (1024 dimensions) + #[strum(serialize = "embed-multilingual-v3.0")] + EmbedMultilingualV3, + /// embed-english-light-v3.0 (384 dimensions) + #[strum(serialize = "embed-english-light-v3.0")] + EmbedEnglishLightV3, + /// embed-multilingual-light-v3.0 (384 dimensions) + #[strum(serialize = "embed-multilingual-light-v3.0")] + EmbedMultilingualLightV3, +} + +impl CohereEmbeddingModel { + /// Returns the embedding dimensions for this model. + pub fn dimensions(&self) -> usize { + match self { + Self::EmbedEnglishV3 | Self::EmbedMultilingualV3 => 1024, + Self::EmbedEnglishLightV3 | Self::EmbedMultilingualLightV3 => 384, + } + } +} + +/// Google Gemini embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum GeminiEmbeddingModel { + /// text-embedding-004 (768 dimensions) + #[strum(serialize = "text-embedding-004")] + TextEmbedding004, +} + +impl GeminiEmbeddingModel { + /// Returns the embedding dimensions for this model. 
+ pub fn dimensions(&self) -> usize { + 768 + } +} + +/// Ollama embedding model configuration. +#[cfg(feature = "ollama")] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct OllamaEmbeddingModel { + /// Model name (e.g., "nomic-embed-text", "mxbai-embed-large"). + pub name: String, + /// Embedding dimensions. + pub dimensions: usize, +} + +#[cfg(feature = "ollama")] +impl OllamaEmbeddingModel { + /// Creates a new Ollama embedding model configuration. + pub fn new(name: impl Into, dimensions: usize) -> Self { + Self { + name: name.into(), + dimensions, + } + } + + /// nomic-embed-text (768 dimensions) + pub fn nomic_embed_text() -> Self { + Self::new("nomic-embed-text", 768) + } + + /// mxbai-embed-large (1024 dimensions) + pub fn mxbai_embed_large() -> Self { + Self::new("mxbai-embed-large", 1024) + } + + /// all-minilm (384 dimensions) + pub fn all_minilm() -> Self { + Self::new("all-minilm", 384) + } +} + +impl EmbeddingModel { + /// Returns the model identifier string. + pub fn as_str(&self) -> &str { + match self { + Self::OpenAi(m) => m.as_ref(), + Self::Cohere(m) => m.as_ref(), + Self::Gemini(m) => m.as_ref(), + #[cfg(feature = "ollama")] + Self::Ollama(m) => &m.name, + } + } + + /// Returns the embedding dimensions for this model. + pub fn dimensions(&self) -> usize { + match self { + Self::OpenAi(m) => m.dimensions(), + Self::Cohere(m) => m.dimensions(), + Self::Gemini(m) => m.dimensions(), + #[cfg(feature = "ollama")] + Self::Ollama(m) => m.dimensions, + } + } +} diff --git a/crates/nvisy-rig/src/provider/embedding/provider.rs b/crates/nvisy-rig/src/provider/embedding/provider.rs new file mode 100644 index 0000000..877bffd --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/provider.rs @@ -0,0 +1,203 @@ +//! Embedding provider abstraction. + +use std::sync::Arc; + +use nvisy_core::Provider; +#[cfg(feature = "ollama")] +use rig::client::Nothing; +use rig::embeddings::{Embedding, EmbeddingModel as RigEmbeddingModel}; +use rig::prelude::EmbeddingsClient; +#[cfg(feature = "ollama")] +use rig::providers::ollama; +use rig::providers::{cohere, gemini, openai}; + +use super::credentials::EmbeddingCredentials; +use super::model::EmbeddingModel; +#[cfg(feature = "ollama")] +use super::model::OllamaEmbeddingModel; +use crate::Error; + +/// Default maximum documents per embedding request. +/// +/// This is a conservative default; individual providers may support more. +pub(crate) const DEFAULT_MAX_DOCUMENTS: usize = 96; + +/// Embedding provider that wraps different rig embedding model implementations. +/// +/// This is a cheaply cloneable wrapper around an `Arc`. 
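A short sketch of how the typed embedding models carry their dimensionality, e.g. when sizing a vector column (illustrative only):

// Sketch only: dimensions travel with the typed model.
fn default_embedding_model() -> EmbeddingModel {
    let model = EmbeddingModel::OpenAi(OpenAiEmbeddingModel::TextEmbedding3Small);
    debug_assert_eq!(model.dimensions(), 1536);
    debug_assert_eq!(model.as_str(), "text-embedding-3-small");
    model
}
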
+#[derive(Clone)] +pub struct EmbeddingProvider(Arc); + +pub(crate) enum EmbeddingService { + OpenAi { + model: openai::EmbeddingModel, + model_name: String, + }, + Cohere { + model: cohere::EmbeddingModel, + model_name: String, + }, + Gemini { + model: gemini::embedding::EmbeddingModel, + model_name: String, + }, + #[cfg(feature = "ollama")] + Ollama { + client: ollama::Client, + model_name: String, + ndims: usize, + }, +} + +#[async_trait::async_trait] +impl Provider for EmbeddingProvider { + type Credentials = EmbeddingCredentials; + type Params = EmbeddingModel; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let inner = match (credentials, params) { + (EmbeddingCredentials::OpenAi(c), EmbeddingModel::OpenAi(m)) => { + let client = openai::Client::new(&c.api_key) + .map_err(|e| Error::provider("openai", e.to_string()))?; + EmbeddingService::OpenAi { + model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), + model_name: m.as_ref().to_string(), + } + } + (EmbeddingCredentials::Cohere(c), EmbeddingModel::Cohere(m)) => { + let client = cohere::Client::new(&c.api_key) + .map_err(|e| Error::provider("cohere", e.to_string()))?; + EmbeddingService::Cohere { + model: client.embedding_model_with_ndims( + m.as_ref(), + "search_document", + m.dimensions(), + ), + model_name: m.as_ref().to_string(), + } + } + (EmbeddingCredentials::Gemini(c), EmbeddingModel::Gemini(m)) => { + let client = gemini::Client::new(&c.api_key) + .map_err(|e| Error::provider("gemini", e.to_string()))?; + EmbeddingService::Gemini { + model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), + model_name: m.as_ref().to_string(), + } + } + #[cfg(feature = "ollama")] + (EmbeddingCredentials::Ollama(c), EmbeddingModel::Ollama(m)) => { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(&c.base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + EmbeddingService::Ollama { + client, + model_name: m.name.clone(), + ndims: m.dimensions, + } + } + #[allow(unreachable_patterns)] + _ => return Err(Error::config("mismatched credentials and model provider").into()), + }; + Ok(Self(Arc::new(inner))) + } +} + +impl EmbeddingProvider { + /// Returns a reference to the inner provider. + pub(crate) fn inner(&self) -> &EmbeddingService { + &self.0 + } + + /// Creates an Ollama embedding provider (convenience for local development). + #[cfg(feature = "ollama")] + pub fn ollama(base_url: &str, model: OllamaEmbeddingModel) -> nvisy_core::Result { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + Ok(Self(Arc::new(EmbeddingService::Ollama { + client, + model_name: model.name, + ndims: model.dimensions, + }))) + } + + /// Returns the model name. + pub fn model_name(&self) -> &str { + match self.0.as_ref() { + EmbeddingService::OpenAi { model_name, .. } => model_name, + EmbeddingService::Cohere { model_name, .. } => model_name, + EmbeddingService::Gemini { model_name, .. } => model_name, + #[cfg(feature = "ollama")] + EmbeddingService::Ollama { model_name, .. } => model_name, + } + } + + /// Returns the provider name. + pub fn provider_name(&self) -> &'static str { + match self.0.as_ref() { + EmbeddingService::OpenAi { .. } => "openai", + EmbeddingService::Cohere { .. } => "cohere", + EmbeddingService::Gemini { .. } => "gemini", + #[cfg(feature = "ollama")] + EmbeddingService::Ollama { .. 
} => "ollama", + } + } + + /// Embed a single text document. + /// + /// This is a convenience method that delegates to the trait implementation. + pub async fn embed_text(&self, text: &str) -> nvisy_core::Result { + RigEmbeddingModel::embed_text(self, text) + .await + .map_err(|e| Error::provider(self.provider_name(), e.to_string()).into()) + } + + /// Embed multiple text documents. + /// + /// This is a convenience method that delegates to the trait implementation. + pub async fn embed_texts( + &self, + texts: impl IntoIterator + Send, + ) -> nvisy_core::Result> { + RigEmbeddingModel::embed_texts(self, texts) + .await + .map_err(|e| Error::provider(self.provider_name(), e.to_string()).into()) + } +} + +impl std::fmt::Debug for EmbeddingProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.0.as_ref() { + EmbeddingService::OpenAi { model, model_name } => f + .debug_struct("EmbeddingProvider::OpenAi") + .field("model", model_name) + .field("ndims", &model.ndims()) + .finish(), + EmbeddingService::Cohere { model, model_name } => f + .debug_struct("EmbeddingProvider::Cohere") + .field("model", model_name) + .field("ndims", &model.ndims()) + .finish(), + EmbeddingService::Gemini { model, model_name } => f + .debug_struct("EmbeddingProvider::Gemini") + .field("model", model_name) + .field("ndims", &model.ndims()) + .finish(), + #[cfg(feature = "ollama")] + EmbeddingService::Ollama { + model_name, ndims, .. + } => f + .debug_struct("EmbeddingProvider::Ollama") + .field("model", model_name) + .field("ndims", ndims) + .finish(), + } + } +} diff --git a/crates/nvisy-rig/src/provider/embedding/rig_impl.rs b/crates/nvisy-rig/src/provider/embedding/rig_impl.rs new file mode 100644 index 0000000..380bd4f --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/rig_impl.rs @@ -0,0 +1,48 @@ +//! rig-core trait implementations for EmbeddingProvider. + +use rig::embeddings::{Embedding, EmbeddingError, EmbeddingModel as RigEmbeddingModel}; +#[cfg(feature = "ollama")] +use rig::providers::ollama; + +use super::provider::{DEFAULT_MAX_DOCUMENTS, EmbeddingProvider, EmbeddingService}; + +impl RigEmbeddingModel for EmbeddingProvider { + type Client = (); + + const MAX_DOCUMENTS: usize = DEFAULT_MAX_DOCUMENTS; + + fn make(_client: &Self::Client, _model: impl Into, _dims: Option) -> Self { + // This is a no-op since EmbeddingProvider is constructed via its own methods + panic!("EmbeddingProvider should be constructed via EmbeddingProvider::new()") + } + + fn ndims(&self) -> usize { + match self.inner() { + EmbeddingService::OpenAi { model, .. } => model.ndims(), + EmbeddingService::Cohere { model, .. } => model.ndims(), + EmbeddingService::Gemini { model, .. } => model.ndims(), + #[cfg(feature = "ollama")] + EmbeddingService::Ollama { ndims, .. } => *ndims, + } + } + + async fn embed_texts( + &self, + texts: impl IntoIterator + Send, + ) -> std::result::Result, EmbeddingError> { + match self.inner() { + EmbeddingService::OpenAi { model, .. } => model.embed_texts(texts).await, + EmbeddingService::Cohere { model, .. } => model.embed_texts(texts).await, + EmbeddingService::Gemini { model, .. 
} => model.embed_texts(texts).await, + #[cfg(feature = "ollama")] + EmbeddingService::Ollama { + client, + model_name, + ndims, + } => { + let model = ollama::EmbeddingModel::new(client.clone(), model_name, *ndims); + model.embed_texts(texts).await + } + } + } +} diff --git a/crates/nvisy-rig/src/provider/mod.rs b/crates/nvisy-rig/src/provider/mod.rs index 17abbde..97945d8 100644 --- a/crates/nvisy-rig/src/provider/mod.rs +++ b/crates/nvisy-rig/src/provider/mod.rs @@ -1,15 +1,19 @@ //! Multi-provider management for AI inference. -//! -//! This module provides: -//! - [`ProviderRegistry`] - Registry of configured providers -//! - [`ProviderConfig`] - Configuration for individual providers -//! - [`ModelRef`] - Reference to a specific model (provider/model) -//! - [`EmbeddingProvider`] - Unified embedding provider enum -mod config; +mod completion; +mod credentials; mod embedding; -mod registry; +pub mod splitting; -pub use config::{ModelConfig, ProviderConfig, ProviderKind}; -pub use embedding::EmbeddingProvider; -pub use registry::{ModelRef, ProviderRegistry}; +pub use completion::{ + AnthropicModel, CohereCompletionModel, CompletionCredentials, CompletionModel, + CompletionProvider, GeminiCompletionModel, OpenAiCompletionModel, PerplexityModel, +}; +pub use credentials::{ApiKeyCredentials, OllamaCredentials}; +#[cfg(feature = "ollama")] +pub use embedding::OllamaEmbeddingModel; +pub use embedding::{ + CohereEmbeddingModel, EmbeddingCredentials, EmbeddingModel, EmbeddingProvider, + GeminiEmbeddingModel, OpenAiEmbeddingModel, +}; +pub use splitting::{Chunk, ChunkMetadata, OwnedChunk, TextSplitter}; diff --git a/crates/nvisy-rig/src/provider/registry.rs b/crates/nvisy-rig/src/provider/registry.rs deleted file mode 100644 index 9bb3bb3..0000000 --- a/crates/nvisy-rig/src/provider/registry.rs +++ /dev/null @@ -1,204 +0,0 @@ -//! Provider registry for managing multiple AI providers. - -use std::collections::HashMap; -use std::str::FromStr; -use std::sync::Arc; - -use serde::{Deserialize, Serialize}; - -use super::config::ProviderConfig; -use crate::{Error, Result}; - -/// Reference to a specific model in format "provider_id/model_name". -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct ModelRef { - /// Provider ID. - pub provider_id: String, - /// Model name. - pub model: String, -} - -impl ModelRef { - /// Creates a new model reference. - pub fn new(provider_id: impl Into, model: impl Into) -> Self { - Self { - provider_id: provider_id.into(), - model: model.into(), - } - } -} - -impl FromStr for ModelRef { - type Err = Error; - - fn from_str(s: &str) -> Result { - let (provider_id, model) = s.split_once('/').ok_or_else(|| { - Error::config(format!( - "invalid model reference '{}': expected 'provider/model'", - s - )) - })?; - - Ok(Self::new(provider_id, model)) - } -} - -impl std::fmt::Display for ModelRef { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}/{}", self.provider_id, self.model) - } -} - -/// Default models for different tasks. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DefaultModels { - /// Default model for embeddings. - pub embedding: ModelRef, - /// Default model for completions/chat. - pub completion: ModelRef, - /// Default model for vision tasks. - pub vision: ModelRef, -} - -/// Registry of configured AI providers. -/// -/// Allows selecting providers per-request from a set of globally configured providers. 
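For local development, the `ollama` convenience constructor plus the `embed_text`/`embed_texts` wrappers shown above can be exercised roughly like this (a sketch only; `tokio` and `anyhow` are assumed for scaffolding, and the `ollama` feature must be enabled):

```rust
use nvisy_rig::provider::{EmbeddingProvider, OllamaEmbeddingModel};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Points at a locally running Ollama instance.
    let provider = EmbeddingProvider::ollama(
        "http://localhost:11434",
        OllamaEmbeddingModel::nomic_embed_text(),
    )?;

    // Single document.
    let one = provider.embed_text("hello world").await?;
    println!("{} dimensions", one.vec.len());

    // Batched documents.
    let many = provider
        .embed_texts(["first chunk", "second chunk"].map(String::from))
        .await?;
    println!("{} embeddings", many.len());
    Ok(())
}
```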
-pub struct ProviderRegistry { - providers: HashMap>, - defaults: DefaultModels, -} - -impl ProviderRegistry { - /// Creates an empty provider registry with placeholder defaults. - /// - /// This is useful for testing or when providers will be configured later. - /// Note: Attempting to resolve models will fail until providers are added. - pub fn empty() -> Self { - let placeholder = ModelRef::new("none", "none"); - Self { - providers: HashMap::new(), - defaults: DefaultModels { - embedding: placeholder.clone(), - completion: placeholder.clone(), - vision: placeholder, - }, - } - } - - /// Creates a new provider registry. - pub fn new(providers: Vec, defaults: DefaultModels) -> Result { - let mut provider_map = HashMap::new(); - - for config in providers { - if provider_map.contains_key(&config.id) { - return Err(Error::config(format!( - "duplicate provider id: {}", - config.id - ))); - } - provider_map.insert(config.id.clone(), Arc::new(config)); - } - - // Validate defaults exist - if !provider_map.contains_key(&defaults.embedding.provider_id) { - return Err(Error::config(format!( - "default embedding provider not found: {}", - defaults.embedding.provider_id - ))); - } - if !provider_map.contains_key(&defaults.completion.provider_id) { - return Err(Error::config(format!( - "default completion provider not found: {}", - defaults.completion.provider_id - ))); - } - if !provider_map.contains_key(&defaults.vision.provider_id) { - return Err(Error::config(format!( - "default vision provider not found: {}", - defaults.vision.provider_id - ))); - } - - Ok(Self { - providers: provider_map, - defaults, - }) - } - - /// Gets a provider by ID. - pub fn get(&self, id: &str) -> Option<&ProviderConfig> { - self.providers.get(id).map(|p| p.as_ref()) - } - - /// Gets the provider for a model reference, falling back to defaults. - pub fn resolve_embedding( - &self, - model_ref: Option<&ModelRef>, - ) -> Result<(&ProviderConfig, String)> { - let model_ref = model_ref.unwrap_or(&self.defaults.embedding); - self.resolve(model_ref) - } - - /// Gets the provider for a completion model reference, falling back to defaults. - pub fn resolve_completion( - &self, - model_ref: Option<&ModelRef>, - ) -> Result<(&ProviderConfig, String)> { - let model_ref = model_ref.unwrap_or(&self.defaults.completion); - self.resolve(model_ref) - } - - /// Gets the provider for a vision model reference, falling back to defaults. - pub fn resolve_vision( - &self, - model_ref: Option<&ModelRef>, - ) -> Result<(&ProviderConfig, String)> { - let model_ref = model_ref.unwrap_or(&self.defaults.vision); - self.resolve(model_ref) - } - - /// Resolves a model reference to provider config and model name. - fn resolve(&self, model_ref: &ModelRef) -> Result<(&ProviderConfig, String)> { - let provider = self.providers.get(&model_ref.provider_id).ok_or_else(|| { - Error::config(format!("provider not found: {}", model_ref.provider_id)) - })?; - - Ok((provider.as_ref(), model_ref.model.clone())) - } - - /// Returns all registered provider IDs. - pub fn provider_ids(&self) -> impl Iterator { - self.providers.keys().map(|s| s.as_str()) - } - - /// Returns the default models. 
- pub fn defaults(&self) -> &DefaultModels { - &self.defaults - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_model_ref_parse() { - let model_ref: ModelRef = "openai/gpt-4o" - .parse() - .expect("valid model ref format should parse"); - assert_eq!(model_ref.provider_id, "openai"); - assert_eq!(model_ref.model, "gpt-4o"); - } - - #[test] - fn test_model_ref_display() { - let model_ref = ModelRef::new("anthropic", "claude-sonnet-4-20250514"); - assert_eq!(model_ref.to_string(), "anthropic/claude-sonnet-4-20250514"); - } - - #[test] - fn test_model_ref_invalid() { - let result: Result = "invalid".parse(); - assert!(result.is_err()); - } -} diff --git a/crates/nvisy-rig/src/provider/splitting/chunk.rs b/crates/nvisy-rig/src/provider/splitting/chunk.rs new file mode 100644 index 0000000..87471fe --- /dev/null +++ b/crates/nvisy-rig/src/provider/splitting/chunk.rs @@ -0,0 +1,43 @@ +//! Split chunk types. + +use super::ChunkMetadata; + +/// A chunk produced by the text splitter (borrows from source text). +#[derive(Debug)] +pub struct Chunk<'a> { + /// The chunk text content (borrowed from original). + pub text: &'a str, + /// Metadata about the chunk's position. + pub metadata: ChunkMetadata, +} + +impl<'a> Chunk<'a> { + /// Creates a new chunk. + pub fn new(text: &'a str, metadata: ChunkMetadata) -> Self { + Self { text, metadata } + } + + /// Converts to an owned chunk. + pub fn into_owned(self) -> OwnedChunk { + OwnedChunk { + text: self.text.to_string(), + metadata: self.metadata, + } + } +} + +/// An owned version of Chunk. +#[derive(Debug, Clone)] +pub struct OwnedChunk { + /// The chunk text content. + pub text: String, + /// Metadata about the chunk's position. + pub metadata: ChunkMetadata, +} + +impl OwnedChunk { + /// Creates a new owned chunk. + pub fn new(text: String, metadata: ChunkMetadata) -> Self { + Self { text, metadata } + } +} diff --git a/crates/nvisy-rig/src/provider/splitting/metadata.rs b/crates/nvisy-rig/src/provider/splitting/metadata.rs new file mode 100644 index 0000000..e3d4c40 --- /dev/null +++ b/crates/nvisy-rig/src/provider/splitting/metadata.rs @@ -0,0 +1,92 @@ +//! Split chunk metadata. + +use std::num::NonZeroU32; + +use serde::{Deserialize, Serialize}; + +/// Metadata about a chunk's location in the source document. +/// +/// This is the unified chunk metadata type used throughout the system: +/// - Created during text splitting with offset information +/// - Stored in the database with the chunk +/// - Retrieved during search operations +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct ChunkMetadata { + /// Chunk index within the source (0-based). + pub index: u32, + + /// Start byte offset in the source text. + pub start_offset: u32, + + /// End byte offset in the source text. + pub end_offset: u32, + + /// Page number (1-indexed, if applicable). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub page: Option, + + /// Section or heading the chunk belongs to. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub section: Option, +} + +impl ChunkMetadata { + /// Creates metadata with index and offset information. + pub fn new(index: u32, start_offset: u32, end_offset: u32) -> Self { + Self { + index, + start_offset, + end_offset, + page: None, + section: None, + } + } + + /// Creates metadata from JSON (used when loading from database). + /// + /// The `index` parameter overrides any index value in the JSON. 
+    pub fn from_json(json: &serde_json::Value, index: u32) -> Self {
+        let mut metadata: Self = serde_json::from_value(json.clone()).unwrap_or_default();
+        metadata.index = index;
+        metadata
+    }
+
+    /// Sets the page number.
+    pub fn with_page(mut self, page: NonZeroU32) -> Self {
+        self.page = Some(page);
+        self
+    }
+
+    /// Sets the section name.
+    pub fn with_section(mut self, section: impl Into<String>) -> Self {
+        self.section = Some(section.into());
+        self
+    }
+
+    /// Returns the byte length of the chunk.
+    pub fn byte_len(&self) -> u32 {
+        self.end_offset.saturating_sub(self.start_offset)
+    }
+
+    /// Returns the byte range for content extraction.
+    pub fn byte_range(&self) -> std::ops::Range<usize> {
+        self.start_offset as usize..self.end_offset as usize
+    }
+
+    /// Returns a location string for display (e.g., "page 5, 'Introduction', chunk 3").
+    pub fn location_string(&self) -> String {
+        let mut parts = Vec::new();
+
+        if let Some(page) = self.page {
+            parts.push(format!("page {page}"));
+        }
+
+        if let Some(section) = &self.section {
+            parts.push(format!("'{section}'"));
+        }
+
+        parts.push(format!("chunk {}", self.index + 1));
+
+        parts.join(", ")
+    }
+}
diff --git a/crates/nvisy-rig/src/provider/splitting/mod.rs b/crates/nvisy-rig/src/provider/splitting/mod.rs
new file mode 100644
index 0000000..37b2ca7
--- /dev/null
+++ b/crates/nvisy-rig/src/provider/splitting/mod.rs
@@ -0,0 +1,9 @@
+//! Text splitting for chunk creation.
+
+mod chunk;
+mod metadata;
+mod splitter;
+
+pub use chunk::{Chunk, OwnedChunk};
+pub use metadata::ChunkMetadata;
+pub use splitter::TextSplitter;
diff --git a/crates/nvisy-rig/src/provider/splitting/splitter.rs b/crates/nvisy-rig/src/provider/splitting/splitter.rs
new file mode 100644
index 0000000..f9b0a2d
--- /dev/null
+++ b/crates/nvisy-rig/src/provider/splitting/splitter.rs
@@ -0,0 +1,182 @@
+//! Text splitting implementation.
+
+use std::num::NonZeroU32;
+
+use text_splitter::{ChunkConfig, TextSplitter as TextSplitterImpl};
+
+use super::{Chunk, ChunkMetadata, OwnedChunk};
+
+/// Text splitter for creating document chunks.
+#[derive(Debug, Clone)]
+pub struct TextSplitter {
+    max_characters: u32,
+    overlap_characters: Option<NonZeroU32>,
+    trim_whitespace: bool,
+}
+
+impl TextSplitter {
+    /// Creates a new text splitter.
+    pub fn new(
+        max_characters: u32,
+        overlap_characters: Option<NonZeroU32>,
+        trim_whitespace: bool,
+    ) -> Self {
+        tracing::debug!(
+            max_characters,
+            ?overlap_characters,
+            trim_whitespace,
+            "created text splitter"
+        );
+        Self {
+            max_characters,
+            overlap_characters,
+            trim_whitespace,
+        }
+    }
+
+    /// Creates a splitter with default settings (512 chars, no overlap, trimmed).
+    pub fn with_defaults() -> Self {
+        Self::new(512, None, true)
+    }
+
+    /// Returns the maximum characters per chunk.
+    pub fn max_characters(&self) -> u32 {
+        self.max_characters
+    }
+
+    /// Returns the overlap between chunks.
+    pub fn overlap_characters(&self) -> Option<NonZeroU32> {
+        self.overlap_characters
+    }
+
+    /// Splits text into chunks with byte offset tracking.
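To make the builder flow above concrete, a minimal sketch (assuming `ChunkMetadata` is reachable via the `provider` re-exports added later in this diff):

```rust
use std::num::NonZeroU32;

use nvisy_rig::provider::ChunkMetadata;

fn main() {
    // Chunk 0 covers bytes 0..120 of the source, sits on page 2, under "Introduction".
    let meta = ChunkMetadata::new(0, 0, 120)
        .with_page(NonZeroU32::new(2).expect("non-zero"))
        .with_section("Introduction");

    assert_eq!(meta.byte_len(), 120);
    assert_eq!(meta.byte_range(), 0..120);
    // Prints: page 2, 'Introduction', chunk 1
    println!("{}", meta.location_string());
}
```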
+ #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split<'a>(&self, text: &'a str) -> Vec> { + let overlap = self.overlap_characters.map_or(0, |v| v.get() as usize); + let chunk_config = ChunkConfig::new(self.max_characters as usize) + .with_overlap(overlap) + .expect("overlap must be less than max_characters") + .with_trim(self.trim_whitespace); + + let splitter = TextSplitterImpl::new(chunk_config); + + let chunks: Vec<_> = splitter + .chunk_indices(text) + .enumerate() + .map(|(index, (byte_offset, chunk_text))| { + let end_offset = byte_offset + chunk_text.len(); + Chunk::new( + chunk_text, + ChunkMetadata::new(index as u32, byte_offset as u32, end_offset as u32), + ) + }) + .collect(); + + tracing::debug!(chunk_count = chunks.len(), "split text into chunks"); + chunks + } + + /// Splits text and returns owned chunks. + #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split_owned(&self, text: &str) -> Vec { + self.split(text) + .into_iter() + .map(|c| c.into_owned()) + .collect() + } + + /// Splits text with page awareness. + /// + /// Page breaks are indicated by form feed characters (`\x0c`). + #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split_with_pages<'a>(&self, text: &'a str) -> Vec> { + let page_breaks: Vec = text + .char_indices() + .filter(|(_, c)| *c == '\x0c') + .map(|(i, _)| i as u32) + .collect(); + + tracing::debug!(page_count = page_breaks.len() + 1, "detected pages"); + + self.split(text) + .into_iter() + .map(|chunk| { + let page_num = page_breaks + .iter() + .take_while(|&&pos| pos < chunk.metadata.start_offset) + .count() as u32 + + 1; + + // SAFETY: page_num is always >= 1 + let page = NonZeroU32::new(page_num).expect("page number is always >= 1"); + + Chunk { + text: chunk.text, + metadata: chunk.metadata.with_page(page), + } + }) + .collect() + } + + /// Splits text with page awareness and returns owned chunks. + #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split_with_pages_owned(&self, text: &str) -> Vec { + self.split_with_pages(text) + .into_iter() + .map(|c| c.into_owned()) + .collect() + } +} + +impl Default for TextSplitter { + fn default() -> Self { + Self::with_defaults() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_basic() { + let splitter = TextSplitter::new(50, None, true); + let text = "Hello world. This is a test. 
Another sentence here."; + let chunks = splitter.split(text); + + assert!(!chunks.is_empty()); + for chunk in &chunks { + assert!(chunk.text.len() <= 50); + } + } + + #[test] + fn test_split_with_overlap() { + let splitter = TextSplitter::new(20, NonZeroU32::new(5), true); + let text = "The quick brown fox jumps over the lazy dog."; + let chunks = splitter.split(text); + + assert!(chunks.len() > 1); + } + + #[test] + fn test_split_with_pages() { + let splitter = TextSplitter::new(100, None, true); + let text = "Page one content.\x0cPage two content.\x0cPage three."; + let chunks = splitter.split_with_pages(text); + + assert!(!chunks.is_empty()); + assert_eq!(chunks[0].metadata.page, NonZeroU32::new(1)); + } + + #[test] + fn test_metadata_offsets() { + let splitter = TextSplitter::new(500, None, false); + let text = "Hello world"; + let chunks = splitter.split(text); + + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].metadata.start_offset, 0); + assert_eq!(chunks[0].metadata.end_offset, text.len() as u32); + } +} diff --git a/crates/nvisy-rig/src/rag/config.rs b/crates/nvisy-rig/src/rag/config.rs index 02e914e..c54c075 100644 --- a/crates/nvisy-rig/src/rag/config.rs +++ b/crates/nvisy-rig/src/rag/config.rs @@ -1,5 +1,7 @@ //! RAG system configuration. +use std::num::NonZeroU32; + /// Configuration for the RAG system. #[derive(Debug, Clone)] pub struct RagConfig { @@ -7,10 +9,10 @@ pub struct RagConfig { pub max_chunk_characters: u32, /// Number of characters to overlap between chunks. - pub chunk_overlap: u32, + pub chunk_overlap_characters: Option, /// Whether to trim whitespace from chunks. - pub trim_chunks: bool, + pub trim_whitespace: bool, /// Maximum chunks to retrieve per query. pub max_results: u32, @@ -23,8 +25,8 @@ impl Default for RagConfig { fn default() -> Self { Self { max_chunk_characters: 1000, - chunk_overlap: 0, - trim_chunks: true, + chunk_overlap_characters: None, + trim_whitespace: true, max_results: 5, min_score: None, } diff --git a/crates/nvisy-rig/src/rag/indexer/indexed.rs b/crates/nvisy-rig/src/rag/indexer/indexed.rs index bcc1d0b..244e823 100644 --- a/crates/nvisy-rig/src/rag/indexer/indexed.rs +++ b/crates/nvisy-rig/src/rag/indexer/indexed.rs @@ -1,6 +1,6 @@ //! Indexed chunk result type. -use nvisy_postgres::model::DocumentChunk; +use nvisy_postgres::model::FileChunk; use uuid::Uuid; /// Result of indexing a single chunk. @@ -8,21 +8,21 @@ use uuid::Uuid; pub struct IndexedChunk { /// Database ID of the created chunk. pub id: Uuid, - /// Index of the chunk within the file. - pub chunk_index: i32, + /// Index of the chunk within the file (0-based). + pub index: u32, /// Size of the chunk content in bytes. - pub content_size: i32, + pub content_size: u32, /// Number of tokens in the chunk. - pub token_count: i32, + pub token_count: u32, } -impl From for IndexedChunk { - fn from(chunk: DocumentChunk) -> Self { +impl From for IndexedChunk { + fn from(chunk: FileChunk) -> Self { Self { id: chunk.id, - chunk_index: chunk.chunk_index, - content_size: chunk.content_size, - token_count: chunk.token_count, + index: chunk.chunk_index as u32, + content_size: chunk.content_size as u32, + token_count: chunk.token_count as u32, } } } diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs index 37bf65e..e5e2a00 100644 --- a/crates/nvisy-rig/src/rag/indexer/mod.rs +++ b/crates/nvisy-rig/src/rag/indexer/mod.rs @@ -1,28 +1,22 @@ //! Document chunk indexing pipeline. -//! -//! 
Provides batch embedding and storage of document chunks using pgvector. mod indexed; -use nvisy_postgres::model::NewDocumentChunk; -use nvisy_postgres::query::DocumentChunkRepository; +use nvisy_postgres::model::NewFileChunk; +use nvisy_postgres::query::FileChunkRepository; use nvisy_postgres::{PgClient, Vector}; -use rig::embeddings::EmbeddingModel; use sha2::{Digest, Sha256}; use uuid::Uuid; pub use self::indexed::IndexedChunk; -use super::splitter::{OwnedSplitChunk, Splitter, estimate_tokens}; -use crate::provider::EmbeddingProvider; +use crate::provider::{EmbeddingProvider, OwnedChunk, TextSplitter}; use crate::{Error, Result}; /// Indexer for batch-embedding and storing document chunks. -/// -/// Handles text splitting, embedding, and storage in PostgreSQL. pub struct Indexer { provider: EmbeddingProvider, db: PgClient, - splitter: Splitter, + splitter: TextSplitter, file_id: Uuid, } @@ -31,7 +25,7 @@ impl Indexer { pub(crate) fn new( provider: EmbeddingProvider, db: PgClient, - splitter: Splitter, + splitter: TextSplitter, file_id: Uuid, ) -> Self { Self { @@ -48,58 +42,60 @@ impl Indexer { } /// Indexes text by splitting, embedding, and storing chunks. + #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn index(&self, text: &str) -> Result> { let chunks = self.splitter.split_owned(text); self.index_chunks(chunks).await } /// Indexes text with page awareness. - /// - /// Page breaks should be indicated by form feed characters (`\x0c`). + #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn index_with_pages(&self, text: &str) -> Result> { let chunks = self.splitter.split_with_pages_owned(text); self.index_chunks(chunks).await } /// Deletes all existing chunks for the file before indexing. + #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn reindex(&self, text: &str) -> Result> { let chunks = self.splitter.split_owned(text); self.reindex_chunks(chunks).await } /// Deletes all existing chunks for the file before indexing with page awareness. 
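Tying the splitter from the previous files to this indexing pipeline, a small sketch of how chunks are produced before embedding (standalone; only types introduced in this diff are assumed):

```rust
use std::num::NonZeroU32;

use nvisy_rig::provider::TextSplitter;

fn main() {
    // 200-character chunks with a 20-character overlap, whitespace trimmed.
    let splitter = TextSplitter::new(200, NonZeroU32::new(20), true);

    // Form feed characters mark page boundaries for the page-aware variant.
    let text = "First page body.\x0cSecond page body.";
    for chunk in splitter.split_with_pages(text) {
        println!("{} -> {:?}", chunk.metadata.location_string(), chunk.text);
    }
}
```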
+ #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn reindex_with_pages(&self, text: &str) -> Result> { let chunks = self.splitter.split_with_pages_owned(text); self.reindex_chunks(chunks).await } - async fn index_chunks(&self, chunks: Vec) -> Result> { + async fn index_chunks(&self, chunks: Vec) -> Result> { if chunks.is_empty() { + tracing::debug!("no chunks to index"); return Ok(vec![]); } - // Extract texts for embedding let texts: Vec = chunks.iter().map(|c| c.text.clone()).collect(); + let chunk_count = texts.len(); - // Batch embed all texts + tracing::debug!(chunk_count, "embedding chunks"); let embeddings = self .provider .embed_texts(texts) .await - .map_err(|e| Error::embedding(format!("failed to embed chunks: {e}")))?; + .map_err(|e| Error::provider("embedding", e.to_string()))?; - if embeddings.len() != chunks.len() { - return Err(Error::embedding(format!( + if embeddings.len() != chunk_count { + return Err(Error::config(format!( "embedding count mismatch: expected {}, got {}", - chunks.len(), + chunk_count, embeddings.len() ))); } - // Prepare new chunk records let model_name = self.provider.model_name(); - let new_chunks: Vec = chunks + let new_chunks: Vec = chunks .iter() .zip(embeddings.iter()) .enumerate() @@ -108,29 +104,28 @@ impl Indexer { let content_sha256 = Sha256::digest(content_bytes).to_vec(); let content_size = content_bytes.len() as i32; - // Convert f64 embeddings to f32 for pgvector let embedding_vec: Vec = embedding.vec.iter().map(|&x| x as f32).collect(); let metadata = serde_json::json!({ + "index": chunk.metadata.index, "start_offset": chunk.metadata.start_offset, "end_offset": chunk.metadata.end_offset, "page": chunk.metadata.page, }); - NewDocumentChunk { + NewFileChunk { file_id: self.file_id, chunk_index: Some(idx as i32), content_sha256, content_size: Some(content_size), - token_count: Some(estimate_tokens(&chunk.text) as i32), + token_count: None, embedding: Vector::from(embedding_vec), - embedding_model: Some(model_name.to_owned()), + embedding_model: model_name.to_owned(), metadata: Some(metadata), } }) .collect(); - // Store in database let mut conn = self .db .get_connection() @@ -138,15 +133,15 @@ impl Indexer { .map_err(|e| Error::retrieval(format!("failed to get connection: {e}")))?; let created = conn - .create_document_chunks(new_chunks) + .create_file_chunks(new_chunks) .await .map_err(|e| Error::retrieval(format!("failed to create chunks: {e}")))?; + tracing::debug!(created_count = created.len(), "stored chunks"); Ok(created.into_iter().map(IndexedChunk::from).collect()) } - async fn reindex_chunks(&self, chunks: Vec) -> Result> { - // Delete existing chunks first + async fn reindex_chunks(&self, chunks: Vec) -> Result> { let mut conn = self .db .get_connection() @@ -154,16 +149,15 @@ impl Indexer { .map_err(|e| Error::retrieval(format!("failed to get connection: {e}")))?; let deleted = conn - .delete_document_file_chunks(self.file_id) + .delete_file_chunks(self.file_id) .await .map_err(|e| Error::retrieval(format!("failed to delete chunks: {e}")))?; if deleted > 0 { - tracing::debug!(file_id = %self.file_id, deleted, "Deleted existing chunks"); + tracing::debug!(deleted, "deleted existing chunks"); } drop(conn); - self.index_chunks(chunks).await } } diff --git a/crates/nvisy-rig/src/rag/mod.rs b/crates/nvisy-rig/src/rag/mod.rs index 45f4181..d658ad4 100644 --- a/crates/nvisy-rig/src/rag/mod.rs +++ b/crates/nvisy-rig/src/rag/mod.rs @@ -1,51 +1,27 @@ //! 
RAG (Retrieval-Augmented Generation) module. //! //! Provides document indexing and semantic search over document chunks. -//! -//! # Security -//! -//! All searches must be scoped to specific files or documents via [`SearchScope`]. -//! -//! # Example -//! -//! ```ignore -//! use nvisy_rig::rag::{RagService, SearchScope}; -//! -//! let rag = RagService::new(embedding_provider, pg, &nats).await?; -//! -//! // Index a file -//! let indexed = rag.indexer(file_id).index(&content).await?; -//! -//! // Search within a document -//! let results = rag -//! .search(SearchScope::document(doc_id)) -//! .query("How does auth work?", 5) -//! .await?; -//! ``` mod config; mod indexer; mod searcher; -mod splitter; +mod vector_store; use std::sync::Arc; use nvisy_nats::NatsClient; -use nvisy_nats::object::{DocumentStore, Files}; +use nvisy_nats::object::{FileKey, FilesBucket, ObjectStore}; use nvisy_postgres::PgClient; use uuid::Uuid; pub use self::config::RagConfig; pub use self::indexer::{IndexedChunk, Indexer}; pub use self::searcher::{ChunkMetadata, RetrievedChunk, SearchScope, Searcher}; -use self::splitter::Splitter; -pub use self::splitter::estimate_tokens; +pub use self::vector_store::{ChunkDocument, PgFilter, PgVectorStore}; use crate::Result; -use crate::provider::EmbeddingProvider; +use crate::provider::{EmbeddingProvider, TextSplitter}; /// High-level RAG service for document indexing and semantic search. -/// -/// The service is cheap to clone and can be shared across threads. #[derive(Clone)] pub struct RagService { inner: Arc, @@ -54,7 +30,7 @@ pub struct RagService { struct RagServiceInner { provider: EmbeddingProvider, db: PgClient, - files: DocumentStore, + files: ObjectStore, config: RagConfig, } @@ -67,9 +43,9 @@ impl RagService { nats: NatsClient, ) -> Result { let files = nats - .document_store::() + .object_store::() .await - .map_err(|e| crate::Error::retrieval(format!("failed to open document store: {e}")))?; + .map_err(|e| crate::Error::retrieval(format!("failed to open file store: {e}")))?; let inner = RagServiceInner { provider, @@ -90,10 +66,10 @@ impl RagService { /// Creates an indexer for a specific file. 
pub fn indexer(&self, file_id: Uuid) -> Indexer { - let splitter = Splitter::new( + let splitter = TextSplitter::new( self.inner.config.max_chunk_characters, - self.inner.config.chunk_overlap, - self.inner.config.trim_chunks, + self.inner.config.chunk_overlap_characters, + self.inner.config.trim_whitespace, ); Indexer::new( diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs index 1d22b7d..ce280dc 100644 --- a/crates/nvisy-rig/src/rag/searcher/mod.rs +++ b/crates/nvisy-rig/src/rag/searcher/mod.rs @@ -7,11 +7,10 @@ mod scope; use std::collections::HashMap; -use nvisy_nats::object::{DocumentKey, DocumentStore, Files}; -use nvisy_postgres::model::ScoredDocumentChunk; -use nvisy_postgres::query::DocumentChunkRepository; +use nvisy_nats::object::{FileKey, FilesBucket, ObjectStore}; +use nvisy_postgres::model::ScoredFileChunk; +use nvisy_postgres::query::FileChunkRepository; use nvisy_postgres::{PgClient, Vector}; -use rig::embeddings::EmbeddingModel; use tokio::io::AsyncReadExt; use uuid::Uuid; @@ -26,7 +25,7 @@ use crate::{Error, Result}; pub struct Searcher { provider: EmbeddingProvider, db: PgClient, - files: DocumentStore, + files: ObjectStore, scope: SearchScope, min_score: Option, } @@ -36,7 +35,7 @@ impl Searcher { pub(crate) fn new( provider: EmbeddingProvider, db: PgClient, - files: DocumentStore, + files: ObjectStore, scope: SearchScope, ) -> Self { Self { @@ -60,12 +59,13 @@ impl Searcher { } /// Searches for relevant chunks without loading content. + #[tracing::instrument(skip(self, query), fields(query_len = query.len(), limit, scope = ?self.scope))] pub async fn query(&self, query: &str, limit: u32) -> Result> { let embedding = self .provider .embed_text(query) .await - .map_err(|e| Error::embedding(format!("failed to embed query: {e}")))?; + .map_err(|e| Error::provider("embedding", e.to_string()))?; let query_vector: Vector = embedding .vec @@ -82,15 +82,15 @@ impl Searcher { let min_score = self.min_score.unwrap_or(0.0); - let scored_chunks: Vec = match &self.scope { + let scored_chunks: Vec = match &self.scope { SearchScope::Files(file_ids) => { conn.search_scored_chunks_in_files(query_vector, file_ids, min_score, limit as i64) .await } - SearchScope::Documents(doc_ids) => { - conn.search_scored_chunks_in_documents( + SearchScope::Workspace(workspace_id) => { + conn.search_scored_chunks_in_workspace( query_vector, - doc_ids, + *workspace_id, min_score, limit as i64, ) @@ -99,26 +99,30 @@ impl Searcher { } .map_err(|e| Error::retrieval(format!("vector search failed: {e}")))?; - let chunks = scored_chunks + let chunks: Vec = scored_chunks .into_iter() .map(|scored| { let chunk = scored.chunk; - let metadata = ChunkMetadata::from_json(&chunk.metadata, chunk.chunk_index); + let metadata = ChunkMetadata::from_json(&chunk.metadata, chunk.chunk_index as u32); RetrievedChunk::new(chunk.id, chunk.file_id, scored.score, metadata) }) .collect(); + tracing::debug!(result_count = chunks.len(), "query completed"); Ok(chunks) } /// Searches for relevant chunks and loads their content. + #[tracing::instrument(skip(self, query), fields(query_len = query.len(), limit))] pub async fn query_with_content(&self, query: &str, limit: u32) -> Result> { let mut chunks = self.query(query, limit).await?; self.load_content(&mut chunks).await?; + tracing::debug!(result_count = chunks.len(), "query_with_content completed"); Ok(chunks) } /// Loads content for retrieved chunks from NATS. 
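A hedged sketch of the indexing path as wired here: `rag` is an already-constructed `RagService`, the `Result` alias is assumed to be re-exported at the crate root, and the IDs are placeholders:

```rust
use nvisy_rig::rag::{RagService, SearchScope};
use uuid::Uuid;

async fn index_file(
    rag: &RagService,
    file_id: Uuid,
    workspace_id: Uuid,
    text: &str,
) -> nvisy_rig::Result<()> {
    // Split, embed, and store chunks; form feeds in `text` mark page breaks.
    let indexed = rag.indexer(file_id).index_with_pages(text).await?;
    println!("stored {} chunks for {file_id}", indexed.len());

    // Later queries must be scoped, e.g. to the surrounding workspace.
    let _scope = SearchScope::workspace(workspace_id);
    Ok(())
}
```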
+ #[tracing::instrument(skip(self, chunks), fields(chunk_count = chunks.len()))] pub async fn load_content(&self, chunks: &mut [RetrievedChunk]) -> Result<()> { let mut by_file: HashMap> = HashMap::new(); for (idx, chunk) in chunks.iter().enumerate() { @@ -127,11 +131,14 @@ impl Searcher { } } + let file_count = by_file.len(); + tracing::debug!(file_count, "loading content from files"); + for (file_id, indices) in by_file { let file_content = match self.fetch_file(file_id).await { Ok(content) => content, Err(e) => { - tracing::warn!(file_id = %file_id, error = %e, "Failed to fetch file"); + tracing::warn!(file_id = %file_id, error = %e, "failed to fetch file"); continue; } }; @@ -150,8 +157,9 @@ impl Searcher { Ok(()) } + #[tracing::instrument(skip(self), fields(%file_id))] async fn fetch_file(&self, file_id: Uuid) -> Result> { - let key = DocumentKey::from_parts(Uuid::nil(), file_id); + let key = FileKey::from_parts(Uuid::nil(), file_id); let mut result = self .files @@ -167,6 +175,7 @@ impl Searcher { .await .map_err(|e| Error::retrieval(format!("failed to read file: {e}")))?; + tracing::debug!(content_len = content.len(), "file fetched"); Ok(content) } } diff --git a/crates/nvisy-rig/src/rag/searcher/retrieved.rs b/crates/nvisy-rig/src/rag/searcher/retrieved.rs index 423542c..bb40fd4 100644 --- a/crates/nvisy-rig/src/rag/searcher/retrieved.rs +++ b/crates/nvisy-rig/src/rag/searcher/retrieved.rs @@ -3,103 +3,8 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; -/// Metadata about a chunk's location in the source document. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct ChunkMetadata { - /// Page number (1-indexed, if applicable). - #[serde(skip_serializing_if = "Option::is_none")] - pub page: Option, - - /// Section or heading the chunk belongs to. - #[serde(skip_serializing_if = "Option::is_none")] - pub section: Option, - - /// Start byte offset in the source file. - pub start_offset: u32, - - /// End byte offset in the source file. - pub end_offset: u32, - - /// Chunk index within the file (0-based). - pub chunk_index: u32, -} - -impl ChunkMetadata { - /// Creates metadata with offset information. - pub fn new(chunk_index: u32, start_offset: u32, end_offset: u32) -> Self { - Self { - page: None, - section: None, - start_offset, - end_offset, - chunk_index, - } - } - - /// Creates metadata from JSON and chunk index. - pub fn from_json(json: &serde_json::Value, chunk_index: i32) -> Self { - let start_offset = json - .get("start_offset") - .and_then(|v| v.as_u64()) - .unwrap_or(0) as u32; - - let end_offset = json.get("end_offset").and_then(|v| v.as_u64()).unwrap_or(0) as u32; - - let page = json.get("page").and_then(|v| v.as_u64()).map(|p| p as u32); - - let section = json - .get("section") - .and_then(|v| v.as_str()) - .map(String::from); - - Self { - page, - section, - start_offset, - end_offset, - chunk_index: chunk_index as u32, - } - } - - /// Sets the page number. - pub fn with_page(mut self, page: u32) -> Self { - self.page = Some(page); - self - } - - /// Sets the section name. - pub fn with_section(mut self, section: impl Into) -> Self { - self.section = Some(section.into()); - self - } - - /// Returns the byte range for content extraction. - pub fn byte_range(&self) -> std::ops::Range { - self.start_offset as usize..self.end_offset as usize - } - - /// Returns the content length in bytes. - pub fn content_len(&self) -> u32 { - self.end_offset.saturating_sub(self.start_offset) - } - - /// Returns a location string for display. 
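And the matching retrieval side, again as a sketch: a `Searcher` is obtained from the RAG service for a given scope (its constructor is crate-private), and the crate-root `Result` re-export is assumed:

```rust
use nvisy_rig::rag::{RetrievedChunk, Searcher};

// Retrieve the five best-scoring chunks and hydrate their text from object storage.
async fn top_matches(
    searcher: &Searcher,
    question: &str,
) -> nvisy_rig::Result<Vec<RetrievedChunk>> {
    searcher.query_with_content(question, 5).await
}
```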
- pub fn location_string(&self) -> String { - let mut parts = Vec::new(); - - if let Some(page) = self.page { - parts.push(format!("page {page}")); - } - - if let Some(section) = &self.section { - parts.push(format!("'{section}'")); - } - - parts.push(format!("chunk {}", self.chunk_index + 1)); - - parts.join(", ") - } -} +// Re-export ChunkMetadata from the canonical location +pub use crate::provider::splitting::ChunkMetadata; /// A retrieved chunk with content and similarity score. #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/nvisy-rig/src/rag/searcher/scope.rs b/crates/nvisy-rig/src/rag/searcher/scope.rs index 79fc131..b1bc8b0 100644 --- a/crates/nvisy-rig/src/rag/searcher/scope.rs +++ b/crates/nvisy-rig/src/rag/searcher/scope.rs @@ -4,14 +4,14 @@ use uuid::Uuid; /// Search scope for vector queries. /// -/// Restricts search to specific files or documents to prevent cross-user data access. +/// Restricts search to specific files or a workspace to prevent cross-user data access. #[derive(Debug, Clone)] pub enum SearchScope { /// Search within specific files. Files(Vec), - /// Search within specific documents (all files in those documents). - Documents(Vec), + /// Search within a workspace (all files in that workspace). + Workspace(Uuid), } impl SearchScope { @@ -25,13 +25,8 @@ impl SearchScope { Self::Files(file_ids) } - /// Creates a scope for a single document. - pub fn document(document_id: Uuid) -> Self { - Self::Documents(vec![document_id]) - } - - /// Creates a scope for multiple documents. - pub fn documents(document_ids: Vec) -> Self { - Self::Documents(document_ids) + /// Creates a scope for a workspace. + pub fn workspace(workspace_id: Uuid) -> Self { + Self::Workspace(workspace_id) } } diff --git a/crates/nvisy-rig/src/rag/splitter/chunk.rs b/crates/nvisy-rig/src/rag/splitter/chunk.rs deleted file mode 100644 index b2b7cd4..0000000 --- a/crates/nvisy-rig/src/rag/splitter/chunk.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! Split chunk types. - -use super::SplitMetadata; - -/// A chunk produced by the text splitter (borrows from source text). -#[derive(Debug)] -pub struct SplitChunk<'a> { - /// The chunk text content (borrowed from original). - pub text: &'a str, - - /// Metadata about the chunk's position. - pub metadata: SplitMetadata, -} - -impl SplitChunk<'_> { - /// Converts to an owned chunk. - pub fn into_owned(self) -> OwnedSplitChunk { - OwnedSplitChunk { - text: self.text.to_string(), - metadata: self.metadata, - } - } -} - -/// An owned version of SplitChunk. -#[derive(Debug, Clone)] -pub struct OwnedSplitChunk { - /// The chunk text content. - pub text: String, - - /// Metadata about the chunk's position. - pub metadata: SplitMetadata, -} diff --git a/crates/nvisy-rig/src/rag/splitter/metadata.rs b/crates/nvisy-rig/src/rag/splitter/metadata.rs deleted file mode 100644 index 4201323..0000000 --- a/crates/nvisy-rig/src/rag/splitter/metadata.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! Split chunk metadata. - -use serde::{Deserialize, Serialize}; - -/// Metadata about a split chunk's location in the source text. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct SplitMetadata { - /// Page number (1-indexed, if applicable). - #[serde(skip_serializing_if = "Option::is_none")] - pub page: Option, - - /// Start byte offset in the source text. - pub start_offset: u32, - - /// End byte offset in the source text. - pub end_offset: u32, - - /// Chunk index within the source (0-based). 
- pub chunk_index: u32, -} - -impl SplitMetadata { - /// Creates metadata with offset information. - pub fn new(chunk_index: u32, start_offset: u32, end_offset: u32) -> Self { - Self { - page: None, - start_offset, - end_offset, - chunk_index, - } - } -} diff --git a/crates/nvisy-rig/src/rag/splitter/mod.rs b/crates/nvisy-rig/src/rag/splitter/mod.rs deleted file mode 100644 index e51ad66..0000000 --- a/crates/nvisy-rig/src/rag/splitter/mod.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! Text splitting for chunk creation. - -mod chunk; -mod metadata; - -use text_splitter::{ChunkConfig, TextSplitter}; - -pub(crate) use self::chunk::{OwnedSplitChunk, SplitChunk}; -pub(crate) use self::metadata::SplitMetadata; - -/// Estimates the token count (~4 chars per token). -pub fn estimate_tokens(text: &str) -> u32 { - (text.len() / 4) as u32 -} - -/// Text splitter service for creating document chunks. -#[derive(Clone)] -pub struct Splitter { - max_characters: u32, - overlap: u32, - trim: bool, -} - -impl Splitter { - /// Creates a new text splitter. - pub fn new(max_characters: u32, overlap: u32, trim: bool) -> Self { - Self { - max_characters, - overlap, - trim, - } - } - - /// Splits text into chunks with byte offset tracking. - pub fn split<'a>(&self, text: &'a str) -> Vec> { - let chunk_config = ChunkConfig::new(self.max_characters as usize) - .with_overlap(self.overlap as usize) - .expect("overlap must be less than max_characters") - .with_trim(self.trim); - let splitter = TextSplitter::new(chunk_config); - - splitter - .chunk_indices(text) - .enumerate() - .map(|(chunk_index, (byte_offset, chunk_text))| { - let end_offset = byte_offset + chunk_text.len(); - - SplitChunk { - text: chunk_text, - metadata: SplitMetadata::new( - chunk_index as u32, - byte_offset as u32, - end_offset as u32, - ), - } - }) - .collect() - } - - /// Splits text and returns owned chunks. - pub fn split_owned(&self, text: &str) -> Vec { - self.split(text) - .into_iter() - .map(|c| c.into_owned()) - .collect() - } - - /// Splits text with page awareness. - /// - /// Page breaks should be indicated by form feed characters (`\x0c`). - pub fn split_with_pages<'a>(&self, text: &'a str) -> Vec> { - let page_breaks: Vec = text - .char_indices() - .filter(|(_, c)| *c == '\x0c') - .map(|(i, _)| i as u32) - .collect(); - - self.split(text) - .into_iter() - .map(|mut chunk| { - let page = page_breaks - .iter() - .take_while(|&&pos| pos < chunk.metadata.start_offset) - .count() as u32 - + 1; - chunk.metadata.page = Some(page); - chunk - }) - .collect() - } - - /// Splits text with page awareness and returns owned chunks. - pub fn split_with_pages_owned(&self, text: &str) -> Vec { - self.split_with_pages(text) - .into_iter() - .map(|c| c.into_owned()) - .collect() - } -} - -impl Default for Splitter { - fn default() -> Self { - Self::new(512, 0, true) - } -} diff --git a/crates/nvisy-rig/src/rag/vector_store.rs b/crates/nvisy-rig/src/rag/vector_store.rs new file mode 100644 index 0000000..6ab37f1 --- /dev/null +++ b/crates/nvisy-rig/src/rag/vector_store.rs @@ -0,0 +1,378 @@ +//! Vector store implementation using PostgreSQL with pgvector. +//! +//! Provides rig-core compatible [`VectorStoreIndex`] and [`InsertDocuments`] +//! implementations backed by PostgreSQL for document chunk storage and +//! similarity search. 
+ +use nvisy_postgres::model::NewFileChunk; +use nvisy_postgres::query::FileChunkRepository; +use nvisy_postgres::{PgClient, Vector}; +use rig::embeddings::{Embedding, TextEmbedder}; +use rig::one_or_many::OneOrMany; +use rig::vector_store::request::{SearchFilter, VectorSearchRequest}; +use rig::vector_store::{InsertDocuments, VectorStoreError, VectorStoreIndex}; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use uuid::Uuid; + +use super::SearchScope; +use crate::provider::EmbeddingProvider; + +/// PostgreSQL-backed vector store for document chunks. +/// +/// Implements rig-core's [`VectorStoreIndex`] and [`InsertDocuments`] traits, +/// enabling integration with rig's agent and pipeline systems. +#[derive(Clone)] +pub struct PgVectorStore { + provider: EmbeddingProvider, + db: PgClient, + scope: SearchScope, + min_score: Option, +} + +impl PgVectorStore { + /// Creates a new vector store with the given scope. + pub fn new(provider: EmbeddingProvider, db: PgClient, scope: SearchScope) -> Self { + Self { + provider, + db, + scope, + min_score: None, + } + } + + /// Sets the minimum similarity score threshold. + pub fn with_min_score(mut self, min_score: f64) -> Self { + self.min_score = Some(min_score); + self + } + + /// Returns the search scope. + pub fn scope(&self) -> &SearchScope { + &self.scope + } + + /// Returns the embedding provider. + pub fn provider(&self) -> &EmbeddingProvider { + &self.provider + } +} + +/// A document that can be stored in the vector store. +/// +/// Contains the text content and metadata for a document chunk. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChunkDocument { + /// The text content of the chunk. + pub text: String, + /// The file ID this chunk belongs to. + pub file_id: Uuid, + /// The chunk index within the file (0-based). + pub index: u32, + /// Start byte offset in the source file. + pub start_offset: u32, + /// End byte offset in the source file. + pub end_offset: u32, + /// Optional page number (1-indexed). + #[serde(skip_serializing_if = "Option::is_none")] + pub page: Option, +} + +impl ChunkDocument { + /// Creates a new chunk document. + pub fn new( + text: impl Into, + file_id: Uuid, + index: u32, + start_offset: u32, + end_offset: u32, + ) -> Self { + Self { + text: text.into(), + file_id, + index, + start_offset, + end_offset, + page: None, + } + } + + /// Sets the page number. + pub fn with_page(mut self, page: u32) -> Self { + self.page = Some(page); + self + } +} + +impl rig::Embed for ChunkDocument { + fn embed(&self, embedder: &mut TextEmbedder) -> Result<(), rig::embeddings::EmbedError> { + embedder.embed(self.text.clone()); + Ok(()) + } +} + +/// Filter type for PostgreSQL vector store queries. +/// +/// Supports filtering by file ID and workspace scope. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum PgFilter { + /// Filter by exact file ID match. + FileId(Uuid), + /// Filter by workspace ID. + WorkspaceId(Uuid), + /// Combine filters with AND logic. + And(Box, Box), + /// Combine filters with OR logic. 
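Before the trait plumbing below, a sketch of how the store and its documents are assembled (the 0.35 score threshold is an arbitrary illustration, not a value from this change):

```rust
use nvisy_postgres::PgClient;
use nvisy_rig::provider::EmbeddingProvider;
use nvisy_rig::rag::{ChunkDocument, PgVectorStore, SearchScope};
use uuid::Uuid;

// The provider and database client come from the surrounding application wiring.
fn build_store(provider: EmbeddingProvider, db: PgClient, workspace_id: Uuid) -> PgVectorStore {
    PgVectorStore::new(provider, db, SearchScope::workspace(workspace_id)).with_min_score(0.35)
}

fn example_document(file_id: Uuid) -> ChunkDocument {
    // Chunk 0 covering bytes 0..28 of the file, found on page 1.
    ChunkDocument::new("Hello from the vector store", file_id, 0, 0, 28).with_page(1)
}
```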
+    Or(Box<PgFilter>, Box<PgFilter>),
+}
+
+impl SearchFilter for PgFilter {
+    type Value = serde_json::Value;
+
+    fn eq(key: impl AsRef<str>, value: Self::Value) -> Self {
+        match key.as_ref() {
+            "file_id" => {
+                if let Some(id) = value.as_str().and_then(|s| Uuid::parse_str(s).ok()) {
+                    Self::FileId(id)
+                } else {
+                    // Fallback: treat as file ID filter with nil UUID
+                    Self::FileId(Uuid::nil())
+                }
+            }
+            "workspace_id" => {
+                if let Some(id) = value.as_str().and_then(|s| Uuid::parse_str(s).ok()) {
+                    Self::WorkspaceId(id)
+                } else {
+                    Self::WorkspaceId(Uuid::nil())
+                }
+            }
+            _ => Self::FileId(Uuid::nil()),
+        }
+    }
+
+    fn gt(_key: impl AsRef<str>, _value: Self::Value) -> Self {
+        // Greater-than not meaningful for our use case
+        Self::FileId(Uuid::nil())
+    }
+
+    fn lt(_key: impl AsRef<str>, _value: Self::Value) -> Self {
+        // Less-than not meaningful for our use case
+        Self::FileId(Uuid::nil())
+    }
+
+    fn and(self, rhs: Self) -> Self {
+        Self::And(Box::new(self), Box::new(rhs))
+    }
+
+    fn or(self, rhs: Self) -> Self {
+        Self::Or(Box::new(self), Box::new(rhs))
+    }
+}
+
+impl InsertDocuments for PgVectorStore {
+    async fn insert_documents<Doc: Serialize + rig::Embed + Send>(
+        &self,
+        documents: Vec<(Doc, OneOrMany<Embedding>)>,
+    ) -> Result<(), VectorStoreError> {
+        if documents.is_empty() {
+            return Ok(());
+        }
+
+        let model_name = self.provider.model_name();
+
+        let new_chunks: Vec<NewFileChunk> = documents
+            .into_iter()
+            .filter_map(|(doc, embeddings)| {
+                // Serialize the document to extract fields
+                let json = serde_json::to_value(&doc).ok()?;
+
+                let text = json.get("text")?.as_str()?;
+                let file_id = json
+                    .get("file_id")
+                    .and_then(|v| v.as_str())
+                    .and_then(|s| Uuid::parse_str(s).ok())?;
+                let index = json.get("index").and_then(|v| v.as_u64())? as u32;
+                let start_offset = json.get("start_offset").and_then(|v| v.as_u64())? as u32;
+                let end_offset = json.get("end_offset").and_then(|v| v.as_u64())?
as u32; + let page = json.get("page").and_then(|v| v.as_u64()).map(|p| p as u32); + + // Get the first embedding + let embedding = embeddings.first(); + let embedding_vec: Vec = embedding.vec.iter().map(|&x| x as f32).collect(); + + let content_bytes = text.as_bytes(); + let content_sha256 = Sha256::digest(content_bytes).to_vec(); + let content_size = content_bytes.len() as i32; + + let metadata = serde_json::json!({ + "index": index, + "start_offset": start_offset, + "end_offset": end_offset, + "page": page, + }); + + Some(NewFileChunk { + file_id, + chunk_index: Some(index as i32), + content_sha256, + content_size: Some(content_size), + token_count: None, + embedding: Vector::from(embedding_vec), + embedding_model: model_name.to_owned(), + metadata: Some(metadata), + }) + }) + .collect(); + + if new_chunks.is_empty() { + return Ok(()); + } + + let mut conn = self.db.get_connection().await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to get connection: {e}" + )))) + })?; + + conn.create_file_chunks(new_chunks).await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to create chunks: {e}" + )))) + })?; + + Ok(()) + } +} + +impl VectorStoreIndex for PgVectorStore { + type Filter = PgFilter; + + async fn top_n Deserialize<'a> + Send>( + &self, + req: VectorSearchRequest, + ) -> Result, VectorStoreError> { + let query = req.query(); + let limit = req.samples() as i64; + let min_score = req.threshold().or(self.min_score).unwrap_or(0.0); + + // Embed the query + let embedding = self.provider.embed_text(query).await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "embedding failed: {e}" + )))) + })?; + + let query_vector: Vector = embedding + .vec + .iter() + .map(|&x| x as f32) + .collect::>() + .into(); + + let mut conn = self.db.get_connection().await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to get connection: {e}" + )))) + })?; + + // Use the scope to determine which search method to use + let scored_chunks = match &self.scope { + SearchScope::Files(file_ids) => { + conn.search_scored_chunks_in_files(query_vector, file_ids, min_score, limit) + .await + } + SearchScope::Workspace(workspace_id) => { + conn.search_scored_chunks_in_workspace( + query_vector, + *workspace_id, + min_score, + limit, + ) + .await + } + } + .map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "vector search failed: {e}" + )))) + })?; + + // Convert to rig format + let results: Vec<(f64, String, T)> = scored_chunks + .into_iter() + .filter_map(|scored| { + let chunk = scored.chunk; + let id = chunk.id.to_string(); + + // Build a document representation from metadata + let doc_json = serde_json::json!({ + "file_id": chunk.file_id.to_string(), + "chunk_index": chunk.chunk_index, + "metadata": chunk.metadata, + }); + + let doc: T = serde_json::from_value(doc_json).ok()?; + Some((scored.score, id, doc)) + }) + .collect(); + + Ok(results) + } + + async fn top_n_ids( + &self, + req: VectorSearchRequest, + ) -> Result, VectorStoreError> { + let query = req.query(); + let limit = req.samples() as i64; + let min_score = req.threshold().or(self.min_score).unwrap_or(0.0); + + // Embed the query + let embedding = self.provider.embed_text(query).await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "embedding failed: {e}" + )))) + })?; + + let 
query_vector: Vector = embedding + .vec + .iter() + .map(|&x| x as f32) + .collect::>() + .into(); + + let mut conn = self.db.get_connection().await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to get connection: {e}" + )))) + })?; + + let scored_chunks = match &self.scope { + SearchScope::Files(file_ids) => { + conn.search_scored_chunks_in_files(query_vector, file_ids, min_score, limit) + .await + } + SearchScope::Workspace(workspace_id) => { + conn.search_scored_chunks_in_workspace( + query_vector, + *workspace_id, + min_score, + limit, + ) + .await + } + } + .map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "vector search failed: {e}" + )))) + })?; + + let results: Vec<(f64, String)> = scored_chunks + .into_iter() + .map(|scored| (scored.score, scored.chunk.id.to_string())) + .collect(); + + Ok(results) + } +} diff --git a/crates/nvisy-rig/src/service/config.rs b/crates/nvisy-rig/src/service/config.rs deleted file mode 100644 index 399054d..0000000 --- a/crates/nvisy-rig/src/service/config.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! Configuration for the rig service. - -#[cfg(feature = "config")] -use clap::Args; -use serde::{Deserialize, Serialize}; - -use crate::provider::EmbeddingProvider; - -/// Configuration for AI services (chat and RAG). -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "config", derive(Args))] -pub struct RigConfig { - /// Ollama base URL for embeddings. - #[cfg_attr( - feature = "config", - arg( - long, - env = "OLLAMA_BASE_URL", - default_value = "http://localhost:11434" - ) - )] - pub ollama_base_url: String, - - /// Ollama embedding model name. - #[cfg_attr( - feature = "config", - arg( - long, - env = "OLLAMA_EMBEDDING_MODEL", - default_value = "nomic-embed-text" - ) - )] - pub ollama_embedding_model: String, -} - -impl Default for RigConfig { - fn default() -> Self { - Self { - ollama_base_url: "http://localhost:11434".to_string(), - ollama_embedding_model: "nomic-embed-text".to_string(), - } - } -} - -impl RigConfig { - /// Creates an embedding provider from this configuration. - pub(crate) fn embedding_provider(&self) -> EmbeddingProvider { - EmbeddingProvider::ollama(&self.ollama_base_url, &self.ollama_embedding_model) - } -} diff --git a/crates/nvisy-rig/src/service/mod.rs b/crates/nvisy-rig/src/service/mod.rs deleted file mode 100644 index 08e2a4e..0000000 --- a/crates/nvisy-rig/src/service/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Unified AI service combining chat and RAG capabilities. - -mod config; -mod rig; - -pub use config::RigConfig; -pub use rig::RigService; diff --git a/crates/nvisy-rig/src/service/rig.rs b/crates/nvisy-rig/src/service/rig.rs deleted file mode 100644 index adf81e3..0000000 --- a/crates/nvisy-rig/src/service/rig.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Unified AI service combining chat and RAG capabilities. - -use std::sync::Arc; - -use nvisy_nats::NatsClient; -use nvisy_postgres::PgClient; - -use super::RigConfig; -use crate::Result; -use crate::chat::ChatService; -use crate::provider::ProviderRegistry; -use crate::rag::{RagConfig, RagService}; - -/// Inner state for [`RigService`]. -struct RigServiceInner { - chat: ChatService, - rag: RagService, -} - -/// Unified AI service providing chat and RAG capabilities. -/// -/// This type is cheap to clone and can be shared across threads. -#[derive(Clone)] -pub struct RigService { - inner: Arc, -} - -impl RigService { - /// Creates a new RigService from configuration. 
- pub async fn new(config: RigConfig, db: PgClient, nats: NatsClient) -> Result { - // Initialize RAG service - let embedding_provider = config.embedding_provider(); - let rag_config = RagConfig::default(); - let rag = RagService::new(rag_config, embedding_provider, db, nats.clone()).await?; - - // Initialize Chat service - let providers = ProviderRegistry::empty(); - let chat = ChatService::new(providers, nats).await?; - - Ok(Self { - inner: Arc::new(RigServiceInner { chat, rag }), - }) - } - - /// Returns a reference to the chat service. - pub fn chat(&self) -> &ChatService { - &self.inner.chat - } - - /// Returns a reference to the RAG service. - pub fn rag(&self) -> &RagService { - &self.inner.rag - } -} diff --git a/crates/nvisy-rig/src/session/message.rs b/crates/nvisy-rig/src/session/message.rs deleted file mode 100644 index cf1b405..0000000 --- a/crates/nvisy-rig/src/session/message.rs +++ /dev/null @@ -1,135 +0,0 @@ -//! Chat message types. - -use jiff::Timestamp; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Role of a message in the conversation. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum MessageRole { - /// System prompt. - System, - /// User message. - User, - /// Assistant response. - Assistant, - /// Tool result. - Tool, -} - -/// A message in the conversation history. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Message { - /// Unique message ID. - id: Uuid, - - /// Message role. - role: MessageRole, - - /// Message content. - content: String, - - /// Tool call ID (for tool messages). - tool_call_id: Option, - - /// When the message was created. - created_at: Timestamp, -} - -impl Message { - /// Creates a system message. - pub fn system(content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::System, - content: content.into(), - tool_call_id: None, - created_at: Timestamp::now(), - } - } - - /// Creates a user message. - pub fn user(content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::User, - content: content.into(), - tool_call_id: None, - created_at: Timestamp::now(), - } - } - - /// Creates an assistant message. - pub fn assistant(content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::Assistant, - content: content.into(), - tool_call_id: None, - created_at: Timestamp::now(), - } - } - - /// Creates a tool result message. - pub fn tool(tool_call_id: Uuid, content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::Tool, - content: content.into(), - tool_call_id: Some(tool_call_id), - created_at: Timestamp::now(), - } - } - - /// Returns the message ID. - pub fn id(&self) -> Uuid { - self.id - } - - /// Returns the message role. - pub fn role(&self) -> MessageRole { - self.role - } - - /// Returns the message content. - pub fn content(&self) -> &str { - &self.content - } - - /// Returns the tool call ID if this is a tool message. - pub fn tool_call_id(&self) -> Option { - self.tool_call_id - } - - /// Returns when the message was created. 
- pub fn created_at(&self) -> Timestamp { - self.created_at - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn message_roles() { - let system = Message::system("You are a helpful assistant"); - let user = Message::user("Hello"); - let assistant = Message::assistant("Hi!"); - let tool = Message::tool(Uuid::now_v7(), "result"); - - assert_eq!(system.role(), MessageRole::System); - assert_eq!(user.role(), MessageRole::User); - assert_eq!(assistant.role(), MessageRole::Assistant); - assert_eq!(tool.role(), MessageRole::Tool); - } - - #[test] - fn tool_message_has_call_id() { - let call_id = Uuid::now_v7(); - let tool = Message::tool(call_id, "result"); - - assert_eq!(tool.tool_call_id(), Some(call_id)); - } -} diff --git a/crates/nvisy-rig/src/session/mod.rs b/crates/nvisy-rig/src/session/mod.rs deleted file mode 100644 index 077f49e..0000000 --- a/crates/nvisy-rig/src/session/mod.rs +++ /dev/null @@ -1,325 +0,0 @@ -//! Session management for chat conversations. -//! -//! Sessions are ephemeral and stored in NATS KV with TTL. -//! They track conversation history, pending edits, and auto-apply policies. - -mod message; -mod policy; -mod store; - -use jiff::Timestamp; -pub use message::Message; -pub use policy::{ApplyPolicy, ApprovalHistory, AutoApplyContext}; -use serde::{Deserialize, Serialize}; -pub use store::SessionStore; -use uuid::Uuid; - -use crate::Result; -use crate::tool::edit::{ApplyResult, ProposedEdit}; - -/// Request to create a new session. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CreateSession { - /// Document being processed. - pub document_id: Uuid, - - /// Workspace context. - pub workspace_id: Uuid, - - /// User initiating the session. - pub user_id: Uuid, - - /// Auto-apply policy for edits. - #[serde(default)] - pub apply_policy: ApplyPolicy, - - /// Initial system prompt override. - pub system_prompt: Option, - - /// Model preference. - pub model: Option, -} - -impl CreateSession { - /// Creates a new session request. - pub fn new(document_id: Uuid, workspace_id: Uuid, user_id: Uuid) -> Self { - Self { - document_id, - workspace_id, - user_id, - apply_policy: ApplyPolicy::default(), - system_prompt: None, - model: None, - } - } - - /// Sets the auto-apply policy. - pub fn with_policy(mut self, policy: ApplyPolicy) -> Self { - self.apply_policy = policy; - self - } - - /// Sets a custom system prompt. - pub fn with_system_prompt(mut self, prompt: impl Into) -> Self { - self.system_prompt = Some(prompt.into()); - self - } - - /// Sets a model preference. - pub fn with_model(mut self, model: impl Into) -> Self { - self.model = Some(model.into()); - self - } -} - -/// An active chat session. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Session { - /// Unique session ID. - id: Uuid, - - /// Document being processed. - document_id: Uuid, - - /// Workspace context. - workspace_id: Uuid, - - /// User who created the session. - user_id: Uuid, - - /// Auto-apply policy. - apply_policy: ApplyPolicy, - - /// Custom system prompt. - system_prompt: Option, - - /// Model preference. - model: Option, - - /// Conversation history. - messages: Vec, - - /// Pending edits awaiting approval. - pending_edits: Vec, - - /// Applied edit IDs. - applied_edits: Vec, - - /// Rejected edit IDs. - rejected_edits: Vec, - - /// Count of auto-applied edits in this session. - auto_applied_count: usize, - - /// Approval history for learning policies. - approval_history: ApprovalHistory, - - /// When the session was created. 
- created_at: Timestamp, - - /// Last activity time. - last_activity_at: Timestamp, -} - -impl Session { - /// Creates a new session from a request. - pub fn new(request: CreateSession) -> Self { - let now = Timestamp::now(); - Self { - id: Uuid::now_v7(), - document_id: request.document_id, - workspace_id: request.workspace_id, - user_id: request.user_id, - apply_policy: request.apply_policy, - system_prompt: request.system_prompt, - model: request.model, - messages: Vec::new(), - pending_edits: Vec::new(), - applied_edits: Vec::new(), - rejected_edits: Vec::new(), - auto_applied_count: 0, - approval_history: ApprovalHistory::new(), - created_at: now, - last_activity_at: now, - } - } - - /// Returns the session ID. - pub fn id(&self) -> Uuid { - self.id - } - - /// Returns the document ID. - pub fn document_id(&self) -> Uuid { - self.document_id - } - - /// Returns the workspace ID. - pub fn workspace_id(&self) -> Uuid { - self.workspace_id - } - - /// Returns the user ID. - pub fn user_id(&self) -> Uuid { - self.user_id - } - - /// Returns the auto-apply policy. - pub fn apply_policy(&self) -> &ApplyPolicy { - &self.apply_policy - } - - /// Returns the custom system prompt. - pub fn system_prompt(&self) -> Option<&str> { - self.system_prompt.as_deref() - } - - /// Returns the model preference. - pub fn model(&self) -> Option<&str> { - self.model.as_deref() - } - - /// Returns the conversation messages. - pub fn messages(&self) -> &[Message] { - &self.messages - } - - /// Returns pending edits. - pub fn pending_edits(&self) -> &[ProposedEdit] { - &self.pending_edits - } - - /// Returns applied edit IDs. - pub fn applied_edits(&self) -> &[Uuid] { - &self.applied_edits - } - - /// Returns the creation time. - pub fn created_at(&self) -> Timestamp { - self.created_at - } - - /// Returns the last activity time. - pub fn last_activity_at(&self) -> Timestamp { - self.last_activity_at - } - - /// Adds a user message. - pub fn add_user_message(&mut self, content: impl Into) { - self.messages.push(Message::user(content)); - self.last_activity_at = Timestamp::now(); - } - - /// Adds an assistant message. - pub fn add_assistant_message(&mut self, content: impl Into) { - self.messages.push(Message::assistant(content)); - self.last_activity_at = Timestamp::now(); - } - - /// Adds a tool result message. - pub fn add_tool_message(&mut self, tool_call_id: Uuid, content: impl Into) { - self.messages.push(Message::tool(tool_call_id, content)); - self.last_activity_at = Timestamp::now(); - } - - /// Adds a proposed edit. - pub fn add_proposed_edit(&mut self, edit: ProposedEdit) { - self.pending_edits.push(edit); - self.last_activity_at = Timestamp::now(); - } - - /// Checks if an edit should be auto-applied. - pub fn should_auto_apply(&self, edit: &ProposedEdit) -> bool { - let op_type = edit.operation_type(); - let context = AutoApplyContext::new(op_type) - .with_idempotent(edit.is_idempotent()) - .with_auto_applied_count(self.auto_applied_count) - .with_approval_count(self.approval_history.approval_count(op_type)); - - self.apply_policy.should_auto_apply(&context) - } - - /// Records that an edit was auto-applied. - pub fn record_auto_apply(&mut self) { - self.auto_applied_count += 1; - } - - /// Returns the approval history. - pub fn approval_history(&self) -> &ApprovalHistory { - &self.approval_history - } - - /// Applies pending edits by ID. - /// - /// This also records the approval in the history for learning policies. 
- pub fn apply_edits(&mut self, edit_ids: &[Uuid]) -> Result { - let mut applied = Vec::new(); - let mut not_found = Vec::new(); - - for id in edit_ids { - if let Some(pos) = self.pending_edits.iter().position(|e| e.id() == *id) { - let edit = self.pending_edits.remove(pos); - // Record approval for learning policies - self.approval_history.record_approval(edit.operation_type()); - applied.push(edit); - self.applied_edits.push(*id); - } else { - not_found.push(*id); - } - } - - self.last_activity_at = Timestamp::now(); - - Ok(ApplyResult { - applied, - not_found, - errors: Vec::new(), - }) - } - - /// Rejects pending edits by ID. - pub fn reject_edits(&mut self, edit_ids: &[Uuid]) { - for id in edit_ids { - if let Some(pos) = self.pending_edits.iter().position(|e| e.id() == *id) { - self.pending_edits.remove(pos); - self.rejected_edits.push(*id); - } - } - self.last_activity_at = Timestamp::now(); - } - - /// Touches the session to update last activity time. - pub fn touch(&mut self) { - self.last_activity_at = Timestamp::now(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::session::message::MessageRole; - - fn test_request() -> CreateSession { - CreateSession::new(Uuid::now_v7(), Uuid::now_v7(), Uuid::now_v7()) - } - - #[test] - fn session_creation() { - let session = Session::new(test_request()); - assert!(!session.id().is_nil()); - assert!(session.messages().is_empty()); - assert!(session.pending_edits().is_empty()); - } - - #[test] - fn session_add_messages() { - let mut session = Session::new(test_request()); - - session.add_user_message("Hello"); - session.add_assistant_message("Hi there!"); - - assert_eq!(session.messages().len(), 2); - assert_eq!(session.messages()[0].role(), MessageRole::User); - assert_eq!(session.messages()[1].role(), MessageRole::Assistant); - } -} diff --git a/crates/nvisy-rig/src/session/policy.rs b/crates/nvisy-rig/src/session/policy.rs deleted file mode 100644 index 4719355..0000000 --- a/crates/nvisy-rig/src/session/policy.rs +++ /dev/null @@ -1,352 +0,0 @@ -//! Auto-apply policies for edit approval. - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -/// Context for auto-apply decisions. -#[derive(Debug, Clone)] -pub struct AutoApplyContext { - /// Number of edits already auto-applied in this session. - pub auto_applied_count: usize, - - /// Whether the edit is idempotent. - pub is_idempotent: bool, - - /// The operation type being evaluated (e.g., "replace", "insert", "delete"). - pub operation_type: String, - - /// Number of times user has approved this operation type in this session. - pub approval_count_for_type: usize, -} - -impl AutoApplyContext { - /// Creates a new context for auto-apply evaluation. - pub fn new(operation_type: impl Into) -> Self { - Self { - auto_applied_count: 0, - is_idempotent: false, - operation_type: operation_type.into(), - approval_count_for_type: 0, - } - } - - /// Sets whether the operation is idempotent. - pub fn with_idempotent(mut self, is_idempotent: bool) -> Self { - self.is_idempotent = is_idempotent; - self - } - - /// Sets the number of auto-applied edits in the session. - pub fn with_auto_applied_count(mut self, count: usize) -> Self { - self.auto_applied_count = count; - self - } - - /// Sets the approval count for this operation type. - pub fn with_approval_count(mut self, count: usize) -> Self { - self.approval_count_for_type = count; - self - } -} - -/// Tracks approval history per operation type. 
-#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct ApprovalHistory { - /// Count of approvals per operation type. - approvals: HashMap, -} - -impl ApprovalHistory { - /// Creates a new empty approval history. - pub fn new() -> Self { - Self::default() - } - - /// Records an approval for the given operation type. - pub fn record_approval(&mut self, operation_type: &str) { - *self - .approvals - .entry(operation_type.to_string()) - .or_insert(0) += 1; - } - - /// Returns the approval count for the given operation type. - pub fn approval_count(&self, operation_type: &str) -> usize { - self.approvals.get(operation_type).copied().unwrap_or(0) - } - - /// Clears all approval history. - pub fn clear(&mut self) { - self.approvals.clear(); - } -} - -/// Policy for automatically applying edits. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[serde(tag = "mode", rename_all = "snake_case")] -pub enum ApplyPolicy { - /// Never auto-apply, always require approval. - #[default] - RequireApproval, - - /// Auto-apply idempotent operations only. - IdempotentOnly, - - /// Auto-apply after user approves similar operations. - LearnFromApproval, - - /// Auto-apply all edits (dangerous). - AutoApplyAll, - - /// Custom policy with specific rules. - Custom(CustomPolicy), -} - -impl ApplyPolicy { - /// Creates a policy that requires approval for everything. - pub fn require_approval() -> Self { - Self::RequireApproval - } - - /// Creates a policy that auto-applies idempotent operations. - pub fn idempotent_only() -> Self { - Self::IdempotentOnly - } - - /// Creates a policy that learns from user approvals. - pub fn learn_from_approval() -> Self { - Self::LearnFromApproval - } - - /// Creates a policy that auto-applies everything. - /// - /// # Warning - /// This is dangerous and should only be used for testing - /// or when the user explicitly opts in. - pub fn auto_apply_all() -> Self { - Self::AutoApplyAll - } - - /// Determines if an edit should be auto-applied. - pub fn should_auto_apply(&self, context: &AutoApplyContext) -> bool { - match self { - Self::RequireApproval => false, - Self::IdempotentOnly => context.is_idempotent, - Self::LearnFromApproval => { - // Auto-apply if idempotent OR if user has approved at least one similar edit - context.is_idempotent || context.approval_count_for_type > 0 - } - Self::AutoApplyAll => true, - Self::Custom(policy) => policy.should_auto_apply(context), - } - } -} - -/// Custom auto-apply policy with fine-grained rules. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CustomPolicy { - /// Auto-apply idempotent operations. - pub auto_apply_idempotent: bool, - - /// Auto-apply after N similar approvals for the same operation type. - pub learn_threshold: Option, - - /// Maximum edits to auto-apply per session. - pub max_auto_apply: Option, - - /// Allowed operation types for auto-apply. - /// If empty, all operation types are considered. - pub allowed_operations: Vec, -} - -impl CustomPolicy { - /// Creates a custom policy that auto-applies idempotent operations. - pub fn idempotent_only() -> Self { - Self { - auto_apply_idempotent: true, - learn_threshold: None, - max_auto_apply: None, - allowed_operations: Vec::new(), - } - } - - /// Creates a custom policy that learns from approvals. - pub fn learning(threshold: usize) -> Self { - Self { - auto_apply_idempotent: true, - learn_threshold: Some(threshold), - max_auto_apply: None, - allowed_operations: Vec::new(), - } - } - - /// Sets the maximum number of auto-applied edits. 
- pub fn with_max_auto_apply(mut self, max: usize) -> Self { - self.max_auto_apply = Some(max); - self - } - - /// Sets the allowed operation types. - pub fn with_allowed_operations(mut self, operations: Vec) -> Self { - self.allowed_operations = operations; - self - } - - /// Determines if an edit should be auto-applied. - pub fn should_auto_apply(&self, context: &AutoApplyContext) -> bool { - // Check max auto-apply limit - if let Some(max) = self.max_auto_apply - && context.auto_applied_count >= max - { - return false; - } - - // Check if operation type is allowed (empty = all allowed) - if !self.allowed_operations.is_empty() - && !self.allowed_operations.contains(&context.operation_type) - { - return false; - } - - // Check idempotent rule - if self.auto_apply_idempotent && context.is_idempotent { - return true; - } - - // Check learn threshold - if let Some(threshold) = self.learn_threshold - && context.approval_count_for_type >= threshold - { - return true; - } - - false - } -} - -impl Default for CustomPolicy { - fn default() -> Self { - Self { - auto_apply_idempotent: true, - learn_threshold: Some(2), - max_auto_apply: Some(10), - allowed_operations: Vec::new(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn context_for(op_type: &str) -> AutoApplyContext { - AutoApplyContext::new(op_type) - } - - #[test] - fn require_approval_never_auto_applies() { - let policy = ApplyPolicy::require_approval(); - let context = context_for("replace") - .with_idempotent(true) - .with_approval_count(10); - - assert!(!policy.should_auto_apply(&context)); - } - - #[test] - fn idempotent_only_checks_idempotency() { - let policy = ApplyPolicy::idempotent_only(); - - let idempotent = context_for("replace").with_idempotent(true); - let non_idempotent = context_for("replace").with_idempotent(false); - - assert!(policy.should_auto_apply(&idempotent)); - assert!(!policy.should_auto_apply(&non_idempotent)); - } - - #[test] - fn learn_from_approval() { - let policy = ApplyPolicy::learn_from_approval(); - - // Non-idempotent with no approvals - should not auto-apply - let no_approvals = context_for("delete"); - assert!(!policy.should_auto_apply(&no_approvals)); - - // Non-idempotent with approvals - should auto-apply - let with_approvals = context_for("delete").with_approval_count(1); - assert!(policy.should_auto_apply(&with_approvals)); - - // Idempotent without approvals - should still auto-apply - let idempotent = context_for("insert").with_idempotent(true); - assert!(policy.should_auto_apply(&idempotent)); - } - - #[test] - fn auto_apply_all() { - let policy = ApplyPolicy::auto_apply_all(); - let context = context_for("delete"); - - assert!(policy.should_auto_apply(&context)); - } - - #[test] - fn custom_policy_max_limit() { - let policy = CustomPolicy::default().with_max_auto_apply(5); - - // Under limit - let under_limit = context_for("replace") - .with_idempotent(true) - .with_auto_applied_count(4); - assert!(policy.should_auto_apply(&under_limit)); - - // At limit - let at_limit = context_for("replace") - .with_idempotent(true) - .with_auto_applied_count(5); - assert!(!policy.should_auto_apply(&at_limit)); - } - - #[test] - fn custom_policy_learn_threshold() { - let policy = CustomPolicy::learning(3); - - // Below threshold - let below = context_for("delete").with_approval_count(2); - assert!(!policy.should_auto_apply(&below)); - - // At threshold - let at_threshold = context_for("delete").with_approval_count(3); - assert!(policy.should_auto_apply(&at_threshold)); - } - - #[test] - fn 
custom_policy_allowed_operations() { - let policy = CustomPolicy::idempotent_only() - .with_allowed_operations(vec!["insert".to_string(), "replace".to_string()]); - - // Allowed operation - let allowed = context_for("insert").with_idempotent(true); - assert!(policy.should_auto_apply(&allowed)); - - // Disallowed operation - let disallowed = context_for("delete").with_idempotent(true); - assert!(!policy.should_auto_apply(&disallowed)); - } - - #[test] - fn approval_history_tracking() { - let mut history = ApprovalHistory::new(); - - assert_eq!(history.approval_count("replace"), 0); - - history.record_approval("replace"); - assert_eq!(history.approval_count("replace"), 1); - - history.record_approval("replace"); - history.record_approval("insert"); - assert_eq!(history.approval_count("replace"), 2); - assert_eq!(history.approval_count("insert"), 1); - assert_eq!(history.approval_count("delete"), 0); - } -} diff --git a/crates/nvisy-rig/src/session/store.rs b/crates/nvisy-rig/src/session/store.rs deleted file mode 100644 index d8700a3..0000000 --- a/crates/nvisy-rig/src/session/store.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! Session storage backed by NATS KV. -//! -//! This module provides session persistence using the NATS KV store -//! from nvisy-nats. Sessions are automatically expired based on TTL. - -use std::time::Duration; - -use derive_more::{Deref, DerefMut}; -use nvisy_nats::NatsClient; -use nvisy_nats::kv::ChatHistoryStore; - -use super::Session; -use crate::Result; - -/// Session store backed by NATS KV. -/// -/// This is a thin wrapper around `nvisy_nats::kv::ChatHistoryStore` -/// that provides session persistence for rig agents. -/// -/// This type is cheap to clone and can be shared across threads. -#[derive(Clone, Deref, DerefMut)] -pub struct SessionStore { - #[deref] - #[deref_mut] - inner: ChatHistoryStore, -} - -impl SessionStore { - /// Creates a new session store with default TTL (30 minutes). - pub async fn new(nats: NatsClient) -> Result { - let inner = nats - .chat_history_store(None) - .await - .map_err(|e| crate::Error::session(format!("failed to create store: {e}")))?; - Ok(Self { inner }) - } - - /// Creates a session store with custom TTL. - pub async fn with_ttl(nats: NatsClient, ttl: Duration) -> Result { - let inner = nats - .chat_history_store(Some(ttl)) - .await - .map_err(|e| crate::Error::session(format!("failed to create store: {e}")))?; - Ok(Self { inner }) - } - - /// Creates a new session. - pub async fn create(&self, session: &Session) -> Result<()> { - self.inner - .create(session.id(), session) - .await - .map_err(|e| crate::Error::session(format!("failed to create: {e}"))) - } - - /// Gets a session by ID. - pub async fn get(&self, session_id: uuid::Uuid) -> Result> { - self.inner - .get(session_id) - .await - .map_err(|e| crate::Error::session(format!("failed to get: {e}"))) - } - - /// Updates an existing session (also resets TTL). - pub async fn update(&self, session: &Session) -> Result<()> { - self.inner - .update(session.id(), session) - .await - .map_err(|e| crate::Error::session(format!("failed to update: {e}"))) - } - - /// Touches a session to reset its TTL. - pub async fn touch(&self, session_id: uuid::Uuid) -> Result<()> { - self.inner - .touch(session_id) - .await - .map_err(|e| crate::Error::session(format!("failed to touch: {e}"))) - } - - /// Deletes a session. 
- pub async fn delete(&self, session_id: uuid::Uuid) -> Result<()> { - self.inner - .delete(session_id) - .await - .map_err(|e| crate::Error::session(format!("failed to delete: {e}"))) - } -} diff --git a/crates/nvisy-rig/src/tool/definition.rs b/crates/nvisy-rig/src/tool/definition.rs deleted file mode 100644 index 7cd7b04..0000000 --- a/crates/nvisy-rig/src/tool/definition.rs +++ /dev/null @@ -1,312 +0,0 @@ -//! Tool definitions and schemas. - -use serde::{Deserialize, Serialize}; -use serde_json::Value; - -/// Definition of a tool available to the agent. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolDefinition { - /// Unique name of the tool. - name: String, - - /// Human-readable description. - description: String, - - /// JSON Schema for the tool's parameters. - parameters: Value, - - /// Whether the tool is idempotent. - idempotent: bool, - - /// Whether the tool requires user confirmation. - requires_confirmation: bool, -} - -impl ToolDefinition { - /// Creates a new tool definition. - pub fn new(name: impl Into, description: impl Into, parameters: Value) -> Self { - Self { - name: name.into(), - description: description.into(), - parameters, - idempotent: false, - requires_confirmation: false, - } - } - - /// Marks the tool as idempotent. - pub fn idempotent(mut self) -> Self { - self.idempotent = true; - self - } - - /// Marks the tool as requiring confirmation. - pub fn with_confirmation(mut self) -> Self { - self.requires_confirmation = true; - self - } - - /// Returns the tool name. - pub fn name(&self) -> &str { - &self.name - } - - /// Returns the tool description. - pub fn description(&self) -> &str { - &self.description - } - - /// Returns the parameter schema. - pub fn parameters(&self) -> &Value { - &self.parameters - } - - /// Returns whether the tool is idempotent. - pub fn is_idempotent(&self) -> bool { - self.idempotent - } - - /// Returns whether the tool requires confirmation. - pub fn requires_confirmation(&self) -> bool { - self.requires_confirmation - } - - /// Converts to OpenAI function format. - pub fn to_openai_function(&self) -> Value { - serde_json::json!({ - "type": "function", - "function": { - "name": self.name, - "description": self.description, - "parameters": self.parameters - } - }) - } - - /// Converts to Anthropic tool format. - pub fn to_anthropic_tool(&self) -> Value { - serde_json::json!({ - "name": self.name, - "description": self.description, - "input_schema": self.parameters - }) - } -} - -/// Builder for common tool definitions. -pub struct ToolBuilder; - -impl ToolBuilder { - /// Creates the search tool definition. - pub fn search() -> ToolDefinition { - ToolDefinition::new( - "search", - "Search for content within the document", - serde_json::json!({ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The search query" - }, - "max_results": { - "type": "integer", - "description": "Maximum number of results to return", - "default": 5 - } - }, - "required": ["query"] - }), - ) - .idempotent() - } - - /// Creates the read tool definition. 
- pub fn read() -> ToolDefinition { - ToolDefinition::new( - "read", - "Read a specific section or page of the document", - serde_json::json!({ - "type": "object", - "properties": { - "section": { - "type": "string", - "description": "Section identifier (page number, heading, etc.)" - }, - "range": { - "type": "object", - "properties": { - "start": { "type": "integer" }, - "end": { "type": "integer" } - }, - "description": "Page range to read" - } - } - }), - ) - .idempotent() - } - - /// Creates the extract tool definition. - pub fn extract() -> ToolDefinition { - ToolDefinition::new( - "extract", - "Extract a specific element from the document (table, figure, etc.)", - serde_json::json!({ - "type": "object", - "properties": { - "element_type": { - "type": "string", - "enum": ["table", "figure", "section", "list", "code"], - "description": "Type of element to extract" - }, - "identifier": { - "type": "string", - "description": "Element identifier (e.g., 'Table 12.6', 'Figure 3')" - }, - "format": { - "type": "string", - "enum": ["markdown", "json", "csv", "text"], - "description": "Output format for the extracted content" - } - }, - "required": ["element_type", "identifier"] - }), - ) - .idempotent() - } - - /// Creates the edit tool definition. - pub fn edit() -> ToolDefinition { - ToolDefinition::new( - "edit", - "Edit content in the document", - serde_json::json!({ - "type": "object", - "properties": { - "location": { - "type": "object", - "properties": { - "page": { "type": "integer" }, - "section": { "type": "string" }, - "offset": { "type": "integer" } - }, - "description": "Location of the content to edit" - }, - "original": { - "type": "string", - "description": "Original content to replace" - }, - "replacement": { - "type": "string", - "description": "New content" - }, - "reason": { - "type": "string", - "description": "Reason for the edit" - } - }, - "required": ["location", "original", "replacement"] - }), - ) - .with_confirmation() - } - - /// Creates the insert tool definition. - pub fn insert() -> ToolDefinition { - ToolDefinition::new( - "insert", - "Insert new content into the document", - serde_json::json!({ - "type": "object", - "properties": { - "location": { - "type": "object", - "properties": { - "page": { "type": "integer" }, - "section": { "type": "string" }, - "position": { - "type": "string", - "enum": ["before", "after", "start", "end"] - } - }, - "description": "Where to insert the content" - }, - "content": { - "type": "string", - "description": "Content to insert" - }, - "reason": { - "type": "string", - "description": "Reason for the insertion" - } - }, - "required": ["location", "content"] - }), - ) - .with_confirmation() - } - - /// Creates the redact tool definition. 
- pub fn redact() -> ToolDefinition { - ToolDefinition::new( - "redact", - "Redact sensitive information from the document", - serde_json::json!({ - "type": "object", - "properties": { - "pattern": { - "type": "string", - "description": "Pattern to match for redaction (regex supported)" - }, - "category": { - "type": "string", - "enum": ["pii", "financial", "medical", "legal", "custom"], - "description": "Category of information to redact" - }, - "replacement": { - "type": "string", - "description": "Replacement text (default: [REDACTED])" - }, - "preview": { - "type": "boolean", - "description": "If true, return matches without redacting", - "default": false - } - }, - "required": ["category"] - }), - ) - .with_confirmation() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tool_builder_search() { - let tool = ToolBuilder::search(); - assert_eq!(tool.name(), "search"); - assert!(tool.is_idempotent()); - assert!(!tool.requires_confirmation()); - } - - #[test] - fn tool_builder_edit() { - let tool = ToolBuilder::edit(); - assert_eq!(tool.name(), "edit"); - assert!(!tool.is_idempotent()); - assert!(tool.requires_confirmation()); - } - - #[test] - fn tool_to_openai_format() { - let tool = ToolBuilder::search(); - let openai = tool.to_openai_function(); - - assert_eq!(openai["type"], "function"); - assert_eq!(openai["function"]["name"], "search"); - } -} diff --git a/crates/nvisy-rig/src/tool/edit/mod.rs b/crates/nvisy-rig/src/tool/edit/mod.rs deleted file mode 100644 index db9aa38..0000000 --- a/crates/nvisy-rig/src/tool/edit/mod.rs +++ /dev/null @@ -1,102 +0,0 @@ -//! Edit module for document modifications. -//! -//! This module handles: -//! - Proposed edits from the agent -//! - Edit approval and rejection -//! - Edit application to documents -//! - Edit preview generation - -mod operation; -mod proposed; - -pub use operation::{EditLocation, EditOperation}; -pub use proposed::ProposedEdit; -use uuid::Uuid; - -/// Result of applying edits. -#[derive(Debug, Clone)] -pub struct ApplyResult { - /// Successfully applied edits. - pub applied: Vec, - - /// Edits that were not found. - pub not_found: Vec, - - /// Edits that failed to apply. - pub errors: Vec, -} - -impl ApplyResult { - /// Returns whether all edits were applied successfully. - pub fn is_success(&self) -> bool { - self.not_found.is_empty() && self.errors.is_empty() - } - - /// Returns the count of successfully applied edits. - pub fn applied_count(&self) -> usize { - self.applied.len() - } - - /// Returns the count of failed edits. - pub fn failed_count(&self) -> usize { - self.not_found.len() + self.errors.len() - } -} - -/// Error that occurred while applying an edit. -#[derive(Debug, Clone)] -pub struct ApplyError { - /// The edit ID that failed. - pub edit_id: Uuid, - - /// Error message. - pub message: String, - - /// Whether the error is recoverable. - pub recoverable: bool, -} - -impl ApplyError { - /// Creates a new apply error. - pub fn new(edit_id: Uuid, message: impl Into) -> Self { - Self { - edit_id, - message: message.into(), - recoverable: false, - } - } - - /// Marks the error as recoverable. 
- pub fn recoverable(mut self) -> Self { - self.recoverable = true; - self - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn apply_result_success() { - let result = ApplyResult { - applied: vec![], - not_found: vec![], - errors: vec![], - }; - - assert!(result.is_success()); - } - - #[test] - fn apply_result_with_errors() { - let result = ApplyResult { - applied: vec![], - not_found: vec![Uuid::now_v7()], - errors: vec![], - }; - - assert!(!result.is_success()); - assert_eq!(result.failed_count(), 1); - } -} diff --git a/crates/nvisy-rig/src/tool/edit/operation.rs b/crates/nvisy-rig/src/tool/edit/operation.rs deleted file mode 100644 index a03ec99..0000000 --- a/crates/nvisy-rig/src/tool/edit/operation.rs +++ /dev/null @@ -1,164 +0,0 @@ -//! Edit operations and locations. - -use serde::{Deserialize, Serialize}; - -/// Location within a document for an edit. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct EditLocation { - /// Page number (1-indexed). - pub page: Option, - - /// Section identifier. - pub section: Option, - - /// Character offset from start. - pub offset: Option, - - /// Length of the affected region. - pub length: Option, -} - -impl EditLocation { - /// Creates a page-based location. - pub fn page(page: u32) -> Self { - Self { - page: Some(page), - section: None, - offset: None, - length: None, - } - } - - /// Creates a section-based location. - pub fn section(section: impl Into) -> Self { - Self { - page: None, - section: Some(section.into()), - offset: None, - length: None, - } - } - - /// Creates an offset-based location. - pub fn offset(offset: usize, length: usize) -> Self { - Self { - page: None, - section: None, - offset: Some(offset), - length: Some(length), - } - } - - /// Adds page information. - pub fn with_page(mut self, page: u32) -> Self { - self.page = Some(page); - self - } - - /// Adds section information. - pub fn with_section(mut self, section: impl Into) -> Self { - self.section = Some(section.into()); - self - } - - /// Returns a display string for the location. - pub fn display(&self) -> String { - let mut parts = Vec::new(); - - if let Some(page) = self.page { - parts.push(format!("page {page}")); - } - - if let Some(section) = &self.section { - parts.push(format!("'{section}'")); - } - - if let Some(offset) = self.offset { - if let Some(length) = self.length { - parts.push(format!("offset {offset}..{}", offset + length)); - } else { - parts.push(format!("offset {offset}")); - } - } - - if parts.is_empty() { - "unspecified location".to_string() - } else { - parts.join(", ") - } - } -} - -/// Type of edit operation. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum EditOperation { - /// Replace existing content. - Replace, - - /// Insert new content. - Insert, - - /// Delete content. - Delete, - - /// Redact content (replace with placeholder). - Redact, - - /// Extract content (copy without modifying). - Extract, -} - -impl EditOperation { - /// Returns whether this operation is idempotent. - pub fn is_idempotent(&self) -> bool { - matches!(self, Self::Extract) - } - - /// Returns whether this operation modifies the document. - pub fn is_destructive(&self) -> bool { - matches!( - self, - Self::Replace | Self::Insert | Self::Delete | Self::Redact - ) - } - - /// Returns a human-readable name. 
- pub fn name(&self) -> &'static str { - match self { - Self::Replace => "replace", - Self::Insert => "insert", - Self::Delete => "delete", - Self::Redact => "redact", - Self::Extract => "extract", - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn edit_location_display() { - let loc = EditLocation::page(5).with_section("Introduction"); - let display = loc.display(); - - assert!(display.contains("page 5")); - assert!(display.contains("Introduction")); - } - - #[test] - fn edit_operation_idempotency() { - assert!(EditOperation::Extract.is_idempotent()); - assert!(!EditOperation::Replace.is_idempotent()); - assert!(!EditOperation::Delete.is_idempotent()); - } - - #[test] - fn edit_operation_destructive() { - assert!(EditOperation::Replace.is_destructive()); - assert!(EditOperation::Delete.is_destructive()); - assert!(!EditOperation::Extract.is_destructive()); - } -} diff --git a/crates/nvisy-rig/src/tool/edit/proposed.rs b/crates/nvisy-rig/src/tool/edit/proposed.rs deleted file mode 100644 index d73c803..0000000 --- a/crates/nvisy-rig/src/tool/edit/proposed.rs +++ /dev/null @@ -1,305 +0,0 @@ -//! Proposed edit types. - -use jiff::Timestamp; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::{EditLocation, EditOperation}; - -/// A proposed edit to a document. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProposedEdit { - /// Unique edit ID. - id: Uuid, - - /// Document being edited. - document_id: Uuid, - - /// Type of operation. - operation: EditOperation, - - /// Location of the edit. - location: EditLocation, - - /// Original content (for replace/delete). - original: Option, - - /// New content (for replace/insert). - replacement: Option, - - /// Reason for the edit. - reason: String, - - /// Preview of the result. - preview: Option, - - /// When the edit was proposed. - proposed_at: Timestamp, - - /// Current status. - status: EditStatus, -} - -impl ProposedEdit { - /// Creates a new proposed edit. - pub fn new( - document_id: Uuid, - operation: EditOperation, - location: EditLocation, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation, - location, - original: None, - replacement: None, - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates a replace edit. - pub fn replace( - document_id: Uuid, - location: EditLocation, - original: impl Into, - replacement: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Replace, - location, - original: Some(original.into()), - replacement: Some(replacement.into()), - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates an insert edit. - pub fn insert( - document_id: Uuid, - location: EditLocation, - content: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Insert, - location, - original: None, - replacement: Some(content.into()), - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates a delete edit. 
- pub fn delete( - document_id: Uuid, - location: EditLocation, - content: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Delete, - location, - original: Some(content.into()), - replacement: None, - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates a redact edit. - pub fn redact( - document_id: Uuid, - location: EditLocation, - content: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Redact, - location, - original: Some(content.into()), - replacement: Some("[REDACTED]".to_string()), - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Adds a preview. - pub fn with_preview(mut self, preview: impl Into) -> Self { - self.preview = Some(preview.into()); - self - } - - /// Returns the edit ID. - pub fn id(&self) -> Uuid { - self.id - } - - /// Returns the document ID. - pub fn document_id(&self) -> Uuid { - self.document_id - } - - /// Returns the operation type. - pub fn operation(&self) -> EditOperation { - self.operation - } - - /// Returns the operation type as a string. - pub fn operation_type(&self) -> &'static str { - self.operation.name() - } - - /// Returns the location. - pub fn location(&self) -> &EditLocation { - &self.location - } - - /// Returns the original content. - pub fn original(&self) -> Option<&str> { - self.original.as_deref() - } - - /// Returns the replacement content. - pub fn replacement(&self) -> Option<&str> { - self.replacement.as_deref() - } - - /// Returns the reason. - pub fn reason(&self) -> &str { - &self.reason - } - - /// Returns the preview. - pub fn preview(&self) -> Option<&str> { - self.preview.as_deref() - } - - /// Returns when the edit was proposed. - pub fn proposed_at(&self) -> Timestamp { - self.proposed_at - } - - /// Returns the current status. - pub fn status(&self) -> EditStatus { - self.status - } - - /// Returns whether this operation is idempotent. - pub fn is_idempotent(&self) -> bool { - self.operation.is_idempotent() - } - - /// Returns whether this edit is pending. - pub fn is_pending(&self) -> bool { - self.status == EditStatus::Pending - } - - /// Marks the edit as applied. - pub fn mark_applied(&mut self) { - self.status = EditStatus::Applied; - } - - /// Marks the edit as rejected. - pub fn mark_rejected(&mut self) { - self.status = EditStatus::Rejected; - } - - /// Returns a summary of the edit for display. - pub fn summary(&self) -> String { - format!( - "{} at {}: {}", - self.operation.name(), - self.location.display(), - self.reason - ) - } -} - -/// Status of a proposed edit. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum EditStatus { - /// Awaiting user approval. - Pending, - - /// Approved and applied. - Applied, - - /// Rejected by user. - Rejected, - - /// Failed to apply. 
- Failed, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn proposed_edit_replace() { - let edit = ProposedEdit::replace( - Uuid::now_v7(), - EditLocation::page(1), - "old text", - "new text", - "fixing typo", - ); - - assert_eq!(edit.operation(), EditOperation::Replace); - assert_eq!(edit.original(), Some("old text")); - assert_eq!(edit.replacement(), Some("new text")); - assert!(edit.is_pending()); - } - - #[test] - fn proposed_edit_redact() { - let edit = ProposedEdit::redact( - Uuid::now_v7(), - EditLocation::page(5), - "SSN: 123-45-6789", - "removing PII", - ); - - assert_eq!(edit.operation(), EditOperation::Redact); - assert_eq!(edit.replacement(), Some("[REDACTED]")); - } - - #[test] - fn proposed_edit_summary() { - let edit = ProposedEdit::delete( - Uuid::now_v7(), - EditLocation::section("Appendix"), - "old content", - "removing outdated section", - ); - - let summary = edit.summary(); - assert!(summary.contains("delete")); - assert!(summary.contains("Appendix")); - } -} diff --git a/crates/nvisy-rig/src/tool/mod.rs b/crates/nvisy-rig/src/tool/mod.rs deleted file mode 100644 index 8bca68a..0000000 --- a/crates/nvisy-rig/src/tool/mod.rs +++ /dev/null @@ -1,148 +0,0 @@ -//! Tool module for agent capabilities. -//! -//! This module provides tool support for agents, building on rig-core's -//! tool infrastructure while adding document-specific tools and registry. -//! -//! ## Rig-core integration -//! -//! We re-export key types from rig-core: -//! - [`rig::tool::Tool`] - The core tool trait -//! - [`rig::tool::ToolDyn`] - Dynamic dispatch wrapper -//! - [`rig::completion::ToolDefinition`] - Tool schema definition -//! -//! ## Document tools -//! -//! Pre-built tools for document processing: -//! - `search` - Search document content -//! - `read` - Read specific sections -//! - `extract` - Extract elements (tables, figures) -//! - `edit` - Modify document content -//! - `insert` - Add new content -//! - `redact` - Redact sensitive information -//! -//! ## Submodules -//! -//! - [`edit`] - Proposed edits and edit operations - -mod definition; -pub mod edit; -mod registry; -mod types; - -// Re-export rig-core tool types -// Our extensions -pub use definition::ToolDefinition; -pub use registry::ToolRegistry; -pub use rig::tool::{Tool, ToolDyn, ToolError}; -use serde::{Deserialize, Serialize}; -pub use types::{ToolInput, ToolOutput}; -use uuid::Uuid; - -/// A tool call made by the agent. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolCall { - /// Unique ID for this call. - pub id: Uuid, - - /// Name of the tool being called. - pub name: String, - - /// Arguments to the tool (JSON). - pub arguments: serde_json::Value, -} - -impl ToolCall { - /// Creates a new tool call. - pub fn new(name: impl Into, arguments: serde_json::Value) -> Self { - Self { - id: Uuid::now_v7(), - name: name.into(), - arguments, - } - } - - /// Returns whether this tool call is idempotent. - pub fn is_idempotent(&self) -> bool { - matches!(self.name.as_str(), "search" | "extract" | "read") - } - - /// Returns the arguments as a JSON string. - pub fn arguments_string(&self) -> String { - serde_json::to_string(&self.arguments).unwrap_or_default() - } -} - -/// Result of a tool execution. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolResult { - /// The call ID this result is for. - pub call_id: Uuid, - - /// Whether the call succeeded. - pub success: bool, - - /// Output from the tool. - pub output: ToolOutput, - - /// Error message if failed. 
- pub error: Option, -} - -impl ToolResult { - /// Creates a successful result. - pub fn success(call_id: Uuid, output: ToolOutput) -> Self { - Self { - call_id, - success: true, - output, - error: None, - } - } - - /// Creates a failed result. - pub fn failure(call_id: Uuid, error: impl Into) -> Self { - Self { - call_id, - success: false, - output: ToolOutput::empty(), - error: Some(error.into()), - } - } - - /// Creates a result from a rig tool error. - pub fn from_error(call_id: Uuid, error: ToolError) -> Self { - Self::failure(call_id, error.to_string()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tool_call_idempotency() { - let search = ToolCall::new("search", serde_json::json!({"query": "test"})); - let edit = ToolCall::new("edit", serde_json::json!({"content": "new"})); - - assert!(search.is_idempotent()); - assert!(!edit.is_idempotent()); - } - - #[test] - fn tool_result_success() { - let call_id = Uuid::now_v7(); - let result = ToolResult::success(call_id, ToolOutput::text("done")); - - assert!(result.success); - assert!(result.error.is_none()); - } - - #[test] - fn tool_result_failure() { - let call_id = Uuid::now_v7(); - let result = ToolResult::failure(call_id, "something went wrong"); - - assert!(!result.success); - assert_eq!(result.error, Some("something went wrong".to_string())); - } -} diff --git a/crates/nvisy-rig/src/tool/registry.rs b/crates/nvisy-rig/src/tool/registry.rs deleted file mode 100644 index 612bb99..0000000 --- a/crates/nvisy-rig/src/tool/registry.rs +++ /dev/null @@ -1,152 +0,0 @@ -//! Tool registry for managing available tools. - -use std::collections::HashMap; -use std::sync::Arc; - -use super::{ToolCall, ToolDefinition, ToolInput, ToolOutput, ToolResult}; -use crate::Result; - -/// Handler function for tool execution. -pub type ToolHandler = - Arc futures::future::BoxFuture<'static, Result> + Send + Sync>; - -/// Registry of available tools. -#[derive(Default)] -pub struct ToolRegistry { - definitions: HashMap, - handlers: HashMap, -} - -impl ToolRegistry { - /// Creates a new empty registry. - pub fn new() -> Self { - Self::default() - } - - /// Creates a registry with default tools. - pub fn with_defaults() -> Self { - use super::definition::ToolBuilder; - - let mut registry = Self::new(); - - // Register default tool definitions - registry.register_definition(ToolBuilder::search()); - registry.register_definition(ToolBuilder::read()); - registry.register_definition(ToolBuilder::extract()); - registry.register_definition(ToolBuilder::edit()); - registry.register_definition(ToolBuilder::insert()); - registry.register_definition(ToolBuilder::redact()); - - registry - } - - /// Registers a tool definition. - pub fn register_definition(&mut self, definition: ToolDefinition) { - self.definitions - .insert(definition.name().to_string(), definition); - } - - /// Registers a tool handler. - pub fn register_handler(&mut self, name: impl Into, handler: ToolHandler) { - self.handlers.insert(name.into(), handler); - } - - /// Registers both definition and handler. - pub fn register(&mut self, definition: ToolDefinition, handler: ToolHandler) { - let name = definition.name().to_string(); - self.definitions.insert(name.clone(), definition); - self.handlers.insert(name, handler); - } - - /// Returns a tool definition by name. - pub fn get_definition(&self, name: &str) -> Option<&ToolDefinition> { - self.definitions.get(name) - } - - /// Returns all tool definitions. 
- pub fn definitions(&self) -> impl Iterator { - self.definitions.values() - } - - /// Returns all tool definitions as a vector. - pub fn definitions_vec(&self) -> Vec { - self.definitions.values().cloned().collect() - } - - /// Returns whether a tool exists. - pub fn has_tool(&self, name: &str) -> bool { - self.definitions.contains_key(name) - } - - /// Returns whether a tool has a handler. - pub fn has_handler(&self, name: &str) -> bool { - self.handlers.contains_key(name) - } - - /// Executes a tool call. - pub async fn execute(&self, call: &ToolCall) -> ToolResult { - let Some(handler) = self.handlers.get(&call.name) else { - return ToolResult::failure(call.id, format!("tool '{}' not found", call.name)); - }; - - let input = ToolInput { - call_id: call.id, - arguments: call.arguments.clone(), - }; - - match handler(input).await { - Ok(output) => ToolResult::success(call.id, output), - Err(e) => ToolResult::failure(call.id, e.to_string()), - } - } - - /// Returns the number of registered tools. - pub fn len(&self) -> usize { - self.definitions.len() - } - - /// Returns whether the registry is empty. - pub fn is_empty(&self) -> bool { - self.definitions.is_empty() - } -} - -impl std::fmt::Debug for ToolRegistry { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ToolRegistry") - .field("definitions", &self.definitions.keys().collect::>()) - .field("handlers", &self.handlers.keys().collect::>()) - .finish() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn registry_with_defaults() { - let registry = ToolRegistry::with_defaults(); - - assert!(registry.has_tool("search")); - assert!(registry.has_tool("read")); - assert!(registry.has_tool("extract")); - assert!(registry.has_tool("edit")); - assert!(registry.has_tool("insert")); - assert!(registry.has_tool("redact")); - } - - #[test] - fn registry_register_definition() { - let mut registry = ToolRegistry::new(); - - registry.register_definition(ToolDefinition::new( - "custom", - "A custom tool", - serde_json::json!({}), - )); - - assert!(registry.has_tool("custom")); - assert!(!registry.has_handler("custom")); - } -} diff --git a/crates/nvisy-rig/src/tool/types.rs b/crates/nvisy-rig/src/tool/types.rs deleted file mode 100644 index c7a3a44..0000000 --- a/crates/nvisy-rig/src/tool/types.rs +++ /dev/null @@ -1,183 +0,0 @@ -//! Tool input/output types. - -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use uuid::Uuid; - -/// Input to a tool execution. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolInput { - /// The call ID. - pub call_id: Uuid, - - /// Arguments from the tool call. - pub arguments: Value, -} - -impl ToolInput { - /// Gets a string argument. - pub fn get_string(&self, key: &str) -> Option<&str> { - self.arguments.get(key).and_then(|v| v.as_str()) - } - - /// Gets an integer argument. - pub fn get_i64(&self, key: &str) -> Option { - self.arguments.get(key).and_then(|v| v.as_i64()) - } - - /// Gets a boolean argument. - pub fn get_bool(&self, key: &str) -> Option { - self.arguments.get(key).and_then(|v| v.as_bool()) - } - - /// Gets an object argument. - pub fn get_object(&self, key: &str) -> Option<&serde_json::Map> { - self.arguments.get(key).and_then(|v| v.as_object()) - } - - /// Gets an array argument. - pub fn get_array(&self, key: &str) -> Option<&Vec> { - self.arguments.get(key).and_then(|v| v.as_array()) - } - - /// Deserializes the arguments to a typed struct. 
- pub fn parse Deserialize<'de>>(&self) -> Result { - serde_json::from_value(self.arguments.clone()) - } -} - -/// Output from a tool execution. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ToolOutput { - /// Plain text output. - Text { content: String }, - - /// JSON output. - Json { data: Value }, - - /// Binary data (base64 encoded). - Binary { data: String, mime_type: String }, - - /// Proposed edit output. - Edit { - edit_id: Uuid, - description: String, - preview: Option, - }, - - /// Multiple outputs. - Multiple { outputs: Vec }, - - /// Empty output. - Empty, -} - -impl ToolOutput { - /// Creates a text output. - pub fn text(content: impl Into) -> Self { - Self::Text { - content: content.into(), - } - } - - /// Creates a JSON output. - pub fn json(data: Value) -> Self { - Self::Json { data } - } - - /// Creates a binary output. - pub fn binary(data: impl Into, mime_type: impl Into) -> Self { - Self::Binary { - data: data.into(), - mime_type: mime_type.into(), - } - } - - /// Creates an edit output. - pub fn edit(edit_id: Uuid, description: impl Into) -> Self { - Self::Edit { - edit_id, - description: description.into(), - preview: None, - } - } - - /// Creates an edit output with preview. - pub fn edit_with_preview( - edit_id: Uuid, - description: impl Into, - preview: impl Into, - ) -> Self { - Self::Edit { - edit_id, - description: description.into(), - preview: Some(preview.into()), - } - } - - /// Creates a multiple output. - pub fn multiple(outputs: Vec) -> Self { - Self::Multiple { outputs } - } - - /// Creates an empty output. - pub fn empty() -> Self { - Self::Empty - } - - /// Converts to a string representation for the LLM. - pub fn to_llm_string(&self) -> String { - match self { - Self::Text { content } => content.clone(), - Self::Json { data } => serde_json::to_string_pretty(data).unwrap_or_default(), - Self::Binary { mime_type, .. 
} => format!("[Binary data: {mime_type}]"), - Self::Edit { - edit_id, - description, - preview, - } => { - if let Some(p) = preview { - format!("[Edit proposed: {edit_id}]\n{description}\n\nPreview:\n{p}") - } else { - format!("[Edit proposed: {edit_id}]\n{description}") - } - } - Self::Multiple { outputs } => outputs - .iter() - .map(|o| o.to_llm_string()) - .collect::>() - .join("\n---\n"), - Self::Empty => "[No output]".to_string(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tool_input_get_string() { - let input = ToolInput { - call_id: Uuid::now_v7(), - arguments: serde_json::json!({ - "query": "test", - "count": 5 - }), - }; - - assert_eq!(input.get_string("query"), Some("test")); - assert_eq!(input.get_i64("count"), Some(5)); - assert_eq!(input.get_string("missing"), None); - } - - #[test] - fn tool_output_to_llm_string() { - let text = ToolOutput::text("hello"); - assert_eq!(text.to_llm_string(), "hello"); - - let empty = ToolOutput::empty(); - assert_eq!(empty.to_llm_string(), "[No output]"); - } -} diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml new file mode 100644 index 0000000..660b2c4 --- /dev/null +++ b/crates/nvisy-runtime/Cargo.toml @@ -0,0 +1,64 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-runtime" +description = "Workflow definitions and execution engine for Nvisy pipelines" +readme = "./README.md" +keywords = ["workflow", "runtime", "engine", "pipeline", "execution"] +categories = ["development-tools", "algorithms"] + +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +# Internal crates +nvisy-core = { workspace = true } +nvisy-dal = { workspace = true } +nvisy-rig = { workspace = true } + +# Runtime crates +nvisy-rt-core = { workspace = true } +nvisy-rt-engine = { workspace = true } + +# Async runtime +tokio = { workspace = true, features = ["rt", "sync", "time"] } +futures = { workspace = true, features = [] } + +# Observability +tracing = { workspace = true, features = [] } + +# (De)serialization +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true, features = [] } + +# Derive macros & utilities +async-trait = { workspace = true, features = [] } +thiserror = { workspace = true, features = [] } +derive_more = { workspace = true, features = ["debug", "display", "from", "into"] } +derive_builder = { workspace = true, features = [] } +strum = { workspace = true, features = ["derive"] } + +# Data types +uuid = { workspace = true, features = ["v7", "serde"] } +jiff = { workspace = true, features = ["serde"] } + +# Graph data structures +petgraph = { workspace = true, features = [] } + +# Versioning +semver = { workspace = true, features = [] } + +[dev-dependencies] +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-runtime/README.md b/crates/nvisy-runtime/README.md new file mode 100644 index 0000000..6dac0c7 --- /dev/null +++ b/crates/nvisy-runtime/README.md @@ -0,0 +1,61 @@ +# nvisy-runtime + 
+[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + +Workflow definitions and execution engine for Nvisy pipelines. + +This crate provides the core abstractions for defining and executing +data processing workflows as directed acyclic graphs (DAGs). + +## Architecture + +### Definition vs Compiled Types + +The crate separates workflow representation into two layers: + +- **Definition types** (`definition`): JSON-serializable types for + storing, editing, and transmitting workflows. These include `Workflow`, + `Node`, `NodeKind`, `Input`, `Output`, and `CacheSlot`. + +- **Compiled types** (`graph`): Runtime-optimized types for execution. + These include `CompiledGraph`, `CompiledNode`, and processor types like + `EmbeddingProcessor` and `EnrichProcessor`. + +Use the `Engine` to compile definitions and execute workflows. + +## Example + +```rust,ignore +use nvisy_runtime::definition::{ + Input, Node, NodeKind, Output, Workflow, +}; +use nvisy_runtime::engine::Engine; +use nvisy_runtime::provider::CredentialsRegistry; + +// Create a workflow definition +let mut workflow = Workflow::new(); + +// Add input, transform, and output nodes... +// Connect nodes with edges... + +// Execute the workflow +let engine = Engine::with_defaults(); +let registry = CredentialsRegistry::default(); +let ctx = nvisy_dal::core::Context::default(); +let result = engine.execute(workflow, registry, ctx).await?; +``` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-runtime/src/definition/edge.rs b/crates/nvisy-runtime/src/definition/edge.rs new file mode 100644 index 0000000..aaca631 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/edge.rs @@ -0,0 +1,58 @@ +//! Edge types for connecting nodes in a workflow graph. + +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +use super::NodeId; + +/// An edge connecting two nodes in the workflow graph. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Builder)] +#[builder( + name = "EdgeBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] +pub struct Edge { + /// Source node ID. + pub from: NodeId, + /// Target node ID. + pub to: NodeId, + /// Optional port/slot name on the source node. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub from_port: Option, + /// Optional port/slot name on the target node. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub to_port: Option, +} + +impl EdgeBuilder { + fn validate(&self) -> Result<(), String> { + if self.from.is_none() { + return Err("from is required".into()); + } + if self.to.is_none() { + return Err("to is required".into()); + } + Ok(()) + } +} + +impl Edge { + /// Creates a new edge between two nodes. + pub fn new(from: NodeId, to: NodeId) -> Self { + Self { + from, + to, + from_port: None, + to_port: None, + } + } + + /// Returns a builder for creating an edge. 
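
For illustration, a small sketch of how an edge might be constructed with the generated builder (setter names follow the `with_` prefix declared above; the port names are invented for the example):

```rust,ignore
use nvisy_runtime::definition::{Edge, NodeId};

let from = NodeId::new();
let to = NodeId::new();

// Direct construction, no ports.
let plain = Edge::new(from, to);

// Builder with optional ports; `build()` fails if `from` or `to` is missing.
let with_ports = Edge::builder()
    .with_from(from)
    .with_to(to)
    .with_from_port("elements")
    .with_to_port("input")
    .build()
    .unwrap();

assert!(plain.from_port.is_none());
assert_eq!(with_ports.from_port.as_deref(), Some("elements"));
```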
+ pub fn builder() -> EdgeBuilder { + EdgeBuilder::default() + } +} diff --git a/crates/nvisy-runtime/src/definition/input.rs b/crates/nvisy-runtime/src/definition/input.rs new file mode 100644 index 0000000..b8126fa --- /dev/null +++ b/crates/nvisy-runtime/src/definition/input.rs @@ -0,0 +1,34 @@ +//! Input node definition types. + +use serde::{Deserialize, Serialize}; + +use super::route::CacheSlot; + +/// Input node definition - source of data for the workflow. +/// +/// Storage provider inputs (S3, Postgres, etc.) are handled externally via Python. +/// This enum only supports cache slots for internal workflow data flow. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "source", rename_all = "snake_case")] +pub enum Input { + /// Read from named cache slot (resolved at compile time). + CacheSlot(CacheSlot), +} + +impl Input { + /// Creates a new input from a cache slot. + pub fn from_cache(slot: impl Into) -> Self { + Self::CacheSlot(CacheSlot { + slot: slot.into(), + priority: None, + }) + } + + /// Creates a new input from a cache slot with priority. + pub fn from_cache_with_priority(slot: impl Into, priority: u32) -> Self { + Self::CacheSlot(CacheSlot { + slot: slot.into(), + priority: Some(priority), + }) + } +} diff --git a/crates/nvisy-runtime/src/definition/metadata.rs b/crates/nvisy-runtime/src/definition/metadata.rs new file mode 100644 index 0000000..7e705cb --- /dev/null +++ b/crates/nvisy-runtime/src/definition/metadata.rs @@ -0,0 +1,60 @@ +//! Workflow metadata. + +use derive_builder::Builder; +use jiff::Timestamp; +use semver::Version; +use serde::{Deserialize, Serialize}; + +/// Workflow metadata. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, Builder)] +#[builder( + name = "WorkflowMetadataBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] +pub struct WorkflowMetadata { + /// Workflow name (optional). + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub name: Option, + /// Workflow description. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub description: Option, + /// Workflow version (semver, optional). + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub version: Option, + /// Tags for organization. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + #[builder(default)] + pub tags: Vec, + /// Creation timestamp. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub created_at: Option, + /// Last update timestamp. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub updated_at: Option, +} + +impl WorkflowMetadataBuilder { + fn validate(&self) -> Result<(), String> { + // All fields are optional, so validation always succeeds + Ok(()) + } +} + +impl WorkflowMetadata { + /// Creates a new empty metadata. + pub fn new() -> Self { + Self::default() + } + + /// Returns a builder for creating workflow metadata. + pub fn builder() -> WorkflowMetadataBuilder { + WorkflowMetadataBuilder::default() + } +} diff --git a/crates/nvisy-runtime/src/definition/mod.rs b/crates/nvisy-runtime/src/definition/mod.rs new file mode 100644 index 0000000..cbc9206 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/mod.rs @@ -0,0 +1,218 @@ +//! Workflow definition types. +//! +//! This module contains serializable, frontend-friendly types for defining workflows. +//! These types are designed for: +//! 
- Easy serialization to/from JSON +//! - Frontend consumption and editing +//! - Storage in databases +//! +//! To execute a workflow, definitions must be compiled into runtime types +//! using the [`crate::engine::Engine`]. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +mod edge; +mod input; +mod metadata; +mod node; +mod output; +mod route; +mod transform; +mod util; + +pub use edge::Edge; +pub use input::Input; +pub use metadata::WorkflowMetadata; +pub use node::{Node, NodeId, NodeKind}; +pub use output::Output; +pub use route::{ + CacheSlot, FileCategory, FileCategoryCondition, LanguageCondition, SwitchCondition, SwitchDef, +}; +pub use transform::{ + AnalyzeTask, Chunk, ChunkStrategy, ConvertTask, Derive, DeriveTask, Embedding, Enrich, + EnrichTask, Extract, ExtractTask, ImageEnrichTask, Partition, PartitionStrategy, + TableConvertTask, TableEnrichTask, TextConvertTask, Transformer, +}; +pub use util::Position; + +/// Serializable workflow definition. +/// +/// This is the JSON-friendly representation of a workflow graph. +/// It contains all the information needed to compile and execute a workflow. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Workflow { + /// Nodes in the workflow, keyed by their ID. + pub nodes: HashMap, + /// Edges connecting nodes. + pub edges: Vec, + /// Workflow metadata. + #[serde(default)] + pub metadata: WorkflowMetadata, +} + +impl Workflow { + /// Creates a new empty workflow definition. + pub fn new() -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata: WorkflowMetadata::default(), + } + } + + /// Creates a workflow definition with metadata. + pub fn with_metadata(metadata: WorkflowMetadata) -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata, + } + } + + /// Adds a node to the workflow. + pub fn add_node(&mut self, id: NodeId, node: Node) -> &mut Self { + self.nodes.insert(id, node); + self + } + + /// Adds a node definition with default metadata. + pub fn add_node_def(&mut self, id: NodeId, def: NodeKind) -> &mut Self { + self.nodes.insert(id, Node::new(def)); + self + } + + /// Adds an edge to the workflow. + pub fn add_edge(&mut self, edge: Edge) -> &mut Self { + self.edges.push(edge); + self + } + + /// Adds a simple edge between two nodes. + pub fn connect(&mut self, from: NodeId, to: NodeId) -> &mut Self { + self.edges.push(Edge::new(from, to)); + self + } + + /// Returns an iterator over input nodes. + pub fn input_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_input()) + } + + /// Returns an iterator over output nodes. + pub fn output_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_output()) + } + + /// Returns an iterator over transform nodes. + pub fn transform_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_transform()) + } + + /// Returns an iterator over switch nodes. + pub fn switch_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_switch()) + } +} + +impl Default for Workflow { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use uuid::Uuid; + + use super::*; + + /// Creates a deterministic NodeId for testing. 
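
As a quick sketch of how the definition API above fits together (mirroring the tests below; the slot names are invented), a workflow can be assembled and round-tripped through JSON:

```rust,ignore
use nvisy_runtime::definition::{Input, Node, NodeId, NodeKind, Output, Workflow};

let mut workflow = Workflow::new();
let input = NodeId::new();
let output = NodeId::new();

workflow
    .add_node(input, Node::new(NodeKind::Input(Input::from_cache("raw"))))
    .add_node(output, Node::new(NodeKind::Output(Output::from_cache("done"))))
    .connect(input, output);

// Nodes are keyed by their UUID, edges are a flat list, metadata is optional.
let json = serde_json::to_string_pretty(&workflow).unwrap();
let restored: Workflow = serde_json::from_str(&json).unwrap();
assert_eq!(workflow, restored);
```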
+ fn test_node_id(n: u128) -> NodeId { + NodeId::from_uuid(Uuid::from_u128(n)) + } + + fn input_node_cache(slot: &str) -> Node { + Node::new(NodeKind::Input(Input::CacheSlot(CacheSlot { + slot: slot.to_string(), + priority: None, + }))) + } + + fn output_node_cache(slot: &str) -> Node { + Node::new(NodeKind::Output(Output::Cache(CacheSlot { + slot: slot.to_string(), + priority: None, + }))) + } + + fn transform_node_partition() -> Node { + Node::new(NodeKind::Transform(Transformer::Partition(Partition { + strategy: Default::default(), + include_page_breaks: false, + discard_unsupported: false, + }))) + } + + #[test] + fn test_workflow_definition_new() { + let def = Workflow::new(); + assert!(def.nodes.is_empty()); + assert!(def.edges.is_empty()); + } + + #[test] + fn test_workflow_definition_add_node() { + let mut def = Workflow::new(); + let id = test_node_id(1); + def.add_node(id, input_node_cache("test")); + assert_eq!(def.nodes.len(), 1); + assert!(def.nodes.contains_key(&id)); + } + + #[test] + fn test_workflow_definition_connect() { + let mut def = Workflow::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + def.add_node(id1, input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id2); + + assert_eq!(def.edges.len(), 1); + assert_eq!(def.edges[0].from, id1); + assert_eq!(def.edges[0].to, id2); + } + + #[test] + fn test_workflow_definition_node_iterators() { + let mut def = Workflow::new(); + def.add_node(test_node_id(1), input_node_cache("in")) + .add_node(test_node_id(2), transform_node_partition()) + .add_node(test_node_id(3), output_node_cache("out")); + + assert_eq!(def.input_nodes().count(), 1); + assert_eq!(def.transform_nodes().count(), 1); + assert_eq!(def.output_nodes().count(), 1); + } + + #[test] + fn test_workflow_definition_serialization() { + let mut def = Workflow::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + def.add_node(id1, input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id2); + + // Serialize to JSON + let json = serde_json::to_string(&def).expect("serialization failed"); + + // Deserialize back + let deserialized: Workflow = serde_json::from_str(&json).expect("deserialization failed"); + + assert_eq!(def.nodes.len(), deserialized.nodes.len()); + assert_eq!(def.edges.len(), deserialized.edges.len()); + } +} diff --git a/crates/nvisy-runtime/src/definition/node.rs b/crates/nvisy-runtime/src/definition/node.rs new file mode 100644 index 0000000..a2e86a7 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/node.rs @@ -0,0 +1,172 @@ +//! Node definition types. + +use std::str::FromStr; + +use derive_builder::Builder; +use derive_more::{Debug, Display, From, Into}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::input::Input; +use super::output::Output; +use super::route::SwitchDef; +use super::transform::Transformer; +use super::util::Position; + +/// Unique identifier for a node in a workflow graph. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Debug, Display, From, Into)] +#[debug("{_0}")] +#[display("{_0}")] +#[serde(transparent)] +pub struct NodeId(Uuid); + +impl NodeId { + /// Creates a new random node ID. + #[inline] + pub fn new() -> Self { + Self(Uuid::now_v7()) + } + + /// Creates a node ID from an existing UUID. + #[inline] + pub const fn from_uuid(uuid: Uuid) -> Self { + Self(uuid) + } + + /// Returns the underlying UUID. 
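
A brief sketch of how `NodeId` behaves at the boundaries: the transparent serde representation means it appears as a plain UUID string in JSON, and `FromStr`/`Display` round-trip through text:

```rust,ignore
use std::str::FromStr;

use nvisy_runtime::definition::NodeId;

// Fresh IDs are time-ordered UUIDv7 values.
let id = NodeId::new();

// `#[serde(transparent)]`: the JSON form is just the UUID string.
assert_eq!(serde_json::to_string(&id).unwrap(), format!("\"{id}\""));

// Text round-trip.
let parsed = NodeId::from_str(&id.to_string()).unwrap();
assert_eq!(parsed, id);
```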
+ #[inline] + pub const fn as_uuid(&self) -> Uuid { + self.0 + } + + /// Returns the UUID as bytes. + #[inline] + pub const fn as_bytes(&self) -> &[u8; 16] { + self.0.as_bytes() + } +} + +impl Default for NodeId { + fn default() -> Self { + Self::new() + } +} + +impl FromStr for NodeId { + type Err = uuid::Error; + + fn from_str(s: &str) -> Result { + Ok(Self(Uuid::from_str(s)?)) + } +} + +impl AsRef for NodeId { + fn as_ref(&self) -> &Uuid { + &self.0 + } +} + +/// A workflow node definition with metadata and kind. +/// +/// Nodes are categorized by their role in data flow: +/// - **Input**: Reads/produces data (entry points) +/// - **Transform**: Processes/transforms data (intermediate) +/// - **Output**: Writes/consumes data (exit points) +/// - **Switch**: Routes data based on conditions +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)] +#[builder( + name = "NodeBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with") +)] +pub struct Node { + /// Display name of the node. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub name: Option, + /// Description of what this node does. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub description: Option, + /// Position in the visual editor. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub position: Option, + /// The node kind/type. + #[serde(flatten)] + pub kind: NodeKind, +} + +impl Node { + /// Creates a new node with the given kind. + pub fn new(kind: impl Into) -> Self { + Self { + name: None, + description: None, + position: None, + kind: kind.into(), + } + } + + /// Returns a builder for creating a node. + pub fn builder() -> NodeBuilder { + NodeBuilder::default() + } + + /// Returns whether this is an input node. + pub const fn is_input(&self) -> bool { + self.kind.is_input() + } + + /// Returns whether this is a transform node. + pub const fn is_transform(&self) -> bool { + self.kind.is_transform() + } + + /// Returns whether this is an output node. + pub const fn is_output(&self) -> bool { + self.kind.is_output() + } + + /// Returns whether this is a switch node. + pub const fn is_switch(&self) -> bool { + self.kind.is_switch() + } +} + +/// Node kind enum for workflow graphs. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, From)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum NodeKind { + /// Data input node, reads or produces data. + Input(Input), + /// Data transformer node, processes or transforms data. + Transform(Transformer), + /// Data output node, writes or consumes data. + Output(Output), + /// Conditional routing node. + Switch(SwitchDef), +} + +impl NodeKind { + /// Returns whether this is an input node. + pub const fn is_input(&self) -> bool { + matches!(self, NodeKind::Input(_)) + } + + /// Returns whether this is a transform node. + pub const fn is_transform(&self) -> bool { + matches!(self, NodeKind::Transform(_)) + } + + /// Returns whether this is an output node. + pub const fn is_output(&self) -> bool { + matches!(self, NodeKind::Output(_)) + } + + /// Returns whether this is a switch node. + pub const fn is_switch(&self) -> bool { + matches!(self, NodeKind::Switch(_)) + } +} diff --git a/crates/nvisy-runtime/src/definition/output.rs b/crates/nvisy-runtime/src/definition/output.rs new file mode 100644 index 0000000..0e8e346 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/output.rs @@ -0,0 +1,34 @@ +//! Output node definition types. 
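
A small usage sketch for the node builder above (the display name and coordinates are invented; `kind` is the only required field):

```rust,ignore
use nvisy_runtime::definition::{Node, NodeKind, Output, Position};

let node = Node::builder()
    .with_name("Persist chunks")
    .with_position(Position::new(320.0, 140.0))
    .with_kind(NodeKind::Output(Output::from_cache("chunks")))
    .build()
    .unwrap();

assert!(node.is_output());
```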
+ +use serde::{Deserialize, Serialize}; + +use super::route::CacheSlot; + +/// Output node definition - destination for workflow data. +/// +/// Storage provider outputs (S3, Qdrant, etc.) are handled externally via Python. +/// This enum only supports cache slots for internal workflow data flow. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "target", rename_all = "snake_case")] +pub enum Output { + /// Write to named cache slot (resolved at compile time). + Cache(CacheSlot), +} + +impl Output { + /// Creates a new output from a cache slot. + pub fn from_cache(slot: impl Into) -> Self { + Self::Cache(CacheSlot { + slot: slot.into(), + priority: None, + }) + } + + /// Creates a new output from a cache slot with priority. + pub fn from_cache_with_priority(slot: impl Into, priority: u32) -> Self { + Self::Cache(CacheSlot { + slot: slot.into(), + priority: Some(priority), + }) + } +} diff --git a/crates/nvisy-runtime/src/definition/route/cache.rs b/crates/nvisy-runtime/src/definition/route/cache.rs new file mode 100644 index 0000000..349127e --- /dev/null +++ b/crates/nvisy-runtime/src/definition/route/cache.rs @@ -0,0 +1,33 @@ +//! Cache slot types for in-memory data passing. + +use serde::{Deserialize, Serialize}; + +/// A cache slot reference for in-memory data passing. +/// +/// Cache slots act as named connection points that link different parts +/// of a workflow graph. During compilation, cache slots are resolved by +/// connecting incoming edges directly to outgoing edges with matching slot names. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CacheSlot { + /// Slot identifier (used as the key for matching inputs to outputs). + pub slot: String, + /// Priority for ordering when multiple slots are available. + #[serde(skip_serializing_if = "Option::is_none")] + pub priority: Option, +} + +impl CacheSlot { + /// Creates a new cache slot with the given slot name. + pub fn new(slot: impl Into) -> Self { + Self { + slot: slot.into(), + priority: None, + } + } + + /// Sets the priority. + pub fn with_priority(mut self, priority: u32) -> Self { + self.priority = Some(priority); + self + } +} diff --git a/crates/nvisy-runtime/src/definition/route/mod.rs b/crates/nvisy-runtime/src/definition/route/mod.rs new file mode 100644 index 0000000..109ba9a --- /dev/null +++ b/crates/nvisy-runtime/src/definition/route/mod.rs @@ -0,0 +1,13 @@ +//! Routing types for conditional data flow. +//! +//! This module provides types for controlling data flow in workflows: +//! - [`CacheSlot`]: Named connection point for linking workflow branches +//! - [`SwitchDef`]: Conditional routing based on data properties + +mod cache; +mod switch; + +pub use cache::CacheSlot; +pub use switch::{ + FileCategory, FileCategoryCondition, LanguageCondition, SwitchCondition, SwitchDef, +}; diff --git a/crates/nvisy-runtime/src/definition/route/switch.rs b/crates/nvisy-runtime/src/definition/route/switch.rs new file mode 100644 index 0000000..d88d898 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/route/switch.rs @@ -0,0 +1,103 @@ +//! Switch types for conditional data routing. + +use serde::{Deserialize, Serialize}; + +/// A switch node definition that routes data based on a condition. +/// +/// Switch nodes evaluate a condition against incoming data and route it +/// to either the `true` or `false` output branch. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SwitchDef { + /// The condition to evaluate. 
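
To illustrate how cache slots pair up, a sketch where one branch writes a slot and another reads it back (the slot name is invented):

```rust,ignore
use nvisy_runtime::definition::{CacheSlot, Input, Output};

// One branch writes its results into the "chunks" slot...
let sink = Output::Cache(CacheSlot::new("chunks"));

// ...and another branch reads them back; the priority orders slots when
// several are available.
let source = Input::from_cache_with_priority("chunks", 1);

// During compilation both cache nodes disappear and the writer is wired
// directly to the reader (see the compiler's cache-slot resolution below).
```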
+ pub condition: SwitchCondition, +} + +impl SwitchDef { + /// Creates a new switch definition. + pub fn new(condition: SwitchCondition) -> Self { + Self { condition } + } +} + +/// Switch condition enum - each variant is a distinct condition type. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum SwitchCondition { + /// Match by file category (based on extension). + FileCategory(FileCategoryCondition), + /// Match by detected content language. + Language(LanguageCondition), +} + +/// Condition that matches by file category based on extension. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FileCategoryCondition { + /// File category to match. + pub category: FileCategory, +} + +/// Condition that matches by detected content language. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct LanguageCondition { + /// Language codes to match (e.g., "en", "es", "fr"). + pub codes: Vec, + /// Minimum confidence threshold (0.0 to 1.0). + #[serde(default = "default_confidence")] + pub min_confidence: f32, +} + +fn default_confidence() -> f32 { + 0.8 +} + +/// File categories for routing based on extension. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FileCategory { + /// Text files (.txt, .md, etc.). + Text, + /// Image files (.jpg, .png, .gif, etc.). + Image, + /// Audio files (.mp3, .wav, .flac, etc.). + Audio, + /// Video files (.mp4, .webm, etc.). + Video, + /// Document files (.pdf, .docx, etc.). + Document, + /// Archive files (.zip, .tar, etc.). + Archive, + /// Spreadsheet files (.xlsx, .csv, etc.). + Spreadsheet, + /// Presentation files (.pptx, etc.). + Presentation, + /// Code/source files. + Code, + /// Other/unknown file type. + Other, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_switch_def_new() { + let switch = SwitchDef::new(SwitchCondition::FileCategory(FileCategoryCondition { + category: FileCategory::Image, + })); + + assert!(matches!(switch.condition, SwitchCondition::FileCategory(_))); + } + + #[test] + fn test_serialization() { + let switch = SwitchDef::new(SwitchCondition::Language(LanguageCondition { + codes: vec!["en".into(), "es".into()], + min_confidence: 0.9, + })); + + let json = serde_json::to_string_pretty(&switch).unwrap(); + let deserialized: SwitchDef = serde_json::from_str(&json).unwrap(); + assert_eq!(switch, deserialized); + } +} diff --git a/crates/nvisy-runtime/src/definition/transform/chunk.rs b/crates/nvisy-runtime/src/definition/transform/chunk.rs new file mode 100644 index 0000000..b139ec1 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/transform/chunk.rs @@ -0,0 +1,52 @@ +//! Chunk transform definition. + +use serde::{Deserialize, Serialize}; + +/// Chunking transform for splitting content into smaller pieces. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Chunk { + /// Chunking strategy. + #[serde(flatten)] + pub chunk_strategy: ChunkStrategy, + + /// Whether to use LLM-powered contextual chunking. + /// + /// When enabled, each chunk will include additional context + /// generated by an LLM to improve retrieval quality. + #[serde(default)] + pub contextual_chunking: bool, +} + +/// Chunking strategy. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "chunk_strategy", rename_all = "snake_case")] +pub enum ChunkStrategy { + /// Chunk by character count. 
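
A sketch of the wire format implied by the serde attributes above; the exact JSON is an inference from the internally tagged representation, hence the hedged assertions:

```rust,ignore
use nvisy_runtime::definition::{
    FileCategory, FileCategoryCondition, SwitchCondition, SwitchDef,
};

let switch = SwitchDef::new(SwitchCondition::FileCategory(FileCategoryCondition {
    category: FileCategory::Image,
}));

// Roughly: {"condition":{"kind":"file_category","category":"image"}}
let json = serde_json::to_value(&switch).unwrap();
assert_eq!(json["condition"]["kind"], "file_category");
assert_eq!(json["condition"]["category"], "image");
```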
+ Character { + /// Maximum chunk size in characters. + max_characters: u32, + /// Overlap between chunks in characters. + overlap_characters: u32, + }, + /// Chunk by page boundaries. + Page { + /// Overlap between chunks in pages. + overlap_pages: u32, + }, + /// Chunk by document sections/headings. + Section { + /// Minimum chunk size in characters. + min_characters: u32, + /// Maximum chunk size in characters. + max_characters: u32, + /// Overlap between chunks in characters. + overlap_characters: u32, + }, + /// Chunk by semantic similarity. + Similarity { + /// Maximum chunk size in characters. + max_characters: u32, + /// Similarity score threshold (0.0 to 1.0). + score: f32, + }, +} diff --git a/crates/nvisy-runtime/src/definition/transform/derive.rs b/crates/nvisy-runtime/src/definition/transform/derive.rs new file mode 100644 index 0000000..6febbe1 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/transform/derive.rs @@ -0,0 +1,48 @@ +//! Derive transform definition. + +use nvisy_core::Provider; +use nvisy_rig::provider::{CompletionCredentials, CompletionModel, CompletionProvider}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::error::{Error, Result}; + +/// Derive transform for generating new content from input. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Derive { + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Completion model to use. + #[serde(flatten)] + pub model: CompletionModel, + + /// The derivation task to perform. + pub task: DeriveTask, + + /// Optional prompt override for the task. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub override_prompt: Option, +} + +impl Derive { + /// Creates a completion provider from these parameters and credentials. + pub async fn into_provider( + self, + credentials: CompletionCredentials, + ) -> Result { + CompletionProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} + +/// Tasks for generating new content from input. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DeriveTask { + /// Generate a condensed summary of the content. + Summarization, + /// Generate a title or heading for the content. + GenerateTitle, +} diff --git a/crates/nvisy-runtime/src/definition/transform/embedding.rs b/crates/nvisy-runtime/src/definition/transform/embedding.rs new file mode 100644 index 0000000..eff4acd --- /dev/null +++ b/crates/nvisy-runtime/src/definition/transform/embedding.rs @@ -0,0 +1,35 @@ +//! Embedding transform definition. + +use nvisy_core::Provider; +use nvisy_rig::provider::{EmbeddingCredentials, EmbeddingModel, EmbeddingProvider}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::error::{Error, Result}; + +/// Embedding transform for generating vector embeddings. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Embedding { + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Embedding model to use. + #[serde(flatten)] + pub model: EmbeddingModel, + + /// Whether to L2-normalize the output embeddings. + #[serde(default)] + pub normalize: bool, +} + +impl Embedding { + /// Creates an embedding provider from these parameters and credentials. 
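
A sketch of a character-window chunking transform wrapped into a workflow node (the window sizes are arbitrary examples):

```rust,ignore
use nvisy_runtime::definition::{Chunk, ChunkStrategy, Node, NodeKind, Transformer};

// Contextual chunking stays disabled, so no LLM calls are made for context.
let chunk = Chunk {
    chunk_strategy: ChunkStrategy::Character {
        max_characters: 2_000,
        overlap_characters: 200,
    },
    contextual_chunking: false,
};

let node = Node::new(NodeKind::Transform(Transformer::Chunk(chunk)));
assert!(node.is_transform());
```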
+ pub async fn into_provider( + self, + credentials: EmbeddingCredentials, + ) -> Result { + EmbeddingProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} diff --git a/crates/nvisy-runtime/src/definition/transform/enrich.rs b/crates/nvisy-runtime/src/definition/transform/enrich.rs new file mode 100644 index 0000000..b326d9f --- /dev/null +++ b/crates/nvisy-runtime/src/definition/transform/enrich.rs @@ -0,0 +1,73 @@ +//! Enrich transform definition. + +use nvisy_core::Provider; +use nvisy_rig::provider::{CompletionCredentials, CompletionModel, CompletionProvider}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::error::{Error, Result}; + +/// Enrich transform for adding metadata/descriptions to elements. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Enrich { + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Completion model to use. + #[serde(flatten)] + pub model: CompletionModel, + + /// The enrichment task to perform. + #[serde(flatten)] + pub task: EnrichTask, + + /// Optional prompt override for the task. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub override_prompt: Option, +} + +impl Enrich { + /// Creates a completion provider from these parameters and credentials. + pub async fn into_provider( + self, + credentials: CompletionCredentials, + ) -> Result { + CompletionProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} + +/// Tasks for adding metadata/descriptions to elements. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "input_type", content = "task", rename_all = "snake_case")] +pub enum EnrichTask { + /// Enrich table elements. + Table(TableEnrichTask), + /// Enrich image elements. + Image(ImageEnrichTask), +} + +/// Tasks for table enrichment. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TableEnrichTask { + /// Generate a natural language description of the table. + Description, + /// Generate descriptions for each column. + ColumnDescriptions, +} + +/// Tasks for image enrichment. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ImageEnrichTask { + /// Generate a natural language description of the image. + Description, + /// Generate a detailed description (people, objects, text, colors, layout). + DetailedDescription, + /// Extract text from image using generative OCR. + GenerativeOcr, + /// Detect and list objects/entities in the image. + ObjectDetection, +} diff --git a/crates/nvisy-runtime/src/definition/transform/extract.rs b/crates/nvisy-runtime/src/definition/transform/extract.rs new file mode 100644 index 0000000..fbda857 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/transform/extract.rs @@ -0,0 +1,109 @@ +//! Extract transform definition. + +use nvisy_core::Provider; +use nvisy_rig::provider::{CompletionCredentials, CompletionModel, CompletionProvider}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::error::{Error, Result}; + +/// Extract transform for extracting structured data or converting formats. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Extract { + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Completion model to use. + #[serde(flatten)] + pub model: CompletionModel, + + /// The extraction task to perform. 
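
A sketch of the adjacently tagged representation this produces; when flattened into `Enrich`, these keys sit next to `credentials_id` and the model fields:

```rust,ignore
use nvisy_runtime::definition::{EnrichTask, ImageEnrichTask};

let task = EnrichTask::Image(ImageEnrichTask::Description);

// Roughly: {"input_type":"image","task":"description"}
let json = serde_json::to_value(&task).unwrap();
assert_eq!(json["input_type"], "image");
assert_eq!(json["task"], "description");
```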
+ #[serde(flatten)] + pub task: ExtractTask, + + /// Optional prompt override for the task. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub override_prompt: Option, +} + +impl Extract { + /// Creates a completion provider from these parameters and credentials. + pub async fn into_provider( + self, + credentials: CompletionCredentials, + ) -> Result { + CompletionProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} + +/// Tasks for extracting structured data or converting formats. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "task_type", content = "task", rename_all = "snake_case")] +pub enum ExtractTask { + /// Convert elements to different formats. + Convert(ConvertTask), + /// Analyze text to extract structured information. + Analyze(AnalyzeTask), +} + +/// Tasks for format conversion. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde( + tag = "input_type", + content = "convert_task", + rename_all = "snake_case" +)] +pub enum ConvertTask { + /// Convert table elements. + Table(TableConvertTask), + /// Convert text elements. + Text(TextConvertTask), +} + +/// Tasks for table conversion. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TableConvertTask { + /// Convert table to HTML format. + ToHtml, + /// Convert table to Markdown format. + ToMarkdown, + /// Convert table to CSV format. + ToCsv, + /// Convert table to structured JSON. + ToJson, +} + +/// Tasks for text conversion. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TextConvertTask { + /// Convert text to JSON format. + ToJson, + /// Convert text to structured JSON based on a schema. + ToStructuredJson { + /// JSON schema for the output structure. + schema: String, + }, +} + +/// Tasks for analyzing text to extract structured information. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AnalyzeTask { + /// Extract named entities (people, places, organizations, dates, etc.). + NamedEntityRecognition, + /// Extract key terms and phrases. + KeywordExtraction, + /// Classify text into provided categories. + Classification { + /// Labels/categories for classification. + labels: Vec, + }, + /// Analyze sentiment (positive, negative, neutral). + SentimentAnalysis, + /// Extract relationships between entities. + RelationshipExtraction, +} diff --git a/crates/nvisy-runtime/src/definition/transform/mod.rs b/crates/nvisy-runtime/src/definition/transform/mod.rs new file mode 100644 index 0000000..68f8170 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/transform/mod.rs @@ -0,0 +1,43 @@ +//! Transform definition types. +//! +//! This module contains serializable definitions for transform nodes. +//! Each transform type defines the configuration needed to perform +//! a specific data transformation in a workflow. + +mod chunk; +mod derive; +mod embedding; +mod enrich; +mod extract; +mod partition; + +pub use chunk::{Chunk, ChunkStrategy}; +pub use derive::{Derive, DeriveTask}; +pub use embedding::Embedding; +pub use enrich::{Enrich, EnrichTask, ImageEnrichTask, TableEnrichTask}; +pub use extract::{ + AnalyzeTask, ConvertTask, Extract, ExtractTask, TableConvertTask, TextConvertTask, +}; +pub use partition::{Partition, PartitionStrategy}; +use serde::{Deserialize, Serialize}; + +/// Transformer node variant. 
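
Similarly, a sketch of an analysis task and its approximate JSON shape (the labels are invented for the example):

```rust,ignore
use nvisy_runtime::definition::{AnalyzeTask, ExtractTask};

// Classification carries its labels; the other analysis tasks are unit variants.
let task = ExtractTask::Analyze(AnalyzeTask::Classification {
    labels: vec!["invoice".into(), "contract".into(), "other".into()],
});

// Roughly: {"task_type":"analyze","task":{"classification":{"labels":[...]}}}
let json = serde_json::to_value(&task).unwrap();
assert_eq!(json["task_type"], "analyze");
assert!(json["task"]["classification"]["labels"].is_array());
```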
+/// +/// Each variant represents a different type of data transformation +/// that can be performed in a workflow pipeline. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum Transformer { + /// Partition documents into elements. + Partition(Partition), + /// Chunk content into smaller pieces. + Chunk(Chunk), + /// Generate vector embeddings. + Embedding(Embedding), + /// Enrich elements with metadata/descriptions. + Enrich(Enrich), + /// Extract structured data or convert formats. + Extract(Extract), + /// Generate new content from input. + Derive(Derive), +} diff --git a/crates/nvisy-runtime/src/definition/transform/partition.rs b/crates/nvisy-runtime/src/definition/transform/partition.rs new file mode 100644 index 0000000..c4f0c2f --- /dev/null +++ b/crates/nvisy-runtime/src/definition/transform/partition.rs @@ -0,0 +1,33 @@ +//! Partition transform definition. + +use serde::{Deserialize, Serialize}; + +/// Partition transform for partitioning documents into elements. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Partition { + /// Partitioning strategy. + pub strategy: PartitionStrategy, + + /// Whether to include page break markers in output. + #[serde(default)] + pub include_page_breaks: bool, + + /// Whether to discard unsupported element types. + #[serde(default)] + pub discard_unsupported: bool, +} + +/// Partitioning strategy for document element extraction. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PartitionStrategy { + /// Automatically detect the best partitioning approach. + #[default] + Auto, + /// Fast rule-based partitioning without ML. + Fast, + /// Slower ML-based partitioning with layout detection. + Slow, + /// Vision-Language Model based partitioning. + Vlm, +} diff --git a/crates/nvisy-runtime/src/definition/util/mod.rs b/crates/nvisy-runtime/src/definition/util/mod.rs new file mode 100644 index 0000000..a1752a6 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/util/mod.rs @@ -0,0 +1,5 @@ +//! Utility types for workflow definitions. + +mod position; + +pub use position::Position; diff --git a/crates/nvisy-runtime/src/definition/util/position.rs b/crates/nvisy-runtime/src/definition/util/position.rs new file mode 100644 index 0000000..841b0d3 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/util/position.rs @@ -0,0 +1,19 @@ +//! Position type for visual editor node placement. + +use serde::{Deserialize, Serialize}; + +/// Position of a node in the visual editor. +#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)] +pub struct Position { + /// X coordinate. + pub x: f32, + /// Y coordinate. + pub y: f32, +} + +impl Position { + /// Creates a new position. + pub fn new(x: f32, y: f32) -> Self { + Self { x, y } + } +} diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs new file mode 100644 index 0000000..0cb4a4a --- /dev/null +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -0,0 +1,375 @@ +//! Workflow compiler for transforming definitions into executable graphs. +//! +//! The compiler takes workflow definitions and credentials registry +//! and produces compiled graphs that can be executed by the engine. +//! +//! # Compilation Process +//! +//! 1. **Validation**: Check that the definition is structurally valid +//! 2. **Cache Resolution**: Connect cache slot inputs to outputs +//! 3. 
**Node Compilation**: Create processors and streams for each node +//! 4. **Graph Building**: Build the petgraph structure with compiled nodes + +use std::collections::HashMap; + +use nvisy_rig::agent::{ + StructuredOutputAgent, TableAgent, TextAnalysisAgent, TextGenerationAgent, VisionAgent, +}; +use petgraph::graph::{DiGraph, NodeIndex}; + +use super::context::Context; +use super::credentials::CredentialsRegistry; +use crate::definition::{Input, NodeId, NodeKind, Output, Workflow}; +use crate::error::{Error, Result}; +use crate::graph::{ + ChunkProcessor, CompiledGraph, CompiledInput, CompiledNode, CompiledOutput, CompiledSwitch, + CompiledTransform, DeriveProcessor, EdgeData, EmbeddingProcessor, EnrichProcessor, + ExtractProcessor, InputStream, OutputStream, PartitionProcessor, +}; + +/// Workflow compiler that transforms definitions into executable graphs. +pub struct WorkflowCompiler<'a> { + /// Credentials registry for resolving provider credentials. + registry: &'a CredentialsRegistry, + /// Execution context for provider initialization. + #[allow(dead_code)] + ctx: Context, +} + +impl<'a> WorkflowCompiler<'a> { + /// Creates a new workflow compiler. + pub fn new(registry: &'a CredentialsRegistry, ctx: Context) -> Self { + Self { registry, ctx } + } + + /// Compiles a workflow definition into an executable graph. + pub async fn compile(&self, def: Workflow) -> Result { + // Phase 1: Validate definition structure + self.validate(&def)?; + + // Phase 2: Resolve cache slots + let resolved = self.resolve_cache_slots(&def)?; + + // Phase 3: Compile each node + let mut compiled_nodes = HashMap::new(); + for (id, node) in &def.nodes { + // Skip cache slot nodes - they're resolved during edge building + if self.is_cache_only_node(&node.kind) { + continue; + } + let compiled = self.compile_node(&node.kind).await?; + compiled_nodes.insert(*id, compiled); + } + + // Phase 4: Build petgraph + let (graph, node_indices) = self.build_graph(compiled_nodes, &resolved.edges)?; + + Ok(CompiledGraph::new(graph, node_indices, def.metadata)) + } + + /// Validates the workflow definition structure. + fn validate(&self, def: &Workflow) -> Result<()> { + // Check edge references + for edge in &def.edges { + if !def.nodes.contains_key(&edge.from) { + return Err(Error::InvalidDefinition(format!( + "edge references non-existent node: {}", + edge.from + ))); + } + if !def.nodes.contains_key(&edge.to) { + return Err(Error::InvalidDefinition(format!( + "edge references non-existent node: {}", + edge.to + ))); + } + } + + // Check for at least one input and output + let has_input = def.nodes.values().any(|n| n.is_input()); + let has_output = def.nodes.values().any(|n| n.is_output()); + + if !has_input { + return Err(Error::InvalidDefinition( + "workflow must have at least one input node".into(), + )); + } + if !has_output { + return Err(Error::InvalidDefinition( + "workflow must have at least one output node".into(), + )); + } + + Ok(()) + } + + /// Checks if a node is a cache-only node (input from cache or output to cache). + fn is_cache_only_node(&self, def: &NodeKind) -> bool { + match def { + NodeKind::Input(input) => matches!(input, Input::CacheSlot(_)), + NodeKind::Output(output) => matches!(output, Output::Cache(_)), + _ => false, + } + } + + /// Resolves cache slots by connecting cache inputs to cache outputs. 
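
A sketch of how a structurally invalid definition surfaces through the public API (module paths follow the crate README; assumes an async context and that the default `Context` can be passed as shown):

```rust,ignore
use nvisy_runtime::definition::{Input, NodeId, NodeKind, Workflow};
use nvisy_runtime::engine::Engine;
use nvisy_runtime::provider::CredentialsRegistry;

// An input but no output: validation rejects this during compilation.
let mut workflow = Workflow::new();
workflow.add_node_def(NodeId::new(), NodeKind::Input(Input::from_cache("raw")));

let engine = Engine::with_defaults();
let result = engine
    .execute(workflow, CredentialsRegistry::default(), Default::default())
    .await;

// Fails with `Error::InvalidDefinition` ("at least one output node").
assert!(result.is_err());
```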
+ fn resolve_cache_slots(&self, def: &Workflow) -> Result { + // Collect cache slot outputs (nodes that write to cache slots) + let mut cache_outputs: HashMap> = HashMap::new(); + for (id, node) in &def.nodes { + if let NodeKind::Output(Output::Cache(slot)) = &node.kind { + cache_outputs + .entry(slot.slot.clone()) + .or_default() + .push(*id); + } + } + + // Collect cache slot inputs (nodes that read from cache slots) + let mut cache_inputs: HashMap> = HashMap::new(); + for (id, node) in &def.nodes { + if let NodeKind::Input(Input::CacheSlot(slot)) = &node.kind { + cache_inputs.entry(slot.slot.clone()).or_default().push(*id); + } + } + + // Build resolved edges + let mut resolved_edges = Vec::new(); + + // Add original edges (excluding edges to/from cache nodes) + for edge in &def.edges { + let from_node = def.nodes.get(&edge.from); + let to_node = def.nodes.get(&edge.to); + + let from_is_cache = from_node + .map(|n| self.is_cache_only_node(&n.kind)) + .unwrap_or(false); + let to_is_cache = to_node + .map(|n| self.is_cache_only_node(&n.kind)) + .unwrap_or(false); + + if !from_is_cache && !to_is_cache { + resolved_edges.push(ResolvedEdge { + from: edge.from, + to: edge.to, + data: EdgeData { + from_port: edge.from_port.clone(), + to_port: edge.to_port.clone(), + }, + }); + } + } + + // Connect nodes writing to cache slots with nodes reading from them + // by looking at incoming/outgoing edges + for (slot_name, output_ids) in &cache_outputs { + if let Some(input_ids) = cache_inputs.get(slot_name) { + // For each cache output node, find what writes to it + for output_id in output_ids { + let writers: Vec = def + .edges + .iter() + .filter(|e| e.to == *output_id) + .map(|e| e.from) + .collect(); + + // For each cache input node, find what reads from it + for input_id in input_ids { + let readers: Vec = def + .edges + .iter() + .filter(|e| e.from == *input_id) + .map(|e| e.to) + .collect(); + + // Connect writers directly to readers + for writer in &writers { + for reader in &readers { + resolved_edges.push(ResolvedEdge { + from: *writer, + to: *reader, + data: EdgeData { + from_port: None, + to_port: None, + }, + }); + } + } + } + } + } + } + + Ok(ResolvedDefinition { + edges: resolved_edges, + }) + } + + /// Compiles a single node definition into a compiled node. + async fn compile_node(&self, def: &NodeKind) -> Result { + match def { + NodeKind::Input(input) => { + let stream = self.create_input_stream(input)?; + Ok(CompiledNode::Input(CompiledInput::new(stream))) + } + NodeKind::Output(output) => { + let stream = self.create_output_stream(output)?; + Ok(CompiledNode::Output(CompiledOutput::new(stream))) + } + NodeKind::Transform(transformer) => { + let processor = self.create_processor(transformer).await?; + Ok(CompiledNode::Transform(Box::new(processor))) + } + NodeKind::Switch(switch) => { + Ok(CompiledNode::Switch(CompiledSwitch::from(switch.clone()))) + } + } + } + + /// Creates an input stream from an input definition. + fn create_input_stream(&self, input: &Input) -> Result { + match input { + Input::CacheSlot(_) => { + // Cache inputs are resolved during cache slot resolution + // This shouldn't be called for cache inputs + Err(Error::Internal( + "cache input nodes should be resolved before compilation".into(), + )) + } + } + } + + /// Creates an output stream from an output definition. 
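
A sketch of the rewiring this performs; `upstream` and `downstream` are placeholders for transform nodes added elsewhere, so this is a fragment rather than a complete workflow:

```rust,ignore
use nvisy_runtime::definition::{Input, NodeId, NodeKind, Output, Workflow};

let mut wf = Workflow::new();
let (upstream, to_cache, from_cache, downstream) =
    (NodeId::new(), NodeId::new(), NodeId::new(), NodeId::new());

wf.add_node_def(to_cache, NodeKind::Output(Output::from_cache("elements")))
    .add_node_def(from_cache, NodeKind::Input(Input::from_cache("elements")))
    .connect(upstream, to_cache)       // branch 1 writes the slot
    .connect(from_cache, downstream);  // branch 2 reads it

// Resolution drops both cache nodes and adds a direct `upstream -> downstream`
// edge in the compiled graph.
```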
+ fn create_output_stream(&self, output: &Output) -> Result { + match output { + Output::Cache(_) => { + // Cache outputs are resolved during cache slot resolution + Err(Error::Internal( + "cache output nodes should be resolved before compilation".into(), + )) + } + } + } + + /// Creates a processor from a transformer definition. + async fn create_processor( + &self, + transformer: &crate::definition::Transformer, + ) -> Result { + use crate::definition::Transformer; + + match transformer { + Transformer::Partition(p) => Ok(CompiledTransform::Partition(PartitionProcessor::new( + p.strategy, + p.include_page_breaks, + p.discard_unsupported, + ))), + Transformer::Chunk(c) => Ok(CompiledTransform::Chunk(ChunkProcessor::new( + c.chunk_strategy.clone(), + ))), + Transformer::Embedding(e) => { + let creds = self.registry.get(e.credentials_id)?.clone(); + let provider = e + .clone() + .into_provider(creds.into_embedding_credentials()?) + .await?; + Ok(CompiledTransform::Embedding(EmbeddingProcessor::new( + provider, + e.normalize, + ))) + } + Transformer::Enrich(e) => { + let creds = self.registry.get(e.credentials_id)?.clone(); + let provider = e + .clone() + .into_provider(creds.into_completion_credentials()?) + .await?; + let vision_agent = VisionAgent::new(provider.clone(), false); + let table_agent = TableAgent::new(provider, false); + Ok(CompiledTransform::Enrich(Box::new(EnrichProcessor::new( + vision_agent, + table_agent, + e.task.clone(), + e.override_prompt.clone(), + )))) + } + Transformer::Extract(e) => { + let creds = self.registry.get(e.credentials_id)?.clone(); + let provider = e + .clone() + .into_provider(creds.into_completion_credentials()?) + .await?; + let text_analysis_agent = TextAnalysisAgent::new(provider.clone(), false); + let table_agent = TableAgent::new(provider.clone(), false); + let structured_output_agent = StructuredOutputAgent::new(provider, false); + Ok(CompiledTransform::Extract(Box::new(ExtractProcessor::new( + text_analysis_agent, + table_agent, + structured_output_agent, + e.task.clone(), + e.override_prompt.clone(), + )))) + } + Transformer::Derive(d) => { + let creds = self.registry.get(d.credentials_id)?.clone(); + let provider = d + .clone() + .into_provider(creds.into_completion_credentials()?) + .await?; + let agent = TextGenerationAgent::new(provider, false); + Ok(CompiledTransform::Derive(DeriveProcessor::new( + agent, + d.task, + d.override_prompt.clone(), + ))) + } + } + } + + /// Builds the petgraph from compiled nodes and resolved edges. + fn build_graph( + &self, + nodes: HashMap, + edges: &[ResolvedEdge], + ) -> Result<(DiGraph, HashMap)> { + let mut graph = DiGraph::new(); + let mut node_indices = HashMap::new(); + + // Add nodes + for (id, node) in nodes { + let idx = graph.add_node(node); + node_indices.insert(id, idx); + } + + // Add edges + for edge in edges { + let from_idx = node_indices.get(&edge.from).ok_or_else(|| { + Error::InvalidDefinition(format!("edge references unknown node: {}", edge.from)) + })?; + let to_idx = node_indices.get(&edge.to).ok_or_else(|| { + Error::InvalidDefinition(format!("edge references unknown node: {}", edge.to)) + })?; + + graph.add_edge(*from_idx, *to_idx, edge.data.clone()); + } + + // Verify acyclic + if petgraph::algo::is_cyclic_directed(&graph) { + return Err(Error::InvalidDefinition("workflow contains a cycle".into())); + } + + Ok((graph, node_indices)) + } +} + +/// Resolved edge after cache slot resolution. 
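
The acyclicity check relies on petgraph directly; a minimal, self-contained illustration of the same call:

```rust
use petgraph::algo::is_cyclic_directed;
use petgraph::graph::DiGraph;

fn main() {
    let mut graph = DiGraph::<&str, ()>::new();
    let input = graph.add_node("input");
    let chunk = graph.add_node("chunk");
    let output = graph.add_node("output");
    graph.add_edge(input, chunk, ());
    graph.add_edge(chunk, output, ());
    assert!(!is_cyclic_directed(&graph));

    // A back-edge introduces a cycle, which the compiler rejects.
    graph.add_edge(output, input, ());
    assert!(is_cyclic_directed(&graph));
}
```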
+struct ResolvedEdge { + from: NodeId, + to: NodeId, + data: EdgeData, +} + +/// Resolved workflow definition after cache slot resolution. +struct ResolvedDefinition { + edges: Vec, +} diff --git a/crates/nvisy-runtime/src/engine/config.rs b/crates/nvisy-runtime/src/engine/config.rs new file mode 100644 index 0000000..03c2bbc --- /dev/null +++ b/crates/nvisy-runtime/src/engine/config.rs @@ -0,0 +1,48 @@ +//! Engine configuration. + +use std::time::Duration; + +use derive_builder::Builder; + +/// Configuration for the workflow execution engine. +#[derive(Debug, Clone, Builder)] +#[builder(setter(into), build_fn(validate = "Self::validate"))] +pub struct EngineConfig { + /// Maximum number of concurrent workflow executions. + #[builder(default = "10")] + pub max_concurrent_runs: usize, + + /// Default timeout for workflow execution. + #[builder(default = "Duration::from_secs(3600)")] + pub default_timeout: Duration, + + /// Maximum number of retries for failed nodes. + #[builder(default = "3")] + pub max_retries: u32, + + /// Delay between retries. + #[builder(default = "Duration::from_secs(1)")] + pub retry_delay: Duration, +} + +impl EngineConfigBuilder { + fn validate(&self) -> Result<(), String> { + if let Some(max) = self.max_concurrent_runs + && max == 0 + { + return Err("max_concurrent_runs must be at least 1".into()); + } + Ok(()) + } +} + +impl Default for EngineConfig { + fn default() -> Self { + Self { + max_concurrent_runs: 10, + default_timeout: Duration::from_secs(3600), + max_retries: 3, + retry_delay: Duration::from_secs(1), + } + } +} diff --git a/crates/nvisy-runtime/src/engine/context.rs b/crates/nvisy-runtime/src/engine/context.rs new file mode 100644 index 0000000..39510ce --- /dev/null +++ b/crates/nvisy-runtime/src/engine/context.rs @@ -0,0 +1,185 @@ +//! Context types for workflow execution. + +use derive_builder::Builder; +use nvisy_dal::AnyDataValue; + +use super::CredentialsRegistry; + +/// Context for provider operations during compilation and execution. +/// +/// Provides configuration for read/write operations including target, +/// pagination cursor, and limits. +#[derive(Debug, Clone, Default)] +pub struct Context { + /// Target collection, table, bucket, topic, etc. + pub target: Option, + /// Cursor for pagination (provider-specific format). + pub cursor: Option, + /// Tiebreaker for pagination conflicts. + pub tiebreaker: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +impl Context { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the target. + pub fn with_target(mut self, target: impl Into) -> Self { + self.target = Some(target.into()); + self + } + + /// Sets the cursor for pagination. + pub fn with_cursor(mut self, cursor: impl Into) -> Self { + self.cursor = Some(cursor.into()); + self + } + + /// Sets the tiebreaker for pagination. + pub fn with_tiebreaker(mut self, tiebreaker: impl Into) -> Self { + self.tiebreaker = Some(tiebreaker.into()); + self + } + + /// Sets the limit. + pub fn with_limit(mut self, limit: usize) -> Self { + self.limit = Some(limit); + self + } + + /// Returns the target, if set. + pub fn target(&self) -> Option<&str> { + self.target.as_deref() + } + + /// Returns the cursor, if set. + pub fn cursor(&self) -> Option<&str> { + self.cursor.as_deref() + } + + /// Returns the limit, if set. + pub fn limit(&self) -> Option { + self.limit + } + + /// Converts to an ObjectContext for object storage providers. 
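
A small sketch of the pagination context (assuming `Context` is re-exported from the `engine` module; the target prefix and token value are invented):

```rust,ignore
use nvisy_runtime::engine::Context;

// Read at most 500 objects under the "invoices/" prefix, resuming from a
// previously returned pagination token.
let ctx = Context::new()
    .with_target("invoices/")
    .with_cursor("opaque-token-from-last-page")
    .with_limit(500);

assert_eq!(ctx.limit(), Some(500));
```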
+ pub fn to_object_context(&self) -> nvisy_dal::ObjectContext { + nvisy_dal::ObjectContext { + prefix: self.target.clone(), + token: self.cursor.clone(), + limit: self.limit, + } + } + + /// Converts to a RelationalContext for relational database providers. + pub fn to_relational_context(&self) -> nvisy_dal::RelationalContext { + nvisy_dal::RelationalContext { + cursor: self.cursor.clone(), + tiebreaker: self.tiebreaker.clone(), + limit: self.limit, + } + } +} + +/// Execution context for a workflow run. +/// +/// Manages the current data items flowing through the pipeline and holds +/// credentials for provider access. +/// +/// A single input can produce multiple outputs (e.g., 1 document → 1000 embeddings), +/// so the context holds a `Vec` of values at each stage. +#[derive(Debug, Builder)] +#[builder( + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] +pub struct ExecutionContext { + /// Credentials registry for provider authentication. + credentials: CredentialsRegistry, + /// Current data items being processed (can expand: 1 input → N outputs). + #[builder(default)] + current: Vec, + /// Total input items processed in this execution. + #[builder(default)] + items_processed: usize, +} + +impl ExecutionContextBuilder { + fn validate(&self) -> Result<(), String> { + if self.credentials.is_none() { + return Err("credentials is required".into()); + } + Ok(()) + } +} + +impl ExecutionContext { + /// Creates a new execution context with the given credentials. + pub fn new(credentials: CredentialsRegistry) -> Self { + Self { + credentials, + current: Vec::new(), + items_processed: 0, + } + } + + /// Returns a builder for creating an execution context. + pub fn builder() -> ExecutionContextBuilder { + ExecutionContextBuilder::default() + } + + /// Returns a reference to the credentials registry. + pub fn credentials(&self) -> &CredentialsRegistry { + &self.credentials + } + + /// Sets the current data items being processed. + pub fn set_current(&mut self, data: Vec) { + self.current = data; + } + + /// Sets a single item as current (convenience for input stage). + pub fn set_current_single(&mut self, data: AnyDataValue) { + self.current = vec![data]; + } + + /// Takes the current data items, leaving an empty vec in its place. + pub fn take_current(&mut self) -> Vec { + std::mem::take(&mut self.current) + } + + /// Returns a reference to the current data items. + pub fn current(&self) -> &[AnyDataValue] { + &self.current + } + + /// Returns whether there are any current data items. + pub fn has_current(&self) -> bool { + !self.current.is_empty() + } + + /// Returns the number of current data items. + pub fn current_len(&self) -> usize { + self.current.len() + } + + /// Increments the processed items counter. + pub fn mark_processed(&mut self) { + self.items_processed += 1; + } + + /// Returns the number of input items processed. + pub fn items_processed(&self) -> usize { + self.items_processed + } + + /// Clears the current data items. + pub fn clear(&mut self) { + self.current.clear(); + } +} diff --git a/crates/nvisy-runtime/src/engine/credentials.rs b/crates/nvisy-runtime/src/engine/credentials.rs new file mode 100644 index 0000000..e10fafb --- /dev/null +++ b/crates/nvisy-runtime/src/engine/credentials.rs @@ -0,0 +1,101 @@ +//! Credentials management for AI providers. +//! +//! This module provides a registry for storing and retrieving credentials +//! used by AI providers (completion, embedding) during workflow execution. 
+ +use std::collections::HashMap; + +use derive_more::From; +use nvisy_rig::provider::{CompletionCredentials, EmbeddingCredentials}; +use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; +use uuid::Uuid; + +use crate::error::{Error, Result}; + +/// AI provider credentials. +#[derive(Debug, Clone, From, Serialize, Deserialize, IntoStaticStr)] +#[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum ProviderCredentials { + /// Completion provider credentials. + Completion(CompletionCredentials), + /// Embedding provider credentials. + Embedding(EmbeddingCredentials), +} + +impl ProviderCredentials { + /// Returns the provider kind as a string. + pub fn kind(&self) -> &'static str { + self.into() + } + + /// Converts to completion credentials if applicable. + pub fn into_completion_credentials(self) -> Result { + match self { + Self::Completion(c) => Ok(c), + other => Err(Error::Internal(format!( + "expected completion credentials, got '{}'", + other.kind() + ))), + } + } + + /// Converts to embedding credentials if applicable. + pub fn into_embedding_credentials(self) -> Result { + match self { + Self::Embedding(c) => Ok(c), + other => Err(Error::Internal(format!( + "expected embedding credentials, got '{}'", + other.kind() + ))), + } + } +} + +/// In-memory registry for AI provider credentials. +/// +/// Credentials are stored by UUID and can be retrieved during workflow compilation. +#[derive(Debug, Clone, Default)] +pub struct CredentialsRegistry { + credentials: HashMap, +} + +impl CredentialsRegistry { + /// Creates a new empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Registers credentials with a UUID. + pub fn register(&mut self, id: Uuid, creds: ProviderCredentials) { + self.credentials.insert(id, creds); + } + + /// Retrieves credentials by UUID. + pub fn get(&self, id: Uuid) -> Result<&ProviderCredentials> { + self.credentials + .get(&id) + .ok_or_else(|| Error::CredentialsNotFound(id)) + } + + /// Removes credentials by UUID. + pub fn remove(&mut self, id: Uuid) -> Option { + self.credentials.remove(&id) + } + + /// Returns the number of registered credentials. + pub fn len(&self) -> usize { + self.credentials.len() + } + + /// Returns true if no credentials are registered. + pub fn is_empty(&self) -> bool { + self.credentials.is_empty() + } + + /// Clears all credentials. + pub fn clear(&mut self) { + self.credentials.clear(); + } +} diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs new file mode 100644 index 0000000..f192649 --- /dev/null +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -0,0 +1,232 @@ +//! Workflow execution engine. + +use std::sync::Arc; + +use futures::{SinkExt, StreamExt}; +use tokio::sync::Semaphore; + +use super::EngineConfig; +use super::compiler::WorkflowCompiler; +use super::context::{Context, ExecutionContext}; +use super::credentials::CredentialsRegistry; +use crate::definition::{NodeId, Workflow}; +use crate::error::{Error, Result}; +use crate::graph::{CompiledGraph, CompiledNode, InputStream, OutputStream, Process}; + +/// Tracing target for engine operations. +const TRACING_TARGET: &str = "nvisy_workflow::engine"; + +/// The workflow execution engine. +/// +/// Manages workflow execution, concurrency, and resource allocation. +/// Executes workflows in a pipe-based streaming manner: each data item +/// flows through the entire pipeline before the next item is processed. 
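
A sketch of registering provider credentials ahead of compilation; it assumes `ProviderCredentials` is re-exported next to `CredentialsRegistry` (the README path), and takes the `CompletionCredentials` value as a parameter since its construction lives in `nvisy-rig`:

```rust,ignore
use nvisy_rig::provider::CompletionCredentials;
use nvisy_runtime::provider::{CredentialsRegistry, ProviderCredentials};
use uuid::Uuid;

fn register(creds: CompletionCredentials) -> (CredentialsRegistry, Uuid) {
    let mut registry = CredentialsRegistry::new();
    let id = Uuid::now_v7();
    registry.register(id, ProviderCredentials::Completion(creds));
    // Transform definitions reference the credentials by this UUID,
    // e.g. `Derive { credentials_id: id, .. }`.
    (registry, id)
}
```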
+pub struct Engine {
+    config: EngineConfig,
+    semaphore: Arc<Semaphore>,
+}
+
+impl Engine {
+    /// Creates a new engine with the given configuration.
+    pub fn new(config: EngineConfig) -> Self {
+        let semaphore = Arc::new(Semaphore::new(config.max_concurrent_runs));
+
+        tracing::info!(
+            target: TRACING_TARGET,
+            max_concurrent_runs = config.max_concurrent_runs,
+            default_timeout_secs = config.default_timeout.as_secs(),
+            "Workflow engine initialized"
+        );
+
+        Self { config, semaphore }
+    }
+
+    /// Creates a new engine with default configuration.
+    pub fn with_defaults() -> Self {
+        Self::new(EngineConfig::default())
+    }
+
+    /// Returns the engine configuration.
+    pub fn config(&self) -> &EngineConfig {
+        &self.config
+    }
+
+    /// Executes a workflow definition.
+    ///
+    /// The definition is compiled into an executable graph and then executed.
+    /// Execution is pipe-based: items are read from inputs one at a time,
+    /// flow through all transformers, and are written to outputs before
+    /// the next item is processed.
+    pub async fn execute(
+        &self,
+        definition: Workflow,
+        credentials: CredentialsRegistry,
+        ctx: Context,
+    ) -> Result<ExecutionContext> {
+        // Compile the definition into an executable graph
+        let compiler = WorkflowCompiler::new(&credentials, ctx);
+        let graph = compiler.compile(definition).await?;
+
+        self.execute_graph(graph, credentials).await
+    }
+
+    /// Executes a pre-compiled workflow graph.
+    ///
+    /// Use [`Self::execute`] to compile and execute a workflow definition in one step.
+    /// This method is useful when you want to reuse a compiled graph multiple times.
+    pub async fn execute_graph(
+        &self,
+        mut graph: CompiledGraph,
+        credentials: CredentialsRegistry,
+    ) -> Result<ExecutionContext> {
+        let _permit = self
+            .semaphore
+            .acquire()
+            .await
+            .map_err(|e| Error::Internal(format!("semaphore closed: {}", e)))?;
+
+        let order = graph
+            .topological_order()
+            .ok_or_else(|| Error::InvalidDefinition("compiled graph contains a cycle".into()))?;
+
+        tracing::debug!(
+            target: TRACING_TARGET,
+            node_count = order.len(),
+            "Starting workflow execution"
+        );
+
+        let mut ctx = ExecutionContext::new(credentials);
+
+        // Execute the compiled pipeline
+        self.execute_pipeline(&mut graph, &order, &mut ctx).await?;
+
+        tracing::debug!(
+            target: TRACING_TARGET,
+            items_processed = ctx.items_processed(),
+            "Workflow execution completed"
+        );
+
+        Ok(ctx)
+    }
+
+    /// Executes a compiled pipeline by streaming items through.
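+    ///
+    /// For every item read from an input stream, the transforms are applied in
+    /// topological order and the resulting items are fanned out to every output
+    /// stream before the next item is pulled. Output streams are closed once all
+    /// inputs are exhausted.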
+    async fn execute_pipeline(
+        &self,
+        graph: &mut CompiledGraph,
+        order: &[NodeId],
+        ctx: &mut ExecutionContext,
+    ) -> Result<()> {
+        // Collect input and output node IDs
+        let input_ids: Vec<NodeId> = order
+            .iter()
+            .filter(|id| graph.node(id).map(|n| n.is_input()).unwrap_or(false))
+            .copied()
+            .collect();
+
+        let output_ids: Vec<NodeId> = order
+            .iter()
+            .filter(|id| graph.node(id).map(|n| n.is_output()).unwrap_or(false))
+            .copied()
+            .collect();
+
+        let transform_ids: Vec<NodeId> = order
+            .iter()
+            .filter(|id| graph.node(id).map(|n| n.is_transform()).unwrap_or(false))
+            .copied()
+            .collect();
+
+        // Take ownership of input streams
+        let mut input_streams: Vec<(NodeId, InputStream)> = Vec::new();
+        for id in &input_ids {
+            if let Some(node) = graph.node_mut(id)
+                && let CompiledNode::Input(compiled_input) = node
+            {
+                // Create a placeholder stream and swap with the real one
+                let placeholder = InputStream::new(Box::pin(futures::stream::empty()));
+                let stream = std::mem::replace(compiled_input.stream_mut(), placeholder);
+                input_streams.push((*id, stream));
+            }
+        }
+
+        // Take ownership of output streams
+        let mut output_streams: Vec<(NodeId, OutputStream)> = Vec::new();
+        for id in &output_ids {
+            if let Some(CompiledNode::Output(compiled_output)) = graph.node_mut(id) {
+                // Create a placeholder sink
+                let placeholder = OutputStream::new(Box::pin(futures::sink::drain().sink_map_err(
+                    |_: std::convert::Infallible| Error::Internal("drain sink error".into()),
+                )));
+                let stream = std::mem::replace(compiled_output.stream_mut(), placeholder);
+                output_streams.push((*id, stream));
+            }
+        }
+
+        // Process each input stream
+        for (input_node_id, mut input_stream) in input_streams {
+            tracing::debug!(
+                target: TRACING_TARGET,
+                node_id = %input_node_id,
+                "Reading from input stream"
+            );
+
+            while let Some(result) = input_stream.next().await {
+                let item = result?;
+
+                // Start with single input item
+                ctx.set_current_single(item);
+
+                // Execute transforms in order
+                for transform_id in &transform_ids {
+                    if let Some(node) = graph.node(transform_id)
+                        && let Some(transform) = node.as_transform()
+                    {
+                        let input_data = ctx.take_current();
+                        let output_data = transform.process(input_data).await?;
+                        ctx.set_current(output_data);
+                    }
+                }
+
+                // Write to outputs
+                let output_data = ctx.take_current();
+                if !output_data.is_empty() {
+                    for (output_node_id, output_stream) in &mut output_streams {
+                        tracing::trace!(
+                            target: TRACING_TARGET,
+                            node_id = %output_node_id,
+                            item_count = output_data.len(),
+                            "Writing to output stream"
+                        );
+
+                        for item in output_data.clone() {
+                            output_stream.send(item).await?;
+                        }
+                    }
+                }
+
+                ctx.mark_processed();
+                ctx.clear();
+            }
+        }
+
+        // Close all output streams
+        for (_, mut output_stream) in output_streams {
+            output_stream.close().await?;
+        }
+
+        Ok(())
+    }
+
+    /// Returns the number of available execution slots.
+    pub fn available_slots(&self) -> usize {
+        self.semaphore.available_permits()
+    }
+}
+
+impl std::fmt::Debug for Engine {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Engine")
+            .field("config", &self.config)
+            .field("available_slots", &self.available_slots())
+            .finish()
+    }
+}
diff --git a/crates/nvisy-runtime/src/engine/mod.rs b/crates/nvisy-runtime/src/engine/mod.rs
new file mode 100644
index 0000000..031ef29
--- /dev/null
+++ b/crates/nvisy-runtime/src/engine/mod.rs
@@ -0,0 +1,18 @@
+//! Workflow execution engine.
+//!
+//! This module provides the runtime for executing workflows:
+//! - [`Engine`]: The main execution engine
+//! - [`EngineConfig`]: Configuration options
+//! - [`ExecutionContext`]: Runtime context for workflow execution
+//! - [`CredentialsRegistry`]: Registry for AI provider credentials
+
+mod compiler;
+mod config;
+mod context;
+mod credentials;
+mod executor;
+
+pub use config::EngineConfig;
+pub use context::{Context, ExecutionContext};
+pub use credentials::{CredentialsRegistry, ProviderCredentials};
+pub use executor::Engine;
diff --git a/crates/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs
new file mode 100644
index 0000000..190e9ce
--- /dev/null
+++ b/crates/nvisy-runtime/src/error.rs
@@ -0,0 +1,63 @@
+//! Workflow error types.
+
+use thiserror::Error;
+use uuid::Uuid;
+
+use crate::definition::NodeId;
+
+/// Result type for workflow operations.
+pub type Result<T, E = Error> = std::result::Result<T, E>;
+
+/// Errors that can occur during workflow operations.
+#[derive(Debug, Error)]
+pub enum Error {
+    /// Workflow definition is invalid.
+    #[error("invalid workflow definition: {0}")]
+    InvalidDefinition(String),
+
+    /// Node configuration is invalid.
+    #[error("invalid config for node {node_id}: {message}")]
+    InvalidNodeConfig {
+        /// ID of the node with invalid config.
+        node_id: NodeId,
+        /// Error message.
+        message: String,
+    },
+
+    /// Node execution failed.
+    #[error("node {node_id} failed: {message}")]
+    NodeFailed {
+        /// ID of the failed node.
+        node_id: NodeId,
+        /// Error message.
+        message: String,
+    },
+
+    /// Workflow execution was cancelled.
+    #[error("workflow execution cancelled")]
+    Cancelled,
+
+    /// Workflow execution timed out.
+    #[error("workflow execution timed out")]
+    Timeout,
+
+    /// Failed to construct credentials registry.
+    #[error("failed to construct credentials registry: {0}")]
+    CredentialsRegistry(#[source] serde_json::Error),
+
+    /// Credentials not found.
+    #[error("credentials not found: {0}")]
+    CredentialsNotFound(Uuid),
+
+    /// Storage operation failed.
+    #[error("storage error: {0}")]
+    Storage(#[from] nvisy_dal::Error),
+
+    /// Serialization/deserialization error.
+    #[error("serialization error: {0}")]
+    Serialization(#[from] serde_json::Error),
+
+    /// Internal error.
+    #[error("internal error: {0}")]
+    Internal(String),
+}
diff --git a/crates/nvisy-runtime/src/graph/edge.rs b/crates/nvisy-runtime/src/graph/edge.rs
new file mode 100644
index 0000000..c2f82ba
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/edge.rs
@@ -0,0 +1,30 @@
+//! Edge data for compiled graphs.
+
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+
+/// Edge data stored in the compiled graph.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
+#[derive(Serialize, Deserialize, Builder)]
+#[builder(
+    name = "EdgeDataBuilder",
+    pattern = "owned",
+    setter(into, strip_option, prefix = "with")
+)]
+pub struct EdgeData {
+    /// Optional port/slot name on the source node.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[builder(default)]
+    pub from_port: Option<String>,
+    /// Optional port/slot name on the target node.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[builder(default)]
+    pub to_port: Option<String>,
+}
+
+impl EdgeData {
+    /// Returns a builder for creating edge data.
+    pub fn builder() -> EdgeDataBuilder {
+        EdgeDataBuilder::default()
+    }
+}
diff --git a/crates/nvisy-runtime/src/graph/input/mod.rs b/crates/nvisy-runtime/src/graph/input/mod.rs
new file mode 100644
index 0000000..dba1e79
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/input/mod.rs
@@ -0,0 +1,38 @@
+//! Compiled input node types.
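+//!
+//! # Example
+//!
+//! A minimal sketch of wiring up a compiled input (not a doctest; it assumes
+//! `AnyDataValue` items are produced elsewhere and uses an empty stream as a
+//! stand-in):
+//!
+//! ```ignore
+//! use futures::stream;
+//!
+//! // Any `Stream<Item = Result<AnyDataValue>>` can be boxed into a `DataStream`;
+//! // an empty stream stands in for a real provider here.
+//! let stream = InputStream::new(Box::pin(stream::empty()));
+//! let input = CompiledInput::new(stream);
+//! assert_eq!(input.stream().items_read(), 0);
+//! ```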
+
+mod stream;
+
+pub use stream::{DataStream, InputStream};
+
+/// Compiled input node - ready to stream data.
+///
+/// This is the runtime representation of an input node after compilation.
+/// Cache slots are resolved during compilation, so compiled inputs always
+/// wrap concrete input streams.
+#[derive(Debug)]
+pub struct CompiledInput {
+    /// The input stream for reading data.
+    stream: InputStream,
+}
+
+impl CompiledInput {
+    /// Creates a new compiled input from an input stream.
+    pub fn new(stream: InputStream) -> Self {
+        Self { stream }
+    }
+
+    /// Returns a reference to the input stream.
+    pub fn stream(&self) -> &InputStream {
+        &self.stream
+    }
+
+    /// Returns a mutable reference to the input stream.
+    pub fn stream_mut(&mut self) -> &mut InputStream {
+        &mut self.stream
+    }
+
+    /// Consumes this compiled input and returns the underlying stream.
+    pub fn into_stream(self) -> InputStream {
+        self.stream
+    }
+}
diff --git a/crates/nvisy-runtime/src/graph/input/stream.rs b/crates/nvisy-runtime/src/graph/input/stream.rs
new file mode 100644
index 0000000..5a0d667
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/input/stream.rs
@@ -0,0 +1,124 @@
+//! Input stream types for compiled workflow data flow.
+
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use futures::stream::BoxStream;
+use futures::{Stream, StreamExt};
+use nvisy_dal::AnyDataValue;
+
+use crate::error::Result;
+
+/// A boxed stream of workflow data values.
+pub type DataStream = BoxStream<'static, Result<AnyDataValue>>;
+
+/// Input stream for reading data in a workflow.
+///
+/// Wraps a boxed stream and provides metadata about the source.
+pub struct InputStream {
+    /// The underlying data stream.
+    stream: DataStream,
+    /// Optional cursor for pagination.
+    cursor: Option<String>,
+    /// Optional limit on items to read.
+    limit: Option<usize>,
+    /// Number of items read so far.
+    items_read: usize,
+}
+
+impl InputStream {
+    /// Creates a new input stream.
+    pub fn new(stream: DataStream) -> Self {
+        Self {
+            stream,
+            cursor: None,
+            limit: None,
+            items_read: 0,
+        }
+    }
+
+    /// Creates an input stream with a cursor for pagination.
+    pub fn with_cursor(stream: DataStream, cursor: impl Into<String>) -> Self {
+        Self {
+            stream,
+            cursor: Some(cursor.into()),
+            limit: None,
+            items_read: 0,
+        }
+    }
+
+    /// Creates an input stream with a limit on items to read.
+    pub fn with_limit(stream: DataStream, limit: usize) -> Self {
+        Self {
+            stream: Box::pin(stream.take(limit)),
+            cursor: None,
+            limit: Some(limit),
+            items_read: 0,
+        }
+    }
+
+    /// Creates an input stream with both cursor and limit.
+    pub fn with_cursor_and_limit(
+        stream: DataStream,
+        cursor: impl Into<String>,
+        limit: usize,
+    ) -> Self {
+        Self {
+            stream: Box::pin(stream.take(limit)),
+            cursor: Some(cursor.into()),
+            limit: Some(limit),
+            items_read: 0,
+        }
+    }
+
+    /// Returns the cursor for the next page, if any.
+    pub fn cursor(&self) -> Option<&str> {
+        self.cursor.as_deref()
+    }
+
+    /// Returns the limit on items to read, if set.
+    pub fn limit(&self) -> Option<usize> {
+        self.limit
+    }
+
+    /// Returns the number of items read so far.
+    pub fn items_read(&self) -> usize {
+        self.items_read
+    }
+
+    /// Consumes the stream and returns the inner boxed stream.
+    pub fn into_inner(self) -> DataStream {
+        self.stream
+    }
+
+    /// Consumes the stream and returns all parts.
+ pub fn into_parts(self) -> (DataStream, Option, Option) { + (self.stream, self.cursor, self.limit) + } +} + +impl Stream for InputStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let result = Pin::new(&mut self.stream).poll_next(cx); + if let Poll::Ready(Some(Ok(_))) = &result { + self.items_read += 1; + } + result + } + + fn size_hint(&self) -> (usize, Option) { + self.stream.size_hint() + } +} + +impl std::fmt::Debug for InputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InputStream") + .field("cursor", &self.cursor) + .field("limit", &self.limit) + .field("items_read", &self.items_read) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs new file mode 100644 index 0000000..81cc5eb --- /dev/null +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -0,0 +1,211 @@ +//! Compiled workflow types for execution. +//! +//! This module contains runtime-optimized types for executing workflows. +//! These types are created by compiling workflow definitions and are +//! optimized for: +//! - Fast execution without lookups +//! - Pre-resolved cache slots +//! - Pre-instantiated providers and agents +//! +//! To create compiled types, use the [`crate::engine::Engine`]. + +use std::collections::HashMap; + +use petgraph::Direction; +use petgraph::graph::{DiGraph, NodeIndex}; + +mod edge; +mod input; +mod node; +mod output; +mod route; +mod transform; + +pub use edge::EdgeData; +pub use input::{CompiledInput, DataStream, InputStream}; +pub use node::CompiledNode; +pub use output::{CompiledOutput, DataSink, OutputStream}; +pub use route::{CompiledSwitch, FileCategoryEvaluator, LanguageEvaluator, SwitchEvaluator}; +pub use transform::{ + ChunkProcessor, CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, + ExtractProcessor, PartitionProcessor, Process, +}; + +use crate::definition::{FileCategory, NodeId, WorkflowMetadata}; + +/// A compiled workflow graph ready for execution. +/// +/// This is the runtime representation of a workflow after compilation. +/// All cache slots are resolved into direct edges, and all node definitions +/// are compiled into their executable forms. +pub struct CompiledGraph { + /// The underlying directed graph. + graph: DiGraph, + /// Map from node IDs to graph indices. + node_indices: HashMap, + /// Map from graph indices to node IDs. + index_to_id: HashMap, + /// Workflow metadata. + metadata: WorkflowMetadata, +} + +impl CompiledGraph { + /// Creates a new compiled graph. + pub fn new( + graph: DiGraph, + node_indices: HashMap, + metadata: WorkflowMetadata, + ) -> Self { + let index_to_id = node_indices.iter().map(|(k, v)| (*v, *k)).collect(); + Self { + graph, + node_indices, + index_to_id, + metadata, + } + } + + /// Returns the workflow metadata. + pub fn metadata(&self) -> &WorkflowMetadata { + &self.metadata + } + + /// Returns the number of nodes in the graph. + pub fn node_count(&self) -> usize { + self.graph.node_count() + } + + /// Returns the number of edges in the graph. + pub fn edge_count(&self) -> usize { + self.graph.edge_count() + } + + /// Returns a reference to a node by ID. + pub fn node(&self, id: &NodeId) -> Option<&CompiledNode> { + self.node_indices + .get(id) + .and_then(|&idx| self.graph.node_weight(idx)) + } + + /// Returns a mutable reference to a node by ID. 
+ pub fn node_mut(&mut self, id: &NodeId) -> Option<&mut CompiledNode> { + self.node_indices + .get(id) + .copied() + .and_then(|idx| self.graph.node_weight_mut(idx)) + } + + /// Returns the node ID for a graph index. + pub fn node_id(&self, index: NodeIndex) -> Option { + self.index_to_id.get(&index).copied() + } + + /// Returns the graph index for a node ID. + pub fn node_index(&self, id: &NodeId) -> Option { + self.node_indices.get(id).copied() + } + + /// Returns an iterator over all node IDs. + pub fn node_ids(&self) -> impl Iterator { + self.node_indices.keys() + } + + /// Returns an iterator over all nodes with their IDs. + pub fn nodes(&self) -> impl Iterator { + self.node_indices + .iter() + .filter_map(|(id, &idx)| self.graph.node_weight(idx).map(|node| (id, node))) + } + + /// Returns an iterator over input nodes. + pub fn input_nodes(&self) -> impl Iterator { + self.nodes() + .filter_map(|(id, node)| node.as_input().map(|input| (id, input))) + } + + /// Returns an iterator over output nodes. + pub fn output_nodes(&self) -> impl Iterator { + self.nodes() + .filter_map(|(id, node)| node.as_output().map(|output| (id, output))) + } + + /// Returns the predecessors (incoming nodes) of a node. + pub fn predecessors(&self, id: &NodeId) -> impl Iterator { + self.node_indices.get(id).into_iter().flat_map(|&idx| { + self.graph + .neighbors_directed(idx, Direction::Incoming) + .filter_map(|pred_idx| self.index_to_id.get(&pred_idx)) + }) + } + + /// Returns the successors (outgoing nodes) of a node. + pub fn successors(&self, id: &NodeId) -> impl Iterator { + self.node_indices.get(id).into_iter().flat_map(|&idx| { + self.graph + .neighbors_directed(idx, Direction::Outgoing) + .filter_map(|succ_idx| self.index_to_id.get(&succ_idx)) + }) + } + + /// Returns the edge data between two nodes, if an edge exists. + pub fn edge(&self, from: &NodeId, to: &NodeId) -> Option<&EdgeData> { + let from_idx = self.node_indices.get(from)?; + let to_idx = self.node_indices.get(to)?; + self.graph + .find_edge(*from_idx, *to_idx) + .and_then(|e| self.graph.edge_weight(e)) + } + + /// Returns topologically sorted node IDs (sources first). + /// + /// Returns `None` if the graph contains a cycle. + pub fn topological_order(&self) -> Option> { + petgraph::algo::toposort(&self.graph, None) + .ok() + .map(|indices| { + indices + .into_iter() + .filter_map(|idx| self.index_to_id.get(&idx).copied()) + .collect() + }) + } + + /// Consumes the graph and returns ownership of all nodes. + /// + /// Returns a map from node IDs to compiled nodes. + pub fn into_nodes(mut self) -> HashMap { + let mut nodes = HashMap::with_capacity(self.node_indices.len()); + for (id, &idx) in &self.node_indices { + if let Some(node) = self.graph.node_weight_mut(idx) { + // Use mem::replace with a placeholder to take ownership + // This is safe because we won't access the graph again + let placeholder = CompiledNode::Switch(CompiledSwitch::new( + SwitchEvaluator::FileCategory(FileCategoryEvaluator::new(FileCategory::Other)), + )); + let owned = std::mem::replace(node, placeholder); + nodes.insert(*id, owned); + } + } + nodes + } + + /// Returns a reference to the underlying petgraph. + pub fn inner(&self) -> &DiGraph { + &self.graph + } + + /// Returns a mutable reference to the underlying petgraph. 
+ pub fn inner_mut(&mut self) -> &mut DiGraph { + &mut self.graph + } +} + +impl std::fmt::Debug for CompiledGraph { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompiledGraph") + .field("node_count", &self.graph.node_count()) + .field("edge_count", &self.graph.edge_count()) + .field("metadata", &self.metadata) + .finish() + } +} diff --git a/crates/nvisy-runtime/src/graph/node.rs b/crates/nvisy-runtime/src/graph/node.rs new file mode 100644 index 0000000..d848613 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/node.rs @@ -0,0 +1,140 @@ +//! Compiled node types. + +use super::input::CompiledInput; +use super::output::CompiledOutput; +use super::route::CompiledSwitch; +use super::transform::CompiledTransform; + +/// Compiled node enum for workflow execution. +/// +/// This is the runtime representation of a node after compilation. +/// Cache slots are resolved during compilation, so compiled nodes +/// only contain concrete processing types. +#[derive(Debug)] +pub enum CompiledNode { + /// Data input node - ready to stream data. + Input(CompiledInput), + /// Data output node - ready to receive data. + Output(CompiledOutput), + /// Data transform node - ready to process data. + /// Boxed to reduce enum size variance (transform processors are large). + Transform(Box), + /// Conditional routing node - evaluates conditions. + Switch(CompiledSwitch), +} + +impl CompiledNode { + /// Returns whether this is an input node. + pub const fn is_input(&self) -> bool { + matches!(self, CompiledNode::Input(_)) + } + + /// Returns whether this is an output node. + pub const fn is_output(&self) -> bool { + matches!(self, CompiledNode::Output(_)) + } + + /// Returns whether this is a transform node. + pub const fn is_transform(&self) -> bool { + matches!(self, CompiledNode::Transform(_)) + } + + /// Returns whether this is a switch node. + pub const fn is_switch(&self) -> bool { + matches!(self, CompiledNode::Switch(_)) + } + + /// Returns this node as an input, if it is one. + pub fn as_input(&self) -> Option<&CompiledInput> { + match self { + CompiledNode::Input(input) => Some(input), + _ => None, + } + } + + /// Returns this node as an output, if it is one. + pub fn as_output(&self) -> Option<&CompiledOutput> { + match self { + CompiledNode::Output(output) => Some(output), + _ => None, + } + } + + /// Returns this node as a transform, if it is one. + pub fn as_transform(&self) -> Option<&CompiledTransform> { + match self { + CompiledNode::Transform(transform) => Some(transform.as_ref()), + _ => None, + } + } + + /// Returns this node as a switch, if it is one. + pub fn as_switch(&self) -> Option<&CompiledSwitch> { + match self { + CompiledNode::Switch(switch) => Some(switch), + _ => None, + } + } + + /// Consumes this node and returns the input, if it is one. + pub fn into_input(self) -> Option { + match self { + CompiledNode::Input(input) => Some(input), + _ => None, + } + } + + /// Consumes this node and returns the output, if it is one. + pub fn into_output(self) -> Option { + match self { + CompiledNode::Output(output) => Some(output), + _ => None, + } + } + + /// Consumes this node and returns the transform, if it is one. + pub fn into_transform(self) -> Option> { + match self { + CompiledNode::Transform(transform) => Some(transform), + _ => None, + } + } + + /// Consumes this node and returns the switch, if it is one. 
+ pub fn into_switch(self) -> Option { + match self { + CompiledNode::Switch(switch) => Some(switch), + _ => None, + } + } +} + +impl From for CompiledNode { + fn from(input: CompiledInput) -> Self { + CompiledNode::Input(input) + } +} + +impl From for CompiledNode { + fn from(output: CompiledOutput) -> Self { + CompiledNode::Output(output) + } +} + +impl From for CompiledNode { + fn from(transform: CompiledTransform) -> Self { + CompiledNode::Transform(Box::new(transform)) + } +} + +impl From> for CompiledNode { + fn from(transform: Box) -> Self { + CompiledNode::Transform(transform) + } +} + +impl From for CompiledNode { + fn from(switch: CompiledSwitch) -> Self { + CompiledNode::Switch(switch) + } +} diff --git a/crates/nvisy-runtime/src/graph/output/mod.rs b/crates/nvisy-runtime/src/graph/output/mod.rs new file mode 100644 index 0000000..3282bc4 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/output/mod.rs @@ -0,0 +1,38 @@ +//! Compiled output node types. + +mod stream; + +pub use stream::{DataSink, OutputStream}; + +/// Compiled output node - ready to receive data. +/// +/// This is the runtime representation of an output node after compilation. +/// Cache slots are resolved during compilation, so compiled outputs always +/// wrap concrete output streams. +#[derive(Debug)] +pub struct CompiledOutput { + /// The output stream for writing data. + stream: OutputStream, +} + +impl CompiledOutput { + /// Creates a new compiled output from an output stream. + pub fn new(stream: OutputStream) -> Self { + Self { stream } + } + + /// Returns a reference to the output stream. + pub fn stream(&self) -> &OutputStream { + &self.stream + } + + /// Returns a mutable reference to the output stream. + pub fn stream_mut(&mut self) -> &mut OutputStream { + &mut self.stream + } + + /// Consumes this compiled output and returns the underlying stream. + pub fn into_stream(self) -> OutputStream { + self.stream + } +} diff --git a/crates/nvisy-runtime/src/graph/output/stream.rs b/crates/nvisy-runtime/src/graph/output/stream.rs new file mode 100644 index 0000000..50873da --- /dev/null +++ b/crates/nvisy-runtime/src/graph/output/stream.rs @@ -0,0 +1,101 @@ +//! Output stream types for compiled workflow data flow. + +use std::pin::Pin; +use std::task::{Context, Poll}; + +use futures::{Sink, SinkExt}; +use nvisy_dal::AnyDataValue; + +use crate::error::Error; + +/// A boxed sink for workflow data values. +pub type DataSink = Pin + Send + 'static>>; + +/// Output stream for writing data in a workflow. +/// +/// Wraps a boxed sink and tracks write statistics. +pub struct OutputStream { + /// The underlying data sink. + sink: DataSink, + /// Optional buffer size for batching. + buffer_size: Option, + /// Number of items written so far. + items_written: usize, +} + +impl OutputStream { + /// Creates a new output stream. + pub fn new(sink: DataSink) -> Self { + Self { + sink, + buffer_size: None, + items_written: 0, + } + } + + /// Creates an output stream with buffering for batched writes. + pub fn with_buffer(sink: DataSink, buffer_size: usize) -> Self { + Self { + sink: Box::pin(sink.buffer(buffer_size)), + buffer_size: Some(buffer_size), + items_written: 0, + } + } + + /// Returns the buffer size, if set. + pub fn buffer_size(&self) -> Option { + self.buffer_size + } + + /// Returns the number of items written so far. + pub fn items_written(&self) -> usize { + self.items_written + } + + /// Consumes the stream and returns the inner boxed sink. 
+ pub fn into_inner(self) -> DataSink { + self.sink + } +} + +impl Sink for OutputStream { + type Error = Error; + + fn poll_ready( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + self.sink.as_mut().poll_ready(cx) + } + + fn start_send( + mut self: Pin<&mut Self>, + item: AnyDataValue, + ) -> std::result::Result<(), Self::Error> { + self.items_written += 1; + self.sink.as_mut().start_send(item) + } + + fn poll_flush( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + self.sink.as_mut().poll_flush(cx) + } + + fn poll_close( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + self.sink.as_mut().poll_close(cx) + } +} + +impl std::fmt::Debug for OutputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OutputStream") + .field("buffer_size", &self.buffer_size) + .field("items_written", &self.items_written) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/route/file_category.rs b/crates/nvisy-runtime/src/graph/route/file_category.rs new file mode 100644 index 0000000..e319e1d --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/file_category.rs @@ -0,0 +1,131 @@ +//! File category evaluator for routing by file extension. + +use nvisy_dal::AnyDataValue; + +use crate::definition::FileCategory; + +/// Evaluates file category based on extension. +#[derive(Debug, Clone)] +pub struct FileCategoryEvaluator { + /// File category to match. + category: FileCategory, +} + +impl FileCategoryEvaluator { + /// Creates a new file category evaluator. + pub fn new(category: FileCategory) -> Self { + Self { category } + } + + /// Evaluates whether the data matches the file category. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + // Extract path from the value based on data type + let path: Option<&str> = match data { + AnyDataValue::Object(obj) => Some(obj.path.as_str()), + AnyDataValue::Document(doc) => doc.metadata.get("path").and_then(|v| v.as_str()), + AnyDataValue::Record(rec) => rec + .columns + .get("path") + .or_else(|| rec.columns.get("key")) + .and_then(|v| v.as_str()), + _ => None, + }; + + let Some(path) = path else { + return self.category == FileCategory::Other; + }; + + let Some(ext) = path.rsplit('.').next() else { + return self.category == FileCategory::Other; + }; + + let ext = ext.to_lowercase(); + match self.category { + FileCategory::Text => { + matches!(ext.as_str(), "txt" | "md" | "markdown" | "rst" | "text") + } + FileCategory::Image => { + matches!( + ext.as_str(), + "jpg" + | "jpeg" + | "png" + | "gif" + | "bmp" + | "webp" + | "svg" + | "ico" + | "tiff" + | "tif" + ) + } + FileCategory::Audio => { + matches!( + ext.as_str(), + "mp3" | "wav" | "flac" | "aac" | "ogg" | "wma" | "m4a" + ) + } + FileCategory::Video => { + matches!( + ext.as_str(), + "mp4" | "webm" | "avi" | "mov" | "mkv" | "wmv" | "flv" | "m4v" + ) + } + FileCategory::Document => { + matches!( + ext.as_str(), + "pdf" | "doc" | "docx" | "odt" | "rtf" | "epub" + ) + } + FileCategory::Archive => { + matches!( + ext.as_str(), + "zip" | "tar" | "gz" | "rar" | "7z" | "bz2" | "xz" + ) + } + FileCategory::Spreadsheet => { + matches!(ext.as_str(), "xls" | "xlsx" | "csv" | "ods" | "tsv") + } + FileCategory::Presentation => { + matches!(ext.as_str(), "ppt" | "pptx" | "odp" | "key") + } + FileCategory::Code => { + matches!( + ext.as_str(), + "rs" | "py" + | "js" + | "ts" + | "java" + | "c" + | "cpp" + | "h" + | "hpp" + | "go" + | "rb" + | "php" + | "swift" + | "kt" + | "scala" + | "sh" + | 
"bash" + | "zsh" + | "sql" + | "html" + | "css" + | "json" + | "yaml" + | "yml" + | "toml" + | "xml" + ) + } + FileCategory::Other => true, + } + } +} + +impl From for FileCategoryEvaluator { + fn from(category: FileCategory) -> Self { + Self::new(category) + } +} diff --git a/crates/nvisy-runtime/src/graph/route/language.rs b/crates/nvisy-runtime/src/graph/route/language.rs new file mode 100644 index 0000000..c1bed33 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/language.rs @@ -0,0 +1,84 @@ +//! Language evaluator for routing by detected content language. + +use nvisy_dal::AnyDataValue; + +/// Evaluates language based on metadata. +#[derive(Debug, Clone)] +pub struct LanguageEvaluator { + /// Language codes to match. + codes: Vec, + /// Minimum confidence threshold. + min_confidence: f32, +} + +impl LanguageEvaluator { + /// Creates a new language evaluator. + pub fn new(codes: Vec, min_confidence: f32) -> Self { + Self { + codes, + min_confidence, + } + } + + /// Evaluates whether the data matches any of the language codes. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + let detected_lang = self.get_metadata_string(data, "language"); + let confidence = self.get_metadata_f32(data, "language_confidence"); + + match (detected_lang, confidence) { + (Some(lang), Some(conf)) => { + self.codes + .iter() + .any(|code| lang.eq_ignore_ascii_case(code)) + && conf >= self.min_confidence + } + (Some(lang), None) => self + .codes + .iter() + .any(|code| lang.eq_ignore_ascii_case(code)), + _ => false, + } + } + + /// Gets a string metadata value. + fn get_metadata_string(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Object(obj) => obj.metadata.get(key).and_then(json_to_string), + AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_string), + AnyDataValue::Document(doc) => doc.metadata.get(key).and_then(json_to_string), + _ => None, + } + } + + /// Gets an f32 metadata value. + fn get_metadata_f32(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Object(obj) => obj + .metadata + .get(key) + .and_then(|v| v.as_f64()) + .map(|v| v as f32), + AnyDataValue::Record(record) => record + .columns + .get(key) + .and_then(|v| v.as_f64()) + .map(|v| v as f32), + AnyDataValue::Document(doc) => doc + .metadata + .get(key) + .and_then(|v| v.as_f64()) + .map(|v| v as f32), + _ => None, + } + } +} + +/// Converts a JSON value to a string. +fn json_to_string(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::String(s) => Some(s.clone()), + serde_json::Value::Number(n) => Some(n.to_string()), + serde_json::Value::Bool(b) => Some(b.to_string()), + _ => None, + } +} diff --git a/crates/nvisy-runtime/src/graph/route/mod.rs b/crates/nvisy-runtime/src/graph/route/mod.rs new file mode 100644 index 0000000..ade7107 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/mod.rs @@ -0,0 +1,67 @@ +//! Compiled routing types for conditional data flow. + +mod file_category; +mod language; + +pub use file_category::FileCategoryEvaluator; +pub use language::LanguageEvaluator; +use nvisy_dal::AnyDataValue; + +use crate::definition::SwitchDef; + +/// Compiled switch node - evaluates conditions and returns true/false. +#[derive(Debug, Clone)] +pub struct CompiledSwitch { + /// The evaluator for this switch. + evaluator: SwitchEvaluator, +} + +/// Evaluator enum for switch conditions. +#[derive(Debug, Clone)] +pub enum SwitchEvaluator { + /// Evaluate by file category (extension). 
+ FileCategory(FileCategoryEvaluator), + /// Evaluate by detected language. + Language(LanguageEvaluator), +} + +impl SwitchEvaluator { + /// Evaluates the condition against the data. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + match self { + SwitchEvaluator::FileCategory(e) => e.evaluate(data), + SwitchEvaluator::Language(e) => e.evaluate(data), + } + } +} + +impl CompiledSwitch { + /// Creates a new compiled switch. + pub fn new(evaluator: SwitchEvaluator) -> Self { + Self { evaluator } + } + + /// Evaluates the switch condition against input data. + /// + /// Returns `true` if the condition matches, `false` otherwise. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + self.evaluator.evaluate(data) + } +} + +impl From for CompiledSwitch { + fn from(def: SwitchDef) -> Self { + use crate::definition::SwitchCondition; + + let evaluator = match def.condition { + SwitchCondition::FileCategory(c) => { + SwitchEvaluator::FileCategory(FileCategoryEvaluator::new(c.category)) + } + SwitchCondition::Language(c) => { + SwitchEvaluator::Language(LanguageEvaluator::new(c.codes, c.min_confidence)) + } + }; + + Self::new(evaluator) + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs new file mode 100644 index 0000000..0c632b7 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -0,0 +1,66 @@ +//! Chunk processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::TextGenerationAgent; + +use super::Process; +use crate::definition::ChunkStrategy; +use crate::error::Result; + +/// Processor for chunking content into smaller pieces. +pub struct ChunkProcessor { + /// Chunking strategy to use. + strategy: ChunkStrategy, + /// Whether to use LLM-powered contextual chunking. + contextual_chunking: bool, + /// Agent for contextual chunking (if enabled). + agent: Option, +} + +impl ChunkProcessor { + /// Creates a new chunk processor without contextual chunking. + pub fn new(strategy: ChunkStrategy) -> Self { + Self { + strategy, + contextual_chunking: false, + agent: None, + } + } + + /// Creates a new chunk processor with contextual chunking enabled. + pub fn with_contextual_chunking(strategy: ChunkStrategy, agent: TextGenerationAgent) -> Self { + Self { + strategy, + contextual_chunking: true, + agent: Some(agent), + } + } + + /// Returns the chunking strategy. + pub fn strategy(&self) -> &ChunkStrategy { + &self.strategy + } + + /// Returns whether contextual chunking is enabled. + pub fn contextual_chunking(&self) -> bool { + self.contextual_chunking + } +} + +impl Process for ChunkProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement chunking based on strategy + // If contextual_chunking is enabled, use agents for context generation + Ok(input) + } +} + +impl std::fmt::Debug for ChunkProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ChunkProcessor") + .field("strategy", &self.strategy) + .field("contextual_chunking", &self.contextual_chunking) + .field("has_agent", &self.agent.is_some()) + .finish() + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs new file mode 100644 index 0000000..4de85fa --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -0,0 +1,61 @@ +//! Derive processor. 
+ +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::TextGenerationAgent; + +use super::Process; +use crate::definition::DeriveTask; +use crate::error::Result; + +/// Processor for generating new content from input. +pub struct DeriveProcessor { + /// Agent for text generation (summarization, titles). + agent: TextGenerationAgent, + /// The derivation task to perform. + task: DeriveTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl DeriveProcessor { + /// Creates a new derive processor. + pub fn new( + agent: TextGenerationAgent, + task: DeriveTask, + override_prompt: Option, + ) -> Self { + Self { + agent, + task, + override_prompt, + } + } + + /// Returns the derivation task. + pub fn task(&self) -> DeriveTask { + self.task + } + + /// Returns the prompt override, if any. + pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } +} + +impl Process for DeriveProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement derivation using agent + // Use self.agent for summarization and title generation + let _ = &self.agent; + Ok(input) + } +} + +impl std::fmt::Debug for DeriveProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DeriveProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs new file mode 100644 index 0000000..078e7e4 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/embedding.rs @@ -0,0 +1,47 @@ +//! Embedding processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::provider::EmbeddingProvider; + +use super::Process; +use crate::error::Result; + +/// Processor for generating vector embeddings. +pub struct EmbeddingProcessor { + /// The embedding provider for generating embeddings. + provider: EmbeddingProvider, + /// Whether to L2-normalize output embeddings. + normalize: bool, +} + +impl EmbeddingProcessor { + /// Creates a new embedding processor. + pub fn new(provider: EmbeddingProvider, normalize: bool) -> Self { + Self { + provider, + normalize, + } + } + + /// Returns whether normalization is enabled. + pub fn normalize(&self) -> bool { + self.normalize + } +} + +impl Process for EmbeddingProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement embedding generation using provider + // For now, pass through unchanged + let _ = &self.provider; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for EmbeddingProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EmbeddingProcessor") + .field("normalize", &self.normalize) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs new file mode 100644 index 0000000..4fe6fa9 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -0,0 +1,66 @@ +//! Enrich processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::{TableAgent, VisionAgent}; + +use super::Process; +use crate::definition::EnrichTask; +use crate::error::Result; + +/// Processor for enriching elements with metadata/descriptions. +pub struct EnrichProcessor { + /// Agent for vision/image tasks. + vision_agent: VisionAgent, + /// Agent for table processing. + table_agent: TableAgent, + /// The enrichment task to perform. 
+ task: EnrichTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl EnrichProcessor { + /// Creates a new enrich processor. + pub fn new( + vision_agent: VisionAgent, + table_agent: TableAgent, + task: EnrichTask, + override_prompt: Option, + ) -> Self { + Self { + vision_agent, + table_agent, + task, + override_prompt, + } + } + + /// Returns the enrichment task. + pub fn task(&self) -> &EnrichTask { + &self.task + } + + /// Returns the prompt override, if any. + pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } +} + +impl Process for EnrichProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement enrichment using agents + // Use self.vision_agent for image tasks + // Use self.table_agent for table tasks + let _ = (&self.vision_agent, &self.table_agent); // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for EnrichProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EnrichProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs new file mode 100644 index 0000000..ee2864a --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -0,0 +1,75 @@ +//! Extract processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::{StructuredOutputAgent, TableAgent, TextAnalysisAgent}; + +use super::Process; +use crate::definition::ExtractTask; +use crate::error::Result; + +/// Processor for extracting structured data or converting formats. +pub struct ExtractProcessor { + /// Agent for text analysis (NER, keywords, classification, sentiment). + text_analysis_agent: TextAnalysisAgent, + /// Agent for table processing. + table_agent: TableAgent, + /// Agent for structured output extraction. + structured_output_agent: StructuredOutputAgent, + /// The extraction task to perform. + task: ExtractTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl ExtractProcessor { + /// Creates a new extract processor. + pub fn new( + text_analysis_agent: TextAnalysisAgent, + table_agent: TableAgent, + structured_output_agent: StructuredOutputAgent, + task: ExtractTask, + override_prompt: Option, + ) -> Self { + Self { + text_analysis_agent, + table_agent, + structured_output_agent, + task, + override_prompt, + } + } + + /// Returns the extraction task. + pub fn task(&self) -> &ExtractTask { + &self.task + } + + /// Returns the prompt override, if any. 
+    pub fn override_prompt(&self) -> Option<&str> {
+        self.override_prompt.as_deref()
+    }
+}
+
+impl Process for ExtractProcessor {
+    async fn process(&self, input: Vec<AnyDataValue>) -> Result<Vec<AnyDataValue>> {
+        // TODO: Implement extraction using agents
+        // Use self.text_analysis_agent for NER, keywords, classification, sentiment
+        // Use self.table_agent for table conversion
+        // Use self.structured_output_agent for JSON conversion
+        let _ = (
+            &self.text_analysis_agent,
+            &self.table_agent,
+            &self.structured_output_agent,
+        );
+        Ok(input)
+    }
+}
+
+impl std::fmt::Debug for ExtractProcessor {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ExtractProcessor")
+            .field("task", &self.task)
+            .field("override_prompt", &self.override_prompt)
+            .finish_non_exhaustive()
+    }
+}
diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs
new file mode 100644
index 0000000..93818d6
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/transform/mod.rs
@@ -0,0 +1,77 @@
+//! Compiled transform node types.
+//!
+//! Processors are the runtime representation of transform nodes. Each processor
+//! encapsulates the logic and dependencies needed to execute a specific transform.
+
+mod chunk;
+mod derive;
+mod embedding;
+mod enrich;
+mod extract;
+mod partition;
+
+use std::future::Future;
+
+pub use chunk::ChunkProcessor;
+pub use derive::DeriveProcessor;
+pub use embedding::EmbeddingProcessor;
+pub use enrich::EnrichProcessor;
+pub use extract::ExtractProcessor;
+use nvisy_dal::AnyDataValue;
+pub use partition::PartitionProcessor;
+
+use crate::error::Result;
+
+/// Trait for processing data in a workflow pipeline.
+///
+/// Processors are the compiled form of transforms. They take input data items
+/// and produce output data items. A single input can produce multiple outputs
+/// (e.g., chunking splits one document into many chunks).
+pub trait Process: Send + Sync {
+    /// Processes input data items into output data items.
+    ///
+    /// # Arguments
+    /// * `input` - The input data items to process
+    ///
+    /// # Returns
+    /// A vector of processed data items (may be more or fewer than input)
+    fn process(
+        &self,
+        input: Vec<AnyDataValue>,
+    ) -> impl Future<Output = Result<Vec<AnyDataValue>>> + Send;
+}
+
+/// Compiled transform node - ready to process data.
+///
+/// Each variant wraps a dedicated processor that encapsulates
+/// the transform logic and any required external dependencies.
+///
+/// Large processor variants are boxed to avoid enum size bloat.
+#[derive(Debug)]
+pub enum CompiledTransform {
+    /// Partition documents into elements.
+    Partition(PartitionProcessor),
+    /// Chunk content into smaller pieces.
+    Chunk(ChunkProcessor),
+    /// Generate vector embeddings.
+    Embedding(EmbeddingProcessor),
+    /// Enrich elements with metadata/descriptions.
+    Enrich(Box<EnrichProcessor>),
+    /// Extract structured data or convert formats.
+    Extract(Box<ExtractProcessor>),
+    /// Generate new content from input.
+ Derive(DeriveProcessor), +} + +impl Process for CompiledTransform { + async fn process(&self, input: Vec) -> Result> { + match self { + Self::Partition(p) => p.process(input).await, + Self::Chunk(p) => p.process(input).await, + Self::Embedding(p) => p.process(input).await, + Self::Enrich(p) => p.process(input).await, + Self::Extract(p) => p.process(input).await, + Self::Derive(p) => p.process(input).await, + } + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/partition.rs b/crates/nvisy-runtime/src/graph/transform/partition.rs new file mode 100644 index 0000000..042858a --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/partition.rs @@ -0,0 +1,56 @@ +//! Partition processor. + +use nvisy_dal::AnyDataValue; + +use super::Process; +use crate::definition::PartitionStrategy; +use crate::error::Result; + +/// Processor for partitioning documents into elements. +#[derive(Debug)] +pub struct PartitionProcessor { + /// Partitioning strategy to use. + strategy: PartitionStrategy, + /// Whether to include page break markers. + include_page_breaks: bool, + /// Whether to discard unsupported element types. + discard_unsupported: bool, +} + +impl PartitionProcessor { + /// Creates a new partition processor. + pub fn new( + strategy: PartitionStrategy, + include_page_breaks: bool, + discard_unsupported: bool, + ) -> Self { + Self { + strategy, + include_page_breaks, + discard_unsupported, + } + } + + /// Returns the partitioning strategy. + pub fn strategy(&self) -> PartitionStrategy { + self.strategy + } + + /// Returns whether page breaks are included. + pub fn include_page_breaks(&self) -> bool { + self.include_page_breaks + } + + /// Returns whether unsupported types are discarded. + pub fn discard_unsupported(&self) -> bool { + self.discard_unsupported + } +} + +impl Process for PartitionProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement document partitioning based on strategy + // For now, pass through unchanged + Ok(input) + } +} diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs new file mode 100644 index 0000000..4045275 --- /dev/null +++ b/crates/nvisy-runtime/src/lib.rs @@ -0,0 +1,14 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +pub mod definition; +pub mod engine; +mod error; +pub mod graph; + +pub use engine::{CredentialsRegistry, ProviderCredentials}; +pub use error::{Error, Result}; + +/// Tracing target for runtime operations. 
+pub const TRACING_TARGET: &str = "nvisy_runtime"; diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index 7be2d42..b664f48 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-server" +description = "High-performance HTTP API server for the Nvisy document redaction platform" +readme = "./README.md" +keywords = ["http", "server", "api", "web", "axum"] +categories = ["web-programming::http-server"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -28,31 +32,31 @@ config = [ "dep:clap", "nvisy-nats/config", "nvisy-postgres/config", - "nvisy-rig/config", ] [dependencies] # Internal crates nvisy-nats = { workspace = true, features = ["schema"] } nvisy-postgres = { workspace = true, features = ["schema"] } -nvisy-rig = { workspace = true, features = [] } +nvisy-runtime = { workspace = true, features = [] } nvisy-webhook = { workspace = true, features = ["schema"] } # Async runtime tokio = { workspace = true, features = [] } tokio-util = { workspace = true, features = [] } +tokio-stream = { workspace = true, features = [] } futures = { workspace = true, features = [] } async-trait = { workspace = true, features = [] } # HTTP & Web server -axum = { workspace = true, features = ["http2", "macros", "ws", "multipart"] } +axum = { workspace = true, features = ["http2", "macros", "multipart"] } axum-client-ip = { workspace = true, features = [] } axum-extra = { workspace = true, features = ["typed-header", "attachment", "query"] } tower = { workspace = true, features = [] } tower-http = { workspace = true, features = [] } # OpenAPI/Documentation -aide = { workspace = true, features = ["axum", "axum-query", "axum-form", "axum-json", "axum-ws", "axum-multipart", "axum-extra", "bytes", "http"] } +aide = { workspace = true, features = ["axum", "axum-query", "axum-form", "axum-json", "axum-multipart", "axum-extra", "bytes", "http"] } schemars = { workspace = true, features = [] } # Observability @@ -75,11 +79,6 @@ validator = { workspace = true, features = [] } regex = { workspace = true, features = [] } woothee = { workspace = true, features = [] } -# Archive/Compression -tar = { workspace = true, features = [] } -flate2 = { workspace = true, features = [] } -zip = { workspace = true, features = [] } - # Derive macros & utilities thiserror = { workspace = true, features = [] } derive_more = { workspace = true, features = ["full"] } diff --git a/crates/nvisy-server/README.md b/crates/nvisy-server/README.md index 3b5df5e..7d89619 100644 --- a/crates/nvisy-server/README.md +++ b/crates/nvisy-server/README.md @@ -1,11 +1,10 @@ -# api.nvisy.com/server +# nvisy-server + +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) High-performance HTTP API server for the Nvisy document redaction platform, built with Axum and Tokio. 
-[![rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![axum](https://img.shields.io/badge/Axum-0.8+-000000?style=flat-square&logo=rust&logoColor=white)](https://github.com/tokio-rs/axum) - ## Features - **Async HTTP Server** - Built with Axum web framework on Tokio runtime @@ -29,3 +28,18 @@ When running, the server exposes interactive documentation at: - **Swagger UI**: `/api/swagger` - **Scalar UI**: `/api/scalar` - **OpenAPI JSON**: `/api/openapi.json` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-server/src/error.rs b/crates/nvisy-server/src/error.rs index 7d006de..a323924 100644 --- a/crates/nvisy-server/src/error.rs +++ b/crates/nvisy-server/src/error.rs @@ -174,6 +174,12 @@ impl From for Error { } } +impl From for Error { + fn from(err: nvisy_postgres::PgError) -> Self { + Error::external("postgres", err.to_string()).with_source(err) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/nvisy-server/src/extract/auth/auth_provider.rs b/crates/nvisy-server/src/extract/auth/auth_provider.rs index 773d196..19e926d 100644 --- a/crates/nvisy-server/src/extract/auth/auth_provider.rs +++ b/crates/nvisy-server/src/extract/auth/auth_provider.rs @@ -5,7 +5,7 @@ //! The trait is designed to be implemented by types that represent authenticated users. use nvisy_postgres::model::WorkspaceMember; -use nvisy_postgres::query::{DocumentRepository, WorkspaceMemberRepository}; +use nvisy_postgres::query::{FileRepository, WorkspaceMemberRepository}; use nvisy_postgres::{PgConn, PgError}; use uuid::Uuid; @@ -120,15 +120,15 @@ pub trait AuthProvider { } } - /// Checks if a user has permission to access a document. + /// Checks if a user has permission to access a file. /// - /// This method resolves the document's workspace and checks workspace-level permissions. - /// Document owners have special privileges for write operations. + /// This method resolves the file's workspace and checks workspace-level permissions. + /// File owners have special privileges for write operations. /// /// # Arguments /// /// * `conn` - Database connection - /// * `document_id` - Document to check access for + /// * `file_id` - File to check access for /// * `permission` - Required permission level /// /// # Returns @@ -139,40 +139,40 @@ pub trait AuthProvider { /// /// Returns database errors if queries fail. 
#[allow(async_fn_in_trait)] - async fn check_document_permission( + async fn check_file_permission( &self, conn: &mut PgConn, - document_id: Uuid, + file_id: Uuid, permission: Permission, ) -> Result { - // Get the document to find its workspace - let document = conn.find_document_by_id(document_id).await?; + // Get the file to find its workspace + let file = conn.find_file_by_id(file_id).await?; - let Some(document) = document else { + let Some(file) = file else { tracing::warn!( target: TRACING_TARGET, account_id = %self.account_id(), - document_id = %document_id, - "access denied: document not found" + file_id = %file_id, + "access denied: file not found" ); - return Ok(AuthResult::denied("Document not found")); + return Ok(AuthResult::denied("File not found")); }; - // Document owners have special privileges for destructive operations - let is_document_owner = document.account_id == self.account_id(); + // File owners have special privileges for destructive operations + let is_file_owner = file.account_id == self.account_id(); let requires_ownership = matches!( permission, - Permission::UpdateDocuments | Permission::DeleteDocuments + Permission::UpdateFiles | Permission::DeleteFiles ); - if requires_ownership && !is_document_owner && !self.is_admin() { + if requires_ownership && !is_file_owner && !self.is_admin() { // Non-owners need explicit workspace-level permissions for destructive operations return self - .check_workspace_permission(conn, document.workspace_id, permission) + .check_workspace_permission(conn, file.workspace_id, permission) .await; } - self.check_workspace_permission(conn, document.workspace_id, permission) + self.check_workspace_permission(conn, file.workspace_id, permission) .await } @@ -274,17 +274,17 @@ pub trait AuthProvider { auth_result.into_result() } - /// Authorizes document access with ownership and workspace-level checks. + /// Authorizes file access with ownership and workspace-level checks. /// - /// This convenience method handles complex document authorization logic: - /// - Document owners have enhanced privileges for their own documents + /// This convenience method handles complex file authorization logic: + /// - File owners have enhanced privileges for their own files /// - All access requires at least workspace membership /// - Global administrators bypass all restrictions /// /// # Arguments /// - /// * `pg_client` - Database client for verification - /// * `document_id` - Target document identifier + /// * `conn` - Database connection for verification + /// * `file_id` - Target file identifier /// * `permission` - Required permission level /// /// # Returns @@ -295,14 +295,14 @@ pub trait AuthProvider { /// /// Returns `Forbidden` error if access is denied, or propagates database errors. #[allow(async_fn_in_trait)] - async fn authorize_document( + async fn authorize_file( &self, conn: &mut PgConn, - document_id: Uuid, + file_id: Uuid, permission: Permission, ) -> Result> { let auth_result = self - .check_document_permission(conn, document_id, permission) + .check_file_permission(conn, file_id, permission) .await?; auth_result.into_result() } diff --git a/crates/nvisy-server/src/extract/auth/permission.rs b/crates/nvisy-server/src/extract/auth/permission.rs index 5b98217..ec2c3b1 100644 --- a/crates/nvisy-server/src/extract/auth/permission.rs +++ b/crates/nvisy-server/src/extract/auth/permission.rs @@ -23,17 +23,7 @@ pub enum Permission { /// Can delete the entire workspace. 
DeleteWorkspace, - // Document permissions - /// Can view and read documents in the workspace. - ViewDocuments, - /// Can create new documents in the workspace. - CreateDocuments, - /// Can edit existing documents. - UpdateDocuments, - /// Can delete documents from the workspace. - DeleteDocuments, - - // File and asset permissions + // File permissions /// Can view and download files. ViewFiles, /// Can upload new files to the workspace. @@ -44,6 +34,20 @@ pub enum Permission { DownloadFiles, /// Can delete files from the workspace. DeleteFiles, + /// Can create and manage annotations on files. + AnnotateFiles, + + // Pipeline permissions + /// Can view pipelines in the workspace. + ViewPipelines, + /// Can create new pipelines. + CreatePipelines, + /// Can update existing pipelines. + UpdatePipelines, + /// Can delete pipelines. + DeletePipelines, + /// Can execute pipeline runs. + RunPipelines, // Member management permissions /// Can view workspace members and their roles. @@ -72,12 +76,6 @@ pub enum Permission { DeleteWebhooks, /// Can test webhooks by sending test payloads. TestWebhooks, - - // Workspace settings and configuration - /// Can view workspace settings. - ViewSettings, - /// Can modify workspace settings and configuration. - ManageSettings, } impl Permission { @@ -94,25 +92,26 @@ impl Permission { #[must_use] pub const fn minimum_required_role(self) -> WorkspaceRole { match self { - // Guest-level permissions + // Guest-level permissions (read-only access) Self::ViewWorkspace - | Self::ViewDocuments | Self::ViewFiles + | Self::ViewPipelines | Self::ViewMembers | Self::ViewIntegrations - | Self::ViewWebhooks - | Self::ViewSettings => WorkspaceRole::Guest, - - // Member-level permissions - Self::CreateDocuments - | Self::UpdateDocuments - | Self::DeleteDocuments - | Self::UploadFiles + | Self::ViewWebhooks => WorkspaceRole::Guest, + + // Member-level permissions (create and modify own resources) + Self::UploadFiles | Self::UpdateFiles | Self::DownloadFiles - | Self::DeleteFiles => WorkspaceRole::Member, - - // Admin-level permissions + | Self::DeleteFiles + | Self::AnnotateFiles + | Self::CreatePipelines + | Self::UpdatePipelines + | Self::DeletePipelines + | Self::RunPipelines => WorkspaceRole::Member, + + // Admin-level permissions (manage workspace resources) Self::UpdateWorkspace | Self::InviteMembers | Self::RemoveMembers @@ -120,8 +119,7 @@ impl Permission { | Self::CreateWebhooks | Self::UpdateWebhooks | Self::DeleteWebhooks - | Self::TestWebhooks - | Self::ManageSettings => WorkspaceRole::Admin, + | Self::TestWebhooks => WorkspaceRole::Admin, // Owner-only permissions (highest level) Self::DeleteWorkspace | Self::ManageRoles => WorkspaceRole::Owner, diff --git a/crates/nvisy-server/src/handler/accounts.rs b/crates/nvisy-server/src/handler/accounts.rs index 4eb8003..9ddde9c 100644 --- a/crates/nvisy-server/src/handler/accounts.rs +++ b/crates/nvisy-server/src/handler/accounts.rs @@ -11,12 +11,14 @@ use axum::extract::State; use axum::http::StatusCode; use nvisy_postgres::PgClient; use nvisy_postgres::model::Account as AccountModel; -use nvisy_postgres::query::{AccountNotificationRepository, AccountRepository}; +use nvisy_postgres::query::{ + AccountNotificationRepository, AccountRepository, WorkspaceMemberRepository, +}; use uuid::Uuid; -use super::request::{CursorPagination, UpdateAccount}; +use super::request::{AccountPathParams, CursorPagination, UpdateAccount}; use super::response::{Account, ErrorResponse, Notification, NotificationsPage, UnreadStatus}; -use 
crate::extract::{AuthState, Json, Query, ValidateJson}; +use crate::extract::{AuthState, Json, Path, Query, ValidateJson}; use crate::handler::{ErrorKind, Result}; use crate::service::{PasswordHasher, PasswordStrength, ServiceState}; @@ -43,12 +45,65 @@ async fn get_own_account( } fn get_own_account_docs(op: TransformOperation) -> TransformOperation { - op.summary("Get account") + op.summary("Get own account") .description("Returns the authenticated user's account details.") .response::<200, Json>() .response::<401, Json>() } +/// Retrieves an account by ID. +/// +/// The requester must share at least one workspace with the target account. +#[tracing::instrument( + skip_all, + fields( + requester_id = %auth_claims.account_id, + target_id = %path_params.account_id, + ) +)] +async fn get_account( + State(pg_client): State, + AuthState(auth_claims): AuthState, + Path(path_params): Path, +) -> Result<(StatusCode, Json)> { + tracing::debug!(target: TRACING_TARGET, "Reading account by ID"); + + let mut conn = pg_client.get_connection().await?; + + // Check if requester shares a workspace with target account + let shares_workspace = conn + .accounts_share_workspace(auth_claims.account_id, path_params.account_id) + .await?; + + if !shares_workspace { + tracing::warn!( + target: TRACING_TARGET, + "Access denied: accounts do not share a workspace" + ); + return Err(ErrorKind::Forbidden + .with_message("You do not have access to this account") + .with_resource("account")); + } + + let account = find_account(&mut conn, path_params.account_id).await?; + + tracing::info!(target: TRACING_TARGET, "Account read by ID"); + + Ok((StatusCode::OK, Json(Account::from_model(account)))) +} + +fn get_account_docs(op: TransformOperation) -> TransformOperation { + op.summary("Get account by ID") + .description( + "Returns an account's details by ID. \ + The requester must share at least one workspace with the target account.", + ) + .response::<200, Json>() + .response::<401, Json>() + .response::<403, Json>() + .response::<404, Json>() +} + /// Updates the authenticated account. #[tracing::instrument( skip_all, @@ -262,6 +317,10 @@ pub fn routes(_state: ServiceState) -> ApiRouter { .patch_with(update_own_account, update_own_account_docs) .delete_with(delete_own_account, delete_own_account_docs), ) + .api_route( + "/accounts/{accountId}/", + get_with(get_account, get_account_docs), + ) .api_route( "/notifications/", get_with(list_notifications, list_notifications_docs), diff --git a/crates/nvisy-server/src/handler/annotations.rs b/crates/nvisy-server/src/handler/annotations.rs index 2eea75a..43543ad 100644 --- a/crates/nvisy-server/src/handler/annotations.rs +++ b/crates/nvisy-server/src/handler/annotations.rs @@ -7,7 +7,7 @@ use aide::transform::TransformOperation; use axum::extract::State; use axum::http::StatusCode; use nvisy_postgres::PgClient; -use nvisy_postgres::query::{DocumentAnnotationRepository, DocumentFileRepository}; +use nvisy_postgres::query::{FileAnnotationRepository, FileRepository}; use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson}; use crate::handler::request::{ @@ -24,8 +24,8 @@ const TRACING_TARGET: &str = "nvisy_server::handler::annotations"; async fn find_annotation( conn: &mut nvisy_postgres::PgConn, annotation_id: uuid::Uuid, -) -> Result { - conn.find_document_annotation_by_id(annotation_id) +) -> Result { + conn.find_file_annotation_by_id(annotation_id) .await? 
.ok_or_else(|| { ErrorKind::NotFound @@ -38,14 +38,12 @@ async fn find_annotation( async fn find_file( conn: &mut nvisy_postgres::PgConn, file_id: uuid::Uuid, -) -> Result { - conn.find_document_file_by_id(file_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("File not found") - .with_resource("file") - }) +) -> Result { + conn.find_file_by_id(file_id).await?.ok_or_else(|| { + ErrorKind::NotFound + .with_message("File not found") + .with_resource("file") + }) } /// Creates a new annotation on a file. @@ -68,11 +66,11 @@ async fn create_annotation( let file = find_file(&mut conn, path_params.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::CreateDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::AnnotateFiles) .await?; let new_annotation = request.into_model(path_params.file_id, auth_state.account_id); - let annotation = conn.create_document_annotation(new_annotation).await?; + let annotation = conn.create_file_annotation(new_annotation).await?; tracing::info!( target: TRACING_TARGET, @@ -116,11 +114,11 @@ async fn list_annotations( let file = find_file(&mut conn, path_params.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewFiles) .await?; let page = conn - .cursor_list_file_document_annotations(path_params.file_id, pagination.into()) + .cursor_list_file_annotations(path_params.file_id, pagination.into()) .await?; let response = AnnotationsPage::from_cursor_page(page, Annotation::from_model); @@ -160,10 +158,10 @@ async fn get_annotation( let mut conn = pg_client.get_connection().await?; let annotation = find_annotation(&mut conn, path_params.annotation_id).await?; - let file = find_file(&mut conn, annotation.document_file_id).await?; + let file = find_file(&mut conn, annotation.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewFiles) .await?; tracing::debug!(target: TRACING_TARGET, "Annotation retrieved"); @@ -204,14 +202,14 @@ async fn update_annotation( return Err(ErrorKind::Forbidden.with_message("You can only update your own annotations")); } - let file = find_file(&mut conn, annotation.document_file_id).await?; + let file = find_file(&mut conn, annotation.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::CreateDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::AnnotateFiles) .await?; let updated = conn - .update_document_annotation(path_params.annotation_id, request.into_model()) + .update_file_annotation(path_params.annotation_id, request.into_model()) .await?; tracing::info!(target: TRACING_TARGET, "Annotation updated"); @@ -252,13 +250,13 @@ async fn delete_annotation( return Err(ErrorKind::Forbidden.with_message("You can only delete your own annotations")); } - let file = find_file(&mut conn, annotation.document_file_id).await?; + let file = find_file(&mut conn, annotation.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::CreateDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::AnnotateFiles) .await?; - conn.delete_document_annotation(path_params.annotation_id) + conn.delete_file_annotation(path_params.annotation_id) .await?; tracing::info!(target: TRACING_TARGET, "Annotation deleted"); diff --git 
a/crates/nvisy-server/src/handler/comments.rs b/crates/nvisy-server/src/handler/comments.rs deleted file mode 100644 index eea3b9d..0000000 --- a/crates/nvisy-server/src/handler/comments.rs +++ /dev/null @@ -1,258 +0,0 @@ -//! File comment management handlers for CRUD operations. -//! -//! This module provides comment management functionality for files. -//! Supports threaded conversations and @mentions. - -use aide::axum::ApiRouter; -use aide::transform::TransformOperation; -use axum::extract::State; -use axum::http::StatusCode; -use nvisy_postgres::PgClient; -use nvisy_postgres::query::{DocumentCommentRepository, DocumentFileRepository}; - -use crate::extract::{AuthState, Json, Path, Query, ValidateJson}; -use crate::handler::request::{ - CommentPathParams, CreateComment, CursorPagination, FilePathParams, UpdateComment, -}; -use crate::handler::response::{Comment, CommentsPage, ErrorResponse}; -use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; - -/// Tracing target for file comment operations. -const TRACING_TARGET: &str = "nvisy_server::handler::comments"; - -/// Creates a new comment on a file. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - file_id = %path_params.file_id, - ) -)] -async fn post_comment( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Creating comment"); - - let mut conn = pg_client.get_connection().await?; - - // Verify file exists - let _ = find_file(&mut conn, path_params.file_id).await?; - - // Validate parent comment if provided - if let Some(parent_id) = request.parent_comment_id { - let parent_comment = find_comment(&mut conn, parent_id).await?; - - // Verify parent comment is on the same file - if parent_comment.file_id != path_params.file_id { - return Err(ErrorKind::BadRequest - .with_message("Parent comment must belong to the same file.") - .with_resource("comment")); - } - } - - let comment = conn - .create_document_comment(request.into_model(auth_claims.account_id, path_params.file_id)) - .await?; - - tracing::info!( - target: TRACING_TARGET, - comment_id = %comment.id, - "Comment created", - ); - - Ok((StatusCode::CREATED, Json(Comment::from_model(comment)))) -} - -fn post_comment_docs(op: TransformOperation) -> TransformOperation { - op.summary("Create comment") - .description("Creates a new comment on a file.") - .response::<201, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<404, Json>() -} - -/// Returns all comments for a file. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - file_id = %path_params.file_id, - ) -)] -async fn list_comments( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - Query(pagination): Query, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Listing comments"); - - let mut conn = pg_client.get_connection().await?; - - // Verify file exists - let _ = find_file(&mut conn, path_params.file_id).await?; - - let page = conn - .cursor_list_file_document_comments(path_params.file_id, pagination.into()) - .await?; - - let response = CommentsPage::from_cursor_page(page, Comment::from_model); - - tracing::debug!( - target: TRACING_TARGET, - comment_count = response.items.len(), - "Comments listed", - ); - - Ok((StatusCode::OK, Json(response))) -} - -fn list_comments_docs(op: TransformOperation) -> TransformOperation { - op.summary("List comments") - .description("Returns all comments for a file.") - .response::<200, Json>() - .response::<401, Json>() - .response::<404, Json>() -} - -/// Updates a comment by ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - comment_id = %path_params.comment_id, - ) -)] -async fn update_comment( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Updating comment"); - - let mut conn = pg_client.get_connection().await?; - - // Fetch comment and verify ownership - let existing_comment = find_comment(&mut conn, path_params.comment_id).await?; - - // Check ownership - if existing_comment.account_id != auth_claims.account_id { - return Err(ErrorKind::Forbidden - .with_message("You can only update your own comments.") - .with_resource("comment")); - } - - let comment = conn - .update_document_comment(path_params.comment_id, request.into_model()) - .await?; - - tracing::info!(target: TRACING_TARGET, "Comment updated"); - - Ok((StatusCode::OK, Json(Comment::from_model(comment)))) -} - -fn update_comment_docs(op: TransformOperation) -> TransformOperation { - op.summary("Update comment") - .description("Updates a comment by ID.") - .response::<200, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Deletes a comment by ID. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - comment_id = %path_params.comment_id, - ) -)] -async fn delete_comment( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, -) -> Result { - tracing::debug!(target: TRACING_TARGET, "Deleting comment"); - - let mut conn = pg_client.get_connection().await?; - - // Fetch comment and verify ownership - let existing_comment = find_comment(&mut conn, path_params.comment_id).await?; - - // Check ownership - if existing_comment.account_id != auth_claims.account_id { - return Err(ErrorKind::Forbidden - .with_message("You can only delete your own comments.") - .with_resource("comment")); - } - - conn.delete_document_comment(path_params.comment_id).await?; - - tracing::info!(target: TRACING_TARGET, "Comment deleted"); - - Ok(StatusCode::NO_CONTENT) -} - -fn delete_comment_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete comment") - .description("Deletes a comment by ID.") - .response_with::<204, (), _>(|res| res.description("Comment deleted.")) - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Finds a file by ID or returns NotFound error. -async fn find_file( - conn: &mut nvisy_postgres::PgConn, - file_id: uuid::Uuid, -) -> Result { - conn.find_document_file_by_id(file_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("File not found.") - .with_resource("file") - }) -} - -/// Finds a comment by ID or returns NotFound error. -async fn find_comment( - conn: &mut nvisy_postgres::PgConn, - comment_id: uuid::Uuid, -) -> Result { - conn.find_document_comment_by_id(comment_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("Comment not found.") - .with_resource("comment") - }) -} - -/// Returns a [`Router`] with all comment-related routes. -/// -/// [`Router`]: axum::routing::Router -pub fn routes() -> ApiRouter { - use aide::axum::routing::*; - - ApiRouter::new() - .api_route( - "/files/{fileId}/comments", - post_with(post_comment, post_comment_docs).get_with(list_comments, list_comments_docs), - ) - .api_route( - "/comments/{commentId}", - patch_with(update_comment, update_comment_docs) - .delete_with(delete_comment, delete_comment_docs), - ) - .with_path_items(|item| item.tag("Comments")) -} diff --git a/crates/nvisy-server/src/handler/documents.rs b/crates/nvisy-server/src/handler/documents.rs deleted file mode 100644 index 5a68419..0000000 --- a/crates/nvisy-server/src/handler/documents.rs +++ /dev/null @@ -1,288 +0,0 @@ -//! Document management handlers for document CRUD operations. -//! -//! This module provides comprehensive document management functionality within workspaces, -//! including creation, reading, updating, and deletion of documents. All operations -//! are secured with proper authorization and follow workspace-based access control. 
- -use aide::axum::ApiRouter; -use aide::transform::TransformOperation; -use axum::extract::State; -use axum::http::StatusCode; -use nvisy_nats::NatsClient; -use nvisy_postgres::PgClient; -use nvisy_postgres::query::DocumentRepository; - -use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson}; -use crate::handler::request::{ - CreateDocument, CursorPagination, DocumentPathParams, UpdateDocument, WorkspacePathParams, -}; -use crate::handler::response::{Document, DocumentsPage, ErrorResponse}; -use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; - -/// Tracing target for document operations. -const TRACING_TARGET: &str = "nvisy_server::handler::documents"; - -/// Creates a new document. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn create_document( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Creating document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::CreateDocuments, - ) - .await?; - - let new_document = request.into_model(path_params.workspace_id, auth_state.account_id); - let document = conn.create_document(new_document).await?; - - tracing::info!( - target: TRACING_TARGET, - document_id = %document.id, - "Document created", - ); - - Ok((StatusCode::CREATED, Json(Document::from_model(document)))) -} - -fn create_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Create document") - .description("Creates a new document container for organizing files.") - .response::<201, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() -} - -/// Returns all documents for a workspace. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn get_all_documents( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - Query(pagination): Query, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Listing documents"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::ViewDocuments, - ) - .await?; - - let page = conn - .cursor_list_workspace_documents(path_params.workspace_id, pagination.into()) - .await?; - - let response = DocumentsPage::from_cursor_page(page, Document::from_model); - - tracing::debug!( - target: TRACING_TARGET, - document_count = response.items.len(), - "Documents listed", - ); - - Ok((StatusCode::OK, Json(response))) -} - -fn get_all_documents_docs(op: TransformOperation) -> TransformOperation { - op.summary("List documents") - .description("Lists all documents in a workspace with pagination.") - .response::<200, Json>() - .response::<401, Json>() - .response::<403, Json>() -} - -/// Gets a document by its document ID. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - document_id = %path_params.document_id, - ) -)] -async fn get_document( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Reading document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_document( - &mut conn, - path_params.document_id, - Permission::ViewDocuments, - ) - .await?; - - let document = find_document(&mut conn, path_params.document_id).await?; - - tracing::info!(target: TRACING_TARGET, "Document read"); - - Ok((StatusCode::OK, Json(Document::from_model(document)))) -} - -fn get_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Get document") - .description("Returns document details by ID.") - .response::<200, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Updates a document by its document ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - document_id = %path_params.document_id, - ) -)] -async fn update_document( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Updating document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_document( - &mut conn, - path_params.document_id, - Permission::UpdateDocuments, - ) - .await?; - - // Verify document exists - let _ = find_document(&mut conn, path_params.document_id).await?; - - let update_data = request.into_model(); - let document = conn - .update_document(path_params.document_id, update_data) - .await?; - - tracing::info!(target: TRACING_TARGET, "Document updated"); - - Ok((StatusCode::OK, Json(Document::from_model(document)))) -} - -fn update_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Update document") - .description("Updates document metadata.") - .response::<200, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Deletes a document by its document ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - document_id = %path_params.document_id, - ) -)] -async fn delete_document( - State(pg_client): State, - State(_nats_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, -) -> Result { - tracing::debug!(target: TRACING_TARGET, "Deleting document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_document( - &mut conn, - path_params.document_id, - Permission::DeleteDocuments, - ) - .await?; - - // Verify document exists - let _ = find_document(&mut conn, path_params.document_id).await?; - - conn.delete_document(path_params.document_id).await?; - - tracing::info!(target: TRACING_TARGET, "Document deleted"); - - Ok(StatusCode::OK) -} - -fn delete_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete document") - .description("Soft-deletes the document and associated files.") - .response_with::<200, (), _>(|res| res.description("Document deleted.")) - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Finds a document by ID or returns NotFound error. 
-async fn find_document( - conn: &mut nvisy_postgres::PgConn, - document_id: uuid::Uuid, -) -> Result { - conn.find_document_by_id(document_id).await?.ok_or_else(|| { - ErrorKind::NotFound - .with_message("Document not found.") - .with_resource("document") - }) -} - -/// Returns a [`Router`] with all related routes. -/// -/// [`Router`]: axum::routing::Router -pub fn routes() -> ApiRouter { - use aide::axum::routing::*; - - ApiRouter::new() - .api_route( - "/workspaces/{workspaceId}/documents", - post_with(create_document, create_document_docs) - .get_with(get_all_documents, get_all_documents_docs), - ) - .api_route( - "/documents/{documentId}", - get_with(get_document, get_document_docs) - .patch_with(update_document, update_document_docs) - .delete_with(delete_document, delete_document_docs), - ) - .with_path_items(|item| item.tag("Documents")) -} diff --git a/crates/nvisy-server/src/handler/error/mod.rs b/crates/nvisy-server/src/handler/error/mod.rs index 196759c..8e75e77 100644 --- a/crates/nvisy-server/src/handler/error/mod.rs +++ b/crates/nvisy-server/src/handler/error/mod.rs @@ -5,6 +5,7 @@ mod nats_error; mod pg_account; mod pg_document; mod pg_error; +mod pg_pipeline; mod pg_workspace; mod service_error; diff --git a/crates/nvisy-server/src/handler/error/pg_document.rs b/crates/nvisy-server/src/handler/error/pg_document.rs index 0862561..9968d05 100644 --- a/crates/nvisy-server/src/handler/error/pg_document.rs +++ b/crates/nvisy-server/src/handler/error/pg_document.rs @@ -1,204 +1,95 @@ -//! Document-related constraint violation error handlers. +//! File-related constraint violation error handlers. -use nvisy_postgres::types::{ - DocumentAnnotationConstraints, DocumentChunkConstraints, DocumentCommentConstraints, - DocumentConstraints, DocumentFileConstraints, DocumentVersionConstraints, -}; +use nvisy_postgres::types::{FileAnnotationConstraints, FileChunkConstraints, FileConstraints}; use crate::handler::{Error, ErrorKind}; -impl From for Error<'static> { - fn from(c: DocumentConstraints) -> Self { +impl From for Error<'static> { + fn from(c: FileConstraints) -> Self { let error = match c { - DocumentConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("Document name must be between 1 and 255 characters long"), - DocumentConstraints::DescriptionLengthMax => ErrorKind::BadRequest - .with_message("Document description cannot exceed 2048 characters"), - DocumentConstraints::TagsCountMax => { - ErrorKind::BadRequest.with_message("Cannot have more than 32 tags") - } - DocumentConstraints::MetadataSize => { - ErrorKind::BadRequest.with_message("Document metadata size is invalid") - } - DocumentConstraints::UpdatedAfterCreated - | DocumentConstraints::DeletedAfterCreated - | DocumentConstraints::DeletedAfterUpdated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("document") - } -} - -impl From for Error<'static> { - fn from(c: DocumentFileConstraints) -> Self { - let error = - match c { - DocumentFileConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("File name must be between 1 and 255 characters long"), - DocumentFileConstraints::OriginalFilenameLength => ErrorKind::BadRequest - .with_message("Original filename must be between 1 and 255 characters long"), - DocumentFileConstraints::FileExtensionFormat => { - ErrorKind::BadRequest.with_message("Invalid file extension format") - } - DocumentFileConstraints::ProcessingPriorityRange => ErrorKind::BadRequest - .with_message("Processing priority must be between 1 
and 10"), - DocumentFileConstraints::FileSizeMin => ErrorKind::BadRequest - .with_message("File size must be greater than or equal to 0"), - DocumentFileConstraints::StoragePathNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentFileConstraints::StorageBucketNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentFileConstraints::FileHashSha256Length => { - ErrorKind::InternalServerError.into_error() - } - DocumentFileConstraints::MetadataSize => { - ErrorKind::BadRequest.with_message("File metadata size is invalid") - } - DocumentFileConstraints::RetentionPeriod => ErrorKind::BadRequest - .with_message("File retention period must be between 1 hour and 5 years"), - DocumentFileConstraints::TagsCountMax => { - ErrorKind::BadRequest.with_message("Maximum number of tags exceeded") - } - DocumentFileConstraints::UpdatedAfterCreated - | DocumentFileConstraints::DeletedAfterCreated - | DocumentFileConstraints::DeletedAfterUpdated - | DocumentFileConstraints::AutoDeleteAfterCreated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("document_file") - } -} - -impl From for Error<'static> { - fn from(c: DocumentVersionConstraints) -> Self { - let error = match c { - DocumentVersionConstraints::VersionNumberMin => { - ErrorKind::BadRequest.with_message("Version number must be at least 1") - } - DocumentVersionConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("Version name must be between 1 and 255 characters long"), - DocumentVersionConstraints::FileExtensionFormat => { + FileConstraints::DisplayNameLength => ErrorKind::BadRequest + .with_message("File name must be between 1 and 255 characters long"), + FileConstraints::OriginalFilenameLength => ErrorKind::BadRequest + .with_message("Original filename must be between 1 and 255 characters long"), + FileConstraints::FileExtensionFormat => { ErrorKind::BadRequest.with_message("Invalid file extension format") } - DocumentVersionConstraints::ProcessingCreditsMin => { - ErrorKind::InternalServerError.into_error() - } - DocumentVersionConstraints::ProcessingDurationMin => { - ErrorKind::InternalServerError.into_error() + FileConstraints::MimeTypeFormat => { + ErrorKind::BadRequest.with_message("Invalid MIME type format") } - DocumentVersionConstraints::ApiCallsMin => ErrorKind::InternalServerError.into_error(), - DocumentVersionConstraints::FileSizeMin => { + FileConstraints::FileSizeMin => { ErrorKind::BadRequest.with_message("File size must be greater than or equal to 0") } - DocumentVersionConstraints::StoragePathNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentVersionConstraints::StorageBucketNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentVersionConstraints::FileHashSha256Length => { - ErrorKind::InternalServerError.into_error() + FileConstraints::StoragePathNotEmpty => ErrorKind::InternalServerError.into_error(), + FileConstraints::StorageBucketNotEmpty => ErrorKind::InternalServerError.into_error(), + FileConstraints::FileHashSha256Length => ErrorKind::InternalServerError.into_error(), + FileConstraints::MetadataSize => { + ErrorKind::BadRequest.with_message("File metadata size is invalid") } - DocumentVersionConstraints::ResultsSize => { - ErrorKind::BadRequest.with_message("Processing results size is invalid") + FileConstraints::TagsCountMax => { + ErrorKind::BadRequest.with_message("Maximum number of tags exceeded") } - DocumentVersionConstraints::MetadataSize => { - 
ErrorKind::BadRequest.with_message("Version metadata size is invalid") - } - DocumentVersionConstraints::RetentionPeriod => ErrorKind::BadRequest - .with_message("Version retention period must be between 1 hour and 5 years"), - DocumentVersionConstraints::UpdatedAfterCreated - | DocumentVersionConstraints::DeletedAfterCreated - | DocumentVersionConstraints::DeletedAfterUpdated - | DocumentVersionConstraints::AutoDeleteAfterCreated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("document_version") - } -} - -impl From for Error<'static> { - fn from(c: DocumentCommentConstraints) -> Self { - let error = match c { - DocumentCommentConstraints::ContentLength => ErrorKind::BadRequest - .with_message("Comment content must be between 1 and 10,000 characters"), - DocumentCommentConstraints::OneTarget => ErrorKind::BadRequest.with_message( - "Comment must be attached to exactly one target (document, file, or version)", - ), - DocumentCommentConstraints::MetadataSize => { - ErrorKind::BadRequest.with_message("Comment metadata size is invalid") - } - DocumentCommentConstraints::UpdatedAfterCreated - | DocumentCommentConstraints::DeletedAfterCreated - | DocumentCommentConstraints::DeletedAfterUpdated => { - ErrorKind::InternalServerError.into_error() + FileConstraints::VersionNumberMin => { + ErrorKind::BadRequest.with_message("Version number must be at least 1") } + FileConstraints::UpdatedAfterCreated + | FileConstraints::DeletedAfterCreated + | FileConstraints::DeletedAfterUpdated => ErrorKind::InternalServerError.into_error(), }; - error.with_resource("document_comment") + error.with_resource("file") } } -impl From for Error<'static> { - fn from(c: DocumentAnnotationConstraints) -> Self { +impl From for Error<'static> { + fn from(c: FileAnnotationConstraints) -> Self { let error = match c { - DocumentAnnotationConstraints::ContentLength => { + FileAnnotationConstraints::ContentLength => { ErrorKind::BadRequest.with_message("Annotation content length is invalid") } - DocumentAnnotationConstraints::TypeFormat => { - ErrorKind::BadRequest.with_message("Annotation type format is invalid") - } - DocumentAnnotationConstraints::MetadataSize => { + FileAnnotationConstraints::MetadataSize => { ErrorKind::BadRequest.with_message("Annotation metadata size is invalid") } - DocumentAnnotationConstraints::UpdatedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterUpdated => { + FileAnnotationConstraints::UpdatedAfterCreated + | FileAnnotationConstraints::DeletedAfterCreated + | FileAnnotationConstraints::DeletedAfterUpdated => { ErrorKind::InternalServerError.into_error() } }; - error.with_resource("document_annotation") + error.with_resource("file_annotation") } } -impl From for Error<'static> { - fn from(c: DocumentChunkConstraints) -> Self { +impl From for Error<'static> { + fn from(c: FileChunkConstraints) -> Self { let error = match c { - DocumentChunkConstraints::ChunkIndexMin => { + FileChunkConstraints::ChunkIndexMin => { ErrorKind::BadRequest.with_message("Chunk index must be at least 0") } - DocumentChunkConstraints::ContentSha256Length => { + FileChunkConstraints::ContentSha256Length => { ErrorKind::InternalServerError.into_error() } - DocumentChunkConstraints::ContentSizeMin => { + FileChunkConstraints::ContentSizeMin => { ErrorKind::BadRequest.with_message("Chunk content size must be at least 0") } - DocumentChunkConstraints::TokenCountMin => { + FileChunkConstraints::TokenCountMin => { 
ErrorKind::BadRequest.with_message("Token count must be at least 0") } - DocumentChunkConstraints::EmbeddingModelFormat => { + FileChunkConstraints::EmbeddingModelFormat => { ErrorKind::BadRequest.with_message("Invalid embedding model format") } - DocumentChunkConstraints::MetadataSize => { + FileChunkConstraints::MetadataSize => { ErrorKind::BadRequest.with_message("Chunk metadata size is invalid") } - DocumentChunkConstraints::UpdatedAfterCreated => { + FileChunkConstraints::UpdatedAfterCreated => { ErrorKind::InternalServerError.into_error() } - DocumentChunkConstraints::FileChunkUnique => { + FileChunkConstraints::FileChunkUnique => { ErrorKind::Conflict.with_message("Chunk with this index already exists for file") } }; - error.with_resource("document_chunk") + error.with_resource("file_chunk") } } diff --git a/crates/nvisy-server/src/handler/error/pg_error.rs b/crates/nvisy-server/src/handler/error/pg_error.rs index b3a92e6..f8aff47 100644 --- a/crates/nvisy-server/src/handler/error/pg_error.rs +++ b/crates/nvisy-server/src/handler/error/pg_error.rs @@ -27,13 +27,12 @@ impl From for Error<'static> { ConstraintViolation::WorkspaceActivityLog(c) => c.into(), ConstraintViolation::WorkspaceIntegration(c) => c.into(), ConstraintViolation::WorkspaceIntegrationRun(c) => c.into(), - ConstraintViolation::Document(c) => c.into(), - ConstraintViolation::DocumentChunk(c) => c.into(), - ConstraintViolation::DocumentComment(c) => c.into(), - ConstraintViolation::DocumentAnnotation(c) => c.into(), - ConstraintViolation::DocumentFile(c) => c.into(), - ConstraintViolation::DocumentVersion(c) => c.into(), ConstraintViolation::WorkspaceWebhook(c) => c.into(), + ConstraintViolation::File(c) => c.into(), + ConstraintViolation::FileAnnotation(c) => c.into(), + ConstraintViolation::FileChunk(c) => c.into(), + ConstraintViolation::Pipeline(c) => c.into(), + ConstraintViolation::PipelineRun(c) => c.into(), } } } diff --git a/crates/nvisy-server/src/handler/error/pg_pipeline.rs b/crates/nvisy-server/src/handler/error/pg_pipeline.rs new file mode 100644 index 0000000..b66c9a6 --- /dev/null +++ b/crates/nvisy-server/src/handler/error/pg_pipeline.rs @@ -0,0 +1,50 @@ +//! Pipeline-related constraint violation error handlers. 
+ +use nvisy_postgres::types::{PipelineConstraints, PipelineRunConstraints}; + +use crate::handler::{Error, ErrorKind}; + +impl From<PipelineConstraints> for Error<'static> { + fn from(c: PipelineConstraints) -> Self { + let error = match c { + PipelineConstraints::NameLength => ErrorKind::BadRequest + .with_message("Pipeline name must be between 1 and 255 characters long"), + PipelineConstraints::DescriptionLength => ErrorKind::BadRequest + .with_message("Pipeline description must be at most 4096 characters long"), + PipelineConstraints::DefinitionSize => { + ErrorKind::BadRequest.with_message("Pipeline definition size exceeds maximum limit") + } + PipelineConstraints::MetadataSize => { + ErrorKind::BadRequest.with_message("Pipeline metadata size exceeds maximum limit") + } + PipelineConstraints::UpdatedAfterCreated | PipelineConstraints::DeletedAfterCreated => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("pipeline") + } +} + +impl From<PipelineRunConstraints> for Error<'static> { + fn from(c: PipelineRunConstraints) -> Self { + let error = match c { + PipelineRunConstraints::InputConfigSize => ErrorKind::BadRequest + .with_message("Pipeline run input configuration size exceeds maximum limit"), + PipelineRunConstraints::OutputConfigSize => ErrorKind::BadRequest + .with_message("Pipeline run output configuration size exceeds maximum limit"), + PipelineRunConstraints::DefinitionSnapshotSize => ErrorKind::BadRequest + .with_message("Pipeline run definition snapshot size exceeds maximum limit"), + PipelineRunConstraints::ErrorSize => ErrorKind::BadRequest + .with_message("Pipeline run error details size exceeds maximum limit"), + PipelineRunConstraints::MetricsSize => ErrorKind::BadRequest + .with_message("Pipeline run metrics size exceeds maximum limit"), + PipelineRunConstraints::StartedAfterCreated + | PipelineRunConstraints::CompletedAfterStarted => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("pipeline_run") + } +} diff --git a/crates/nvisy-server/src/handler/files.rs b/crates/nvisy-server/src/handler/files.rs index b56239e..7593047 100644 --- a/crates/nvisy-server/src/handler/files.rs +++ b/crates/nvisy-server/src/handler/files.rs @@ -14,38 +14,37 @@ use axum::extract::{DefaultBodyLimit, State}; use axum::http::{HeaderMap, StatusCode}; use futures::StreamExt; use nvisy_nats::NatsClient; -use nvisy_nats::object::{DocumentKey, DocumentStore, Files as FilesBucket}; -use nvisy_nats::stream::{DocumentJobPublisher, PreprocessingData}; +use nvisy_nats::object::{FileKey, FilesBucket, ObjectStore}; +use nvisy_nats::stream::{EventPublisher, FileJob, FileStream}; use nvisy_postgres::PgClient; -use nvisy_postgres::model::{DocumentFile, NewDocumentFile}; -use nvisy_postgres::query::DocumentFileRepository; -use nvisy_postgres::types::ProcessingStatus; +use nvisy_postgres::model::{File as FileModel, NewFile}; +use nvisy_postgres::query::FileRepository; use uuid::Uuid; use crate::extract::{ AuthProvider, AuthState, Json, Multipart, Path, Permission, Query, ValidateJson, }; use crate::handler::request::{ - CursorPagination, DeleteFiles, DownloadFiles, FilePathParams, ListFiles, UpdateFile, - WorkspacePathParams, + CursorPagination, FilePathParams, ListFiles, UpdateFile, WorkspacePathParams, }; use crate::handler::response::{self, ErrorResponse, File, Files, FilesPage}; use crate::handler::{ErrorKind, Result}; use crate::middleware::DEFAULT_MAX_FILE_BODY_SIZE; -use crate::service::{ArchiveFormat, ArchiveService, ServiceState}; +use crate::service::{ServiceState, 
WebhookEmitter}; /// Tracing target for workspace file operations. const TRACING_TARGET: &str = "nvisy_server::handler::workspace_files"; +/// Type alias for file job publisher. +type FileJobPublisher = EventPublisher, FileStream>; + /// Finds a file by ID or returns NotFound error. -async fn find_file(conn: &mut nvisy_postgres::PgConn, file_id: Uuid) -> Result { - conn.find_document_file_by_id(file_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("File not found") - .with_resource("file") - }) +async fn find_file(conn: &mut nvisy_postgres::PgConn, file_id: Uuid) -> Result { + conn.find_file_by_id(file_id).await?.ok_or_else(|| { + ErrorKind::NotFound + .with_message("File not found") + .with_resource("file") + }) } /// Lists files in a workspace with cursor-based pagination. @@ -106,8 +105,8 @@ fn list_files_docs(op: TransformOperation) -> TransformOperation { struct FileUploadContext { workspace_id: Uuid, account_id: Uuid, - document_store: DocumentStore, - publisher: DocumentJobPublisher, + file_store: ObjectStore, + publisher: FileJobPublisher, } /// Processes a single file from a multipart upload using streaming. @@ -115,7 +114,7 @@ async fn process_single_file( conn: &mut nvisy_postgres::PgConn, ctx: &FileUploadContext, field: axum::extract::multipart::Field<'_>, -) -> Result { +) -> Result { let filename = field .file_name() .map(ToString::to_string) @@ -127,12 +126,12 @@ async fn process_single_file( .unwrap_or("bin") .to_lowercase(); - // Generate document key with unique object ID for NATS storage - let document_key = DocumentKey::generate(ctx.workspace_id); + // Generate file key with unique object ID for NATS storage + let file_key = FileKey::generate(ctx.workspace_id); tracing::debug!( target: TRACING_TARGET, - object_id = %document_key.object_id(), + object_id = %file_key.object_id, "Streaming file to storage" ); @@ -141,18 +140,18 @@ async fn process_single_file( field.map(|result| result.map_err(std::io::Error::other)), ); - let put_result = ctx.document_store.put(&document_key, reader).await?; + let put_result = ctx.file_store.put(&file_key, reader).await?; tracing::debug!( target: TRACING_TARGET, - object_id = %document_key.object_id(), + object_id = %file_key.object_id, size = put_result.size(), sha256 = %put_result.sha256_hex(), "File streamed to storage" ); // Step 2: Create DB record with all storage info (Postgres generates its own id) - let file_record = NewDocumentFile { + let file_record = NewFile { workspace_id: ctx.workspace_id, account_id: ctx.account_id, display_name: Some(filename.clone()), @@ -160,28 +159,22 @@ async fn process_single_file( file_extension: Some(file_extension.clone()), file_size_bytes: put_result.size() as i64, file_hash_sha256: put_result.sha256().to_vec(), - storage_path: document_key.to_string(), - storage_bucket: ctx.document_store.bucket().to_owned(), - processing_status: Some(ProcessingStatus::Pending), + storage_path: file_key.to_string(), + storage_bucket: ctx.file_store.bucket().to_owned(), ..Default::default() }; - let created_file = conn.create_document_file(file_record).await?; + let created_file = conn.create_file(file_record).await?; // Step 3: Publish job to queue (use Postgres-generated file ID) - let job = nvisy_nats::stream::DocumentJob::new( - created_file.id, - document_key.to_string(), - file_extension, - PreprocessingData::default(), - ); + let job = FileJob::new(created_file.id, file_key.to_string(), file_extension, ()); - ctx.publisher.publish_job(&job).await.map_err(|err| { + 
ctx.publisher.publish(&job).await.map_err(|err| { tracing::error!( target: TRACING_TARGET, error = %err, file_id = %created_file.id, - "Failed to publish document job" + "Failed to publish file job" ); ErrorKind::InternalServerError.with_message("Failed to queue file for processing") })?; @@ -190,7 +183,7 @@ async fn process_single_file( target: TRACING_TARGET, file_id = %created_file.id, job_id = %job.id, - "Document job published" + "File job published" ); Ok(created_file) @@ -207,6 +200,7 @@ async fn process_single_file( async fn upload_file( State(pg_client): State, State(nats_client): State, + State(webhook_emitter): State, Path(path_params): Path, AuthState(auth_claims): AuthState, Multipart(mut multipart): Multipart, @@ -219,16 +213,14 @@ async fn upload_file( .authorize_workspace(&mut conn, path_params.workspace_id, Permission::UploadFiles) .await?; - let document_store = nats_client.document_store::().await?; + let file_store = nats_client.object_store::().await?; - let publisher = nats_client - .document_job_publisher::() - .await?; + let publisher: FileJobPublisher = nats_client.event_publisher().await?; let ctx = FileUploadContext { workspace_id: path_params.workspace_id, account_id: auth_claims.account_id, - document_store, + file_store, publisher, }; @@ -248,6 +240,30 @@ async fn upload_file( return Err(ErrorKind::BadRequest.with_message("No files provided in multipart request")); } + // Emit webhook events for created files (fire-and-forget) + for file in &uploaded_files { + let data = serde_json::json!({ + "displayName": file.display_name, + "fileSizeBytes": file.file_size, + }); + if let Err(err) = webhook_emitter + .emit_file_created( + path_params.workspace_id, + file.id, + Some(auth_claims.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + file_id = %file.id, + "Failed to emit file:created webhook event" + ); + } + } + tracing::info!( target: TRACING_TARGET, file_count = uploaded_files.len(), @@ -313,6 +329,7 @@ fn read_file_docs(op: TransformOperation) -> TransformOperation { )] async fn update_file( State(pg_client): State, + State(webhook_emitter): State, Path(path_params): Path, AuthState(auth_claims): AuthState, ValidateJson(request): ValidateJson, @@ -331,13 +348,34 @@ async fn update_file( let updates = request.into_model(); let updated_file = conn - .update_document_file(path_params.file_id, updates) + .update_file(path_params.file_id, updates) .await .map_err(|err| { tracing::error!(target: TRACING_TARGET, error = %err, "Failed to update file"); ErrorKind::InternalServerError.with_message("Failed to update file") })?; + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "displayName": updated_file.display_name, + }); + if let Err(err) = webhook_emitter + .emit_file_updated( + file.workspace_id, + path_params.file_id, + Some(auth_claims.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + file_id = %path_params.file_id, + "Failed to emit file:updated webhook event" + ); + } + tracing::info!(target: TRACING_TARGET, "File updated"); Ok(( @@ -381,19 +419,19 @@ async fn download_file( .authorize_workspace(&mut conn, file.workspace_id, Permission::DownloadFiles) .await?; - let document_store = nats_client - .document_store::() + let file_store = nats_client + .object_store::() .await .map_err(|err| { tracing::error!( target: TRACING_TARGET, error = %err, - "Failed to create document store" + "Failed to create file store" ); 
ErrorKind::InternalServerError.with_message("Failed to initialize file storage") })?; - let document_key = DocumentKey::from_str(&file.storage_path).map_err(|err| { + let file_key = FileKey::from_str(&file.storage_path).map_err(|err| { tracing::error!( target: TRACING_TARGET, error = %err, @@ -405,9 +443,9 @@ async fn download_file( .with_context(format!("Parse error: {}", err)) })?; - // Get streaming content from NATS document store - let get_result = document_store - .get(&document_key) + // Get streaming content from NATS file store + let get_result = file_store + .get(&file_key) .await .map_err(|err| { tracing::error!( @@ -477,6 +515,7 @@ fn download_file_docs(op: TransformOperation) -> TransformOperation { )] async fn delete_file( State(pg_client): State, + State(webhook_emitter): State, Path(path_params): Path, AuthState(auth_claims): AuthState, ) -> Result { @@ -491,213 +530,42 @@ async fn delete_file( .authorize_workspace(&mut conn, file.workspace_id, Permission::DeleteFiles) .await?; - conn.delete_document_file(path_params.file_id) - .await - .map_err(|err| { - tracing::error!(target: TRACING_TARGET, error = %err, "Failed to soft delete file"); - ErrorKind::InternalServerError - .with_message("Failed to delete file") - .with_context(format!("Database error: {}", err)) - })?; - - tracing::info!(target: TRACING_TARGET, "File deleted"); - Ok(StatusCode::NO_CONTENT) -} - -fn delete_file_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete file") - .description("Soft deletes a file by setting a deleted timestamp. The file can be recovered within the retention period.") - .response::<204, ()>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Deletes multiple files (soft delete). -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn delete_multiple_files( - State(pg_client): State, - Path(path_params): Path, - AuthState(auth_claims): AuthState, - ValidateJson(request): ValidateJson, -) -> Result { - tracing::info!(target: TRACING_TARGET, file_count = request.file_ids.len(), "Deleting multiple files"); - - let mut conn = pg_client.get_connection().await?; - - auth_claims - .authorize_workspace(&mut conn, path_params.workspace_id, Permission::DeleteFiles) - .await?; - - // Soft delete all files in a single query - let deleted_count = conn - .delete_document_files(path_params.workspace_id, &request.file_ids) - .await?; + conn.delete_file(path_params.file_id).await.map_err(|err| { + tracing::error!(target: TRACING_TARGET, error = %err, "Failed to soft delete file"); + ErrorKind::InternalServerError + .with_message("Failed to delete file") + .with_context(format!("Database error: {}", err)) + })?; - // Check if all requested files were deleted - if deleted_count != request.file_ids.len() { + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "displayName": file.display_name, + }); + if let Err(err) = webhook_emitter + .emit_file_deleted( + file.workspace_id, + path_params.file_id, + Some(auth_claims.account_id), + Some(data), + ) + .await + { tracing::warn!( target: TRACING_TARGET, - requested = request.file_ids.len(), - deleted = deleted_count, - "Some files were not found or already deleted" + error = %err, + file_id = %path_params.file_id, + "Failed to emit file:deleted webhook event" ); - return Err(ErrorKind::NotFound - .with_message("One or more files not found") - .with_resource("file")); } - 
tracing::info!(target: TRACING_TARGET, file_count = deleted_count, "Files deleted"); - + tracing::info!(target: TRACING_TARGET, "File deleted"); Ok(StatusCode::NO_CONTENT) } -fn delete_multiple_files_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete multiple files") - .description("Soft deletes multiple files by setting deleted timestamps. Files can be recovered within the retention period.") +fn delete_file_docs(op: TransformOperation) -> TransformOperation { + op.summary("Delete file") + .description("Soft deletes a file by setting a deleted timestamp. The file can be recovered within the retention period.") .response::<204, ()>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Downloads all or specific workspace files as an archive. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn download_archived_files( - State(pg_client): State, - State(nats_client): State, - State(archive): State, - Path(path_params): Path, - AuthState(auth_claims): AuthState, - Json(request): Json, -) -> Result<(StatusCode, HeaderMap, Vec)> { - tracing::debug!(target: TRACING_TARGET, "Downloading archived files"); - - let mut conn = pg_client.get_connection().await?; - - auth_claims - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::DownloadFiles, - ) - .await?; - - let document_store = nats_client - .document_store::() - .await - .map_err(|err| { - tracing::error!( - target: TRACING_TARGET, - error = %err, - "Failed to create document store" - ); - ErrorKind::InternalServerError.with_message("Failed to initialize file storage") - })?; - - // Determine which files to download - let files = if let Some(specific_ids) = request.file_ids { - // Batch fetch specific files - conn.find_document_files_by_ids(&specific_ids).await? - } else { - // Get all workspace files using the workspace-scoped query - conn.cursor_list_workspace_files( - path_params.workspace_id, - Default::default(), - Default::default(), - ) - .await? 
- .items - }; - - // Filter to only files belonging to this workspace and not deleted - let valid_files: Vec<_> = files - .into_iter() - .filter(|f| f.workspace_id == path_params.workspace_id && f.deleted_at.is_none()) - .collect(); - - if valid_files.is_empty() { - return Err(ErrorKind::NotFound.with_message("No files found for archive")); - } - - // Fetch all file contents - let mut files_data = Vec::new(); - - for file in &valid_files { - let document_key = DocumentKey::from_str(&file.storage_path).map_err(|err| { - ErrorKind::InternalServerError - .with_message("Invalid file storage path") - .with_context(format!("Parse error: {}", err)) - })?; - - if let Ok(Some(mut get_result)) = document_store.get(&document_key).await { - let mut buffer = Vec::with_capacity(get_result.size()); - if tokio::io::AsyncReadExt::read_to_end(get_result.reader(), &mut buffer) - .await - .is_ok() - { - files_data.push((file.display_name.clone(), buffer)); - } - } - } - - if files_data.is_empty() { - return Err(ErrorKind::NotFound.with_message("No files found for archive")); - } - - // Create archive - let archive_bytes = archive.create_archive(files_data, request.format).await?; - - // Determine content type and file extension based on format - let (content_type, extension) = match request.format { - ArchiveFormat::Tar => ("application/x-tar", "tar.gz"), - ArchiveFormat::Zip => ("application/zip", "zip"), - }; - - // Set up response headers - let mut headers = HeaderMap::new(); - headers.insert( - "content-disposition", - format!( - "attachment; filename=\"workspace_{}_archive.{}\"", - path_params.workspace_id, extension - ) - .parse() - .unwrap(), - ); - headers.insert("content-type", content_type.parse().unwrap()); - headers.insert( - "content-length", - archive_bytes.len().to_string().parse().unwrap(), - ); - - tracing::debug!( - target: TRACING_TARGET, - file_count = valid_files.len(), - "Workspace files downloaded as archive", - ); - - Ok((StatusCode::OK, headers, archive_bytes)) -} - -fn download_archived_files_docs(op: TransformOperation) -> TransformOperation { - op.summary("Download archived files") - .description("Downloads all or specific workspace files as a compressed archive. Supports zip and tar.gz formats.") - .response::<200, ()>() - .response::<400, Json>() .response::<401, Json>() .response::<403, Json>() .response::<404, Json>() @@ -717,11 +585,6 @@ pub fn routes() -> ApiRouter { .layer(DefaultBodyLimit::max(DEFAULT_MAX_FILE_BODY_SIZE)) .get_with(list_files, list_files_docs), ) - .api_route( - "/workspaces/{workspaceId}/files/batch", - get_with(download_archived_files, download_archived_files_docs) - .delete_with(delete_multiple_files, delete_multiple_files_docs), - ) // File-specific routes (file ID is globally unique) .api_route( "/files/{fileId}", diff --git a/crates/nvisy-server/src/handler/members.rs b/crates/nvisy-server/src/handler/members.rs index ef3426b..77b117f 100644 --- a/crates/nvisy-server/src/handler/members.rs +++ b/crates/nvisy-server/src/handler/members.rs @@ -19,7 +19,7 @@ use crate::handler::request::{ }; use crate::handler::response::{ErrorResponse, Member, MembersPage, Page}; use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; +use crate::service::{ServiceState, WebhookEmitter}; /// Tracing target for workspace member operations. 
const TRACING_TARGET: &str = "nvisy_server::handler::members"; @@ -150,6 +150,7 @@ fn get_member_docs(op: TransformOperation) -> TransformOperation { )] async fn delete_member( State(pg_client): State, + State(webhook_emitter): State, AuthState(auth_state): AuthState, Path(path_params): Path, ) -> Result { @@ -188,6 +189,27 @@ async fn delete_member( conn.remove_workspace_member(path_params.workspace_id, path_params.account_id) .await?; + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "removedAccountId": path_params.account_id, + "removedBy": auth_state.account_id, + }); + if let Err(err) = webhook_emitter + .emit_member_deleted( + path_params.workspace_id, + path_params.account_id, // Use account_id as resource_id + Some(auth_state.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + "Failed to emit member:deleted webhook event" + ); + } + tracing::warn!(target: TRACING_TARGET, "Workspace member removed"); Ok(StatusCode::OK) @@ -221,6 +243,7 @@ fn delete_member_docs(op: TransformOperation) -> TransformOperation { )] async fn update_member( State(pg_client): State, + State(webhook_emitter): State, AuthState(auth_state): AuthState, Path(path_params): Path, ValidateJson(request): ValidateJson, @@ -254,6 +277,7 @@ async fn update_member( .with_context("Owners can only leave the workspace themselves")); } + let new_role = request.role; conn.update_workspace_member( path_params.workspace_id, path_params.account_id, @@ -268,6 +292,28 @@ async fn update_member( return Err(ErrorKind::NotFound.with_resource("workspace_member")); }; + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "accountId": path_params.account_id, + "previousRole": current_member.member_role.to_string(), + "newRole": new_role.to_string(), + }); + if let Err(err) = webhook_emitter + .emit_member_updated( + path_params.workspace_id, + path_params.account_id, // Use account_id as resource_id + Some(auth_state.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + "Failed to emit member:updated webhook event" + ); + } + tracing::info!( target: TRACING_TARGET, new_role = ?updated_member.member_role, diff --git a/crates/nvisy-server/src/handler/mod.rs b/crates/nvisy-server/src/handler/mod.rs index 0a3e933..c2f9460 100644 --- a/crates/nvisy-server/src/handler/mod.rs +++ b/crates/nvisy-server/src/handler/mod.rs @@ -6,21 +6,19 @@ mod accounts; mod annotations; mod authentication; -mod comments; -mod documents; mod error; mod files; mod integrations; mod invites; mod members; mod monitors; +mod pipelines; pub mod request; pub mod response; mod runs; mod tokens; mod utility; mod webhooks; -mod websocket; mod workspaces; use aide::axum::ApiRouter; @@ -51,11 +49,9 @@ fn private_routes( .merge(invites::routes()) .merge(members::routes()) .merge(webhooks::routes()) - .merge(websocket::routes()) .merge(files::routes()) - .merge(documents::routes()) - .merge(comments::routes()) - .merge(annotations::routes()); + .merge(annotations::routes()) + .merge(pipelines::routes()); if let Some(additional) = additional_routes { router = router.merge(additional); diff --git a/crates/nvisy-server/src/handler/pipelines.rs b/crates/nvisy-server/src/handler/pipelines.rs new file mode 100644 index 0000000..028c5ba --- /dev/null +++ b/crates/nvisy-server/src/handler/pipelines.rs @@ -0,0 +1,309 @@ +//! Pipeline management handlers for CRUD operations. +//! +//! 
This module provides comprehensive pipeline management functionality, including
+//! creating, reading, updating, deleting, and listing pipelines within
+//! a workspace. All operations are secured with role-based access control.
+
+use aide::axum::ApiRouter;
+use aide::transform::TransformOperation;
+use axum::extract::State;
+use axum::http::StatusCode;
+use nvisy_postgres::PgClient;
+use nvisy_postgres::query::PipelineRepository;
+
+use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson};
+use crate::handler::request::{
+    CreatePipeline, CursorPagination, PipelineFilter, PipelinePathParams, UpdatePipeline,
+    WorkspacePathParams,
+};
+use crate::handler::response::{ErrorResponse, Page, Pipeline, PipelineSummary};
+use crate::handler::{ErrorKind, Result};
+use crate::service::ServiceState;
+
+/// Tracing target for pipeline operations.
+const TRACING_TARGET: &str = "nvisy_server::handler::pipelines";
+
+/// Creates a new pipeline within a workspace.
+///
+/// The creator is automatically set as the owner of the pipeline.
+/// Requires `CreatePipelines` permission for the workspace.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        workspace_id = %path_params.workspace_id,
+    )
+)]
+async fn create_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<WorkspacePathParams>,
+    ValidateJson(request): ValidateJson<CreatePipeline>,
+) -> Result<(StatusCode, Json<Pipeline>)> {
+    tracing::debug!(target: TRACING_TARGET, "Creating pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    auth_state
+        .authorize_workspace(
+            &mut conn,
+            path_params.workspace_id,
+            Permission::CreatePipelines,
+        )
+        .await?;
+
+    let new_pipeline = request.into_model(path_params.workspace_id, auth_state.account_id);
+    let pipeline = conn.create_pipeline(new_pipeline).await?;
+
+    let response = Pipeline::from_model(pipeline);
+
+    tracing::info!(
+        target: TRACING_TARGET,
+        pipeline_id = %response.pipeline_id,
+        "Pipeline created",
+    );
+
+    Ok((StatusCode::CREATED, Json(response)))
+}
+
+fn create_pipeline_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("Create pipeline")
+        .description("Creates a new pipeline in the workspace. The creator is set as the owner.")
+        .response::<201, Json<Pipeline>>()
+        .response::<400, Json<ErrorResponse>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+}
+
+/// Lists all pipelines in a workspace with optional filtering.
+///
+/// Supports filtering by status and searching by name.
+/// Requires `ViewPipelines` permission for the workspace.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        workspace_id = %path_params.workspace_id,
+    )
+)]
+async fn list_pipelines(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<WorkspacePathParams>,
+    Query(pagination): Query<CursorPagination>,
+    Query(filter): Query<PipelineFilter>,
+) -> Result<(StatusCode, Json<Page<PipelineSummary>>)> {
+    tracing::debug!(target: TRACING_TARGET, "Listing pipelines");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    auth_state
+        .authorize_workspace(
+            &mut conn,
+            path_params.workspace_id,
+            Permission::ViewPipelines,
+        )
+        .await?;
+
+    let page = conn
+        .cursor_list_workspace_pipelines(
+            path_params.workspace_id,
+            pagination.into(),
+            filter.status,
+            filter.search.as_deref(),
+        )
+        .await?;
+
+    let response = Page::from_cursor_page(page, PipelineSummary::from_model);
+
+    tracing::debug!(
+        target: TRACING_TARGET,
+        pipeline_count = response.items.len(),
+        "Pipelines listed",
+    );
+
+    Ok((StatusCode::OK, Json(response)))
+}
+
+fn list_pipelines_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("List pipelines")
+        .description("Returns all pipelines in the workspace with optional filtering by status and name search.")
+        .response::<200, Json<Page<PipelineSummary>>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+}
+
+/// Retrieves a pipeline by ID.
+///
+/// The workspace is derived from the pipeline record for authorization.
+/// Requires `ViewPipelines` permission for that workspace.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        pipeline_id = %path_params.pipeline_id,
+    )
+)]
+async fn get_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<PipelinePathParams>,
+) -> Result<(StatusCode, Json<Pipeline>)> {
+    tracing::debug!(target: TRACING_TARGET, "Getting pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    let Some(pipeline) = conn.find_pipeline_by_id(path_params.pipeline_id).await? else {
+        return Err(ErrorKind::NotFound
+            .with_message("Pipeline not found")
+            .with_resource("pipeline"));
+    };
+
+    auth_state
+        .authorize_workspace(&mut conn, pipeline.workspace_id, Permission::ViewPipelines)
+        .await?;
+
+    let response = Pipeline::from_model(pipeline);
+
+    tracing::info!(target: TRACING_TARGET, "Pipeline retrieved");
+
+    Ok((StatusCode::OK, Json(response)))
+}
+
+fn get_pipeline_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("Get pipeline")
+        .description("Returns a pipeline by its unique identifier.")
+        .response::<200, Json<Pipeline>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+        .response::<404, Json<ErrorResponse>>()
+}
+
+/// Updates an existing pipeline.
+///
+/// Requires `UpdatePipelines` permission for the pipeline's workspace.
+/// Only provided fields are updated.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        pipeline_id = %path_params.pipeline_id,
+    )
+)]
+async fn update_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<PipelinePathParams>,
+    ValidateJson(request): ValidateJson<UpdatePipeline>,
+) -> Result<(StatusCode, Json<Pipeline>)> {
+    tracing::debug!(target: TRACING_TARGET, "Updating pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    let Some(existing) = conn.find_pipeline_by_id(path_params.pipeline_id).await? else {
+        return Err(ErrorKind::NotFound
+            .with_message("Pipeline not found")
+            .with_resource("pipeline"));
+    };
+
+    auth_state
+        .authorize_workspace(
+            &mut conn,
+            existing.workspace_id,
+            Permission::UpdatePipelines,
+        )
+        .await?;
+
+    let update_data = request.into_model();
+    let pipeline = conn
+        .update_pipeline(path_params.pipeline_id, update_data)
+        .await?;
+
+    let response = Pipeline::from_model(pipeline);
+
+    tracing::info!(target: TRACING_TARGET, "Pipeline updated");
+
+    Ok((StatusCode::OK, Json(response)))
+}
+
+fn update_pipeline_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("Update pipeline")
+        .description("Updates an existing pipeline. Only provided fields are updated.")
+        .response::<200, Json<Pipeline>>()
+        .response::<400, Json<ErrorResponse>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+        .response::<404, Json<ErrorResponse>>()
+}
+
+/// Soft-deletes a pipeline.
+///
+/// Requires `DeletePipelines` permission. The pipeline is marked as deleted,
+/// but its data is retained for potential recovery.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        pipeline_id = %path_params.pipeline_id,
+    )
+)]
+async fn delete_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<PipelinePathParams>,
+) -> Result<StatusCode> {
+    tracing::debug!(target: TRACING_TARGET, "Deleting pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    let Some(pipeline) = conn.find_pipeline_by_id(path_params.pipeline_id).await? else {
+        return Err(ErrorKind::NotFound
+            .with_message("Pipeline not found")
+            .with_resource("pipeline"));
+    };
+
+    auth_state
+        .authorize_workspace(
+            &mut conn,
+            pipeline.workspace_id,
+            Permission::DeletePipelines,
+        )
+        .await?;
+
+    conn.delete_pipeline(path_params.pipeline_id).await?;
+
+    tracing::info!(target: TRACING_TARGET, "Pipeline deleted");
+
+    Ok(StatusCode::OK)
+}
+
+fn delete_pipeline_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("Delete pipeline")
+        .description("Soft-deletes a pipeline. Data is retained for potential recovery.")
+        .response::<200, ()>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+        .response::<404, Json<ErrorResponse>>()
+}
+
+/// Returns a [`Router`] with all pipeline-related routes.
+///
+/// [`Router`]: axum::routing::Router
+pub fn routes() -> ApiRouter<ServiceState> {
+    use aide::axum::routing::*;
+
+    ApiRouter::new()
+        // Workspace-scoped routes for listing and creating
+        .api_route(
+            "/workspaces/{workspaceId}/pipelines/",
+            post_with(create_pipeline, create_pipeline_docs)
+                .get_with(list_pipelines, list_pipelines_docs),
+        )
+        // Pipeline operations by ID
+        .api_route(
+            "/pipelines/{pipelineId}/",
+            get_with(get_pipeline, get_pipeline_docs)
+                .patch_with(update_pipeline, update_pipeline_docs)
+                .delete_with(delete_pipeline, delete_pipeline_docs),
+        )
+        .with_path_items(|item| item.tag("Pipelines"))
+}
diff --git a/crates/nvisy-server/src/handler/request/annotations.rs b/crates/nvisy-server/src/handler/request/annotations.rs
index b0270a8..cc735eb 100644
--- a/crates/nvisy-server/src/handler/request/annotations.rs
+++ b/crates/nvisy-server/src/handler/request/annotations.rs
@@ -1,6 +1,6 @@
 //! Annotation request types.
 
-use nvisy_postgres::model::{NewDocumentAnnotation, UpdateDocumentAnnotation};
+use nvisy_postgres::model::{NewFileAnnotation, UpdateFileAnnotation};
 use nvisy_postgres::types::AnnotationType;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -24,9 +24,9 @@ pub struct CreateAnnotation {
 
 impl CreateAnnotation {
     /// Converts to database model.
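// ---------------------------------------------------------------------------
// Illustrative only (not part of this diff): a minimal client-side sketch of the
// pipeline routes registered above, assuming a reqwest-based HTTP client (with the
// "json" feature), a hypothetical base URL and bearer token, and the camelCase
// payload shape of `CreatePipeline`. Error handling is reduced to `?` for brevity.
// ---------------------------------------------------------------------------
use serde_json::json;

async fn pipeline_roundtrip() -> Result<(), reqwest::Error> {
    let base = "https://api.example.com"; // assumption
    let token = std::env::var("API_TOKEN").unwrap_or_default(); // assumption
    let workspace_id = "11111111-1111-1111-1111-111111111111";
    let client = reqwest::Client::new();

    // POST /workspaces/{workspaceId}/pipelines/ -> 201 Created with the full Pipeline body.
    let created: serde_json::Value = client
        .post(format!("{base}/workspaces/{workspace_id}/pipelines/"))
        .bearer_auth(&token)
        .json(&json!({ "name": "ingest-pdfs", "description": "Example pipeline" }))
        .send()
        .await?
        .json()
        .await?;

    // GET /workspaces/{workspaceId}/pipelines/ -> 200 OK with a page of PipelineSummary items.
    let page: serde_json::Value = client
        .get(format!("{base}/workspaces/{workspace_id}/pipelines/"))
        .bearer_auth(&token)
        .send()
        .await?
        .json()
        .await?;

    println!("created: {created}\npage: {page}");
    Ok(())
}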
- pub fn into_model(self, file_id: Uuid, account_id: Uuid) -> NewDocumentAnnotation { - NewDocumentAnnotation { - document_file_id: file_id, + pub fn into_model(self, file_id: Uuid, account_id: Uuid) -> NewFileAnnotation { + NewFileAnnotation { + file_id, account_id, content: self.content, annotation_type: Some(self.annotation_type), @@ -52,11 +52,12 @@ pub struct UpdateAnnotation { } impl UpdateAnnotation { - pub fn into_model(self) -> UpdateDocumentAnnotation { - UpdateDocumentAnnotation { + pub fn into_model(self) -> UpdateFileAnnotation { + UpdateFileAnnotation { content: self.content, annotation_type: self.annotation_type, metadata: self.metadata, + deleted_at: None, } } } diff --git a/crates/nvisy-server/src/handler/request/comments.rs b/crates/nvisy-server/src/handler/request/comments.rs deleted file mode 100644 index f62d430..0000000 --- a/crates/nvisy-server/src/handler/request/comments.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! Document comment request types. - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; -use validator::Validate; - -/// Request payload for creating a new document comment. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct CreateComment { - /// Comment text content. - #[validate(length(min = 1, max = 10000))] - pub content: String, - /// Parent comment ID for threaded replies. - pub parent_comment_id: Option, - /// Account being replied to (@mention). - pub reply_to_account_id: Option, -} - -impl CreateComment { - /// Converts to database model. - pub fn into_model( - self, - account_id: Uuid, - file_id: Uuid, - ) -> nvisy_postgres::model::NewDocumentComment { - nvisy_postgres::model::NewDocumentComment { - file_id, - account_id, - parent_comment_id: self.parent_comment_id, - reply_to_account_id: self.reply_to_account_id, - content: self.content, - ..Default::default() - } - } -} - -/// Request payload to update a document comment. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct UpdateComment { - /// Updated comment content. - #[validate(length(min = 1, max = 10000))] - pub content: Option, -} - -impl UpdateComment { - pub fn into_model(self) -> nvisy_postgres::model::UpdateDocumentComment { - nvisy_postgres::model::UpdateDocumentComment { - content: self.content, - ..Default::default() - } - } -} diff --git a/crates/nvisy-server/src/handler/request/documents.rs b/crates/nvisy-server/src/handler/request/documents.rs deleted file mode 100644 index 1eb9d93..0000000 --- a/crates/nvisy-server/src/handler/request/documents.rs +++ /dev/null @@ -1,67 +0,0 @@ -//! Document request types. - -use nvisy_postgres::model::{NewDocument, UpdateDocument as UpdateDocumentModel}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; -use validator::Validate; - -use super::validations::is_alphanumeric; - -/// Request payload for creating a new document. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct CreateDocument { - /// Display name of the document. - #[validate(length(min = 1, max = 255))] - pub display_name: String, - /// Description of the document. - #[validate(length(max = 200))] - pub description: Option, - /// Tags for document classification. - #[validate(length(max = 20))] - pub tags: Option>, -} - -impl CreateDocument { - /// Converts this request into a database model. 
- pub fn into_model(self, workspace_id: Uuid, account_id: Uuid) -> NewDocument { - NewDocument { - workspace_id, - account_id, - display_name: Some(self.display_name), - description: self.description, - tags: self.tags.map(|t| t.into_iter().map(Some).collect()), - ..Default::default() - } - } -} - -/// Request payload for updating a document. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct UpdateDocument { - /// Updated display name. - #[validate(length(min = 1, max = 255))] - pub display_name: Option, - /// Updated description. - #[validate(length(max = 2000))] - pub description: Option, - /// Updated tags (must be alphanumeric). - #[validate(length(min = 1, max = 20))] - #[validate(custom(function = "is_alphanumeric"))] - pub tags: Option>, -} - -impl UpdateDocument { - pub fn into_model(self) -> UpdateDocumentModel { - UpdateDocumentModel { - display_name: self.display_name, - description: self.description.map(Some), - tags: self.tags.map(|t| t.into_iter().map(Some).collect()), - ..Default::default() - } - } -} diff --git a/crates/nvisy-server/src/handler/request/files.rs b/crates/nvisy-server/src/handler/request/files.rs index e1e6947..5b4a75a 100644 --- a/crates/nvisy-server/src/handler/request/files.rs +++ b/crates/nvisy-server/src/handler/request/files.rs @@ -1,14 +1,11 @@ -//! Document file request types. +//! File request types. -use nvisy_postgres::model::UpdateDocumentFile; -use nvisy_postgres::types::{ContentSegmentation, FileFilter, FileFormat}; +use nvisy_postgres::model::UpdateFile as UpdateFileModel; +use nvisy_postgres::types::{FileFilter, FileFormat}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use uuid::Uuid; use validator::Validate; -use crate::service::ArchiveFormat; - /// Request to update file metadata. #[must_use] #[derive(Debug, Default, Serialize, Deserialize, Validate, JsonSchema)] @@ -17,63 +14,23 @@ pub struct UpdateFile { /// New display name for the file. #[validate(length(min = 1, max = 255))] pub display_name: Option, - /// New processing priority (1-10, higher = more priority). - #[validate(range(min = 1, max = 10))] - pub processing_priority: Option, - /// Document ID to assign the file to. - pub document_id: Option, - /// Knowledge extraction settings update. - #[serde(flatten)] - pub knowledge: Option, + /// Updated tags. + pub tags: Option>, + /// Updated metadata. + pub metadata: Option, } impl UpdateFile { - pub fn into_model(self) -> UpdateDocumentFile { - UpdateDocumentFile { + pub fn into_model(self) -> UpdateFileModel { + UpdateFileModel { display_name: self.display_name, - processing_priority: self.processing_priority, - document_id: self.document_id.map(Some), - is_indexed: self.knowledge.as_ref().and_then(|k| k.is_indexed), - content_segmentation: self.knowledge.as_ref().and_then(|k| k.content_segmentation), - visual_support: self.knowledge.as_ref().and_then(|k| k.visual_support), + tags: self.tags.map(|t| t.into_iter().map(Some).collect()), + metadata: self.metadata, ..Default::default() } } } -/// Request to update file knowledge extraction settings. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, Validate, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct UpdateFileKnowledge { - /// Whether the file is indexed for knowledge extraction. - pub is_indexed: Option, - /// Content segmentation strategy for knowledge extraction. 
- pub content_segmentation: Option, - /// Whether visual elements are supported for knowledge extraction. - pub visual_support: Option, -} - -/// Request to delete multiple files. -#[derive(Debug, Deserialize, Validate, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct DeleteFiles { - /// File IDs to delete (1-100 files). - #[validate(length(min = 1, max = 100))] - pub file_ids: Vec, -} - -/// Request to download files as an archive. -#[derive(Debug, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct DownloadFiles { - /// Archive format. - pub format: ArchiveFormat, - /// Optional specific file IDs (if None, downloads all workspace files). - #[serde(skip_serializing_if = "Option::is_none")] - pub file_ids: Option>, -} - /// Query parameters for listing files. #[must_use] #[derive(Debug, Default, Serialize, Deserialize, JsonSchema)] diff --git a/crates/nvisy-server/src/handler/request/mod.rs b/crates/nvisy-server/src/handler/request/mod.rs index d3ce496..972df00 100644 --- a/crates/nvisy-server/src/handler/request/mod.rs +++ b/crates/nvisy-server/src/handler/request/mod.rs @@ -3,8 +3,6 @@ mod accounts; mod annotations; mod authentications; -mod comments; -mod documents; mod files; mod integrations; mod invites; @@ -12,6 +10,7 @@ mod members; mod monitors; mod paginations; mod paths; +mod pipelines; mod tokens; mod validations; mod webhooks; @@ -20,8 +19,6 @@ mod workspaces; pub use accounts::*; pub use annotations::*; pub use authentications::*; -pub use comments::*; -pub use documents::*; pub use files::*; pub use integrations::*; pub use invites::*; @@ -29,6 +26,7 @@ pub use members::*; pub use monitors::*; pub use paginations::*; pub use paths::*; +pub use pipelines::*; pub use tokens::*; pub use validations::*; pub use webhooks::*; diff --git a/crates/nvisy-server/src/handler/request/paths.rs b/crates/nvisy-server/src/handler/request/paths.rs index 7295901..a7a9275 100644 --- a/crates/nvisy-server/src/handler/request/paths.rs +++ b/crates/nvisy-server/src/handler/request/paths.rs @@ -13,15 +13,6 @@ pub struct WorkspacePathParams { pub workspace_id: Uuid, } -/// Path parameters for document operations. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct DocumentPathParams { - /// Unique identifier of the document. - pub document_id: Uuid, -} - /// Path parameters for workspace member operations. #[must_use] #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -98,18 +89,6 @@ pub struct VersionPathParams { pub version_id: Uuid, } -/// Path parameters for comment operations (comment ID only). -/// -/// Since comment IDs are globally unique UUIDs, file/workspace context can be -/// derived from the comment record itself for authorization purposes. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct CommentPathParams { - /// Unique identifier of the comment. - pub comment_id: Uuid, -} - /// Path parameters for webhook operations (webhook ID only). /// /// Since webhook IDs are globally unique UUIDs, workspace context can be @@ -157,3 +136,27 @@ pub struct TokenPathParams { /// Unique identifier of the API token. pub token_id: Uuid, } + +/// Path parameters for account operations. +/// +/// Used when retrieving account information by ID. Access is granted +/// if the requester shares at least one workspace with the target account. 
+#[must_use] +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct AccountPathParams { + /// Unique identifier of the account. + pub account_id: Uuid, +} + +/// Path parameters for pipeline operations. +/// +/// Since pipeline IDs are globally unique UUIDs, workspace context can be +/// derived from the pipeline record itself for authorization purposes. +#[must_use] +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct PipelinePathParams { + /// Unique identifier of the pipeline. + pub pipeline_id: Uuid, +} diff --git a/crates/nvisy-server/src/handler/request/pipelines.rs b/crates/nvisy-server/src/handler/request/pipelines.rs new file mode 100644 index 0000000..cd86c01 --- /dev/null +++ b/crates/nvisy-server/src/handler/request/pipelines.rs @@ -0,0 +1,96 @@ +//! Pipeline request types. +//! +//! This module provides request DTOs for pipeline management operations including +//! creation, updates, and filtering. All request types support JSON serialization +//! and validation. + +use nvisy_postgres::model::{NewPipeline, UpdatePipeline as UpdatePipelineModel}; +use nvisy_postgres::types::PipelineStatus; +use nvisy_runtime::definition::Workflow; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; +use validator::Validate; + +/// Request payload for creating a new pipeline. +/// +/// Creates a new pipeline with the specified name and optional description. +/// The definition can be added later via update. +#[must_use] +#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(rename_all = "camelCase")] +pub struct CreatePipeline { + /// Pipeline name (3-100 characters). + #[validate(length(min = 3, max = 100))] + pub name: String, + /// Optional description of the pipeline (max 500 characters). + #[validate(length(max = 500))] + pub description: Option, +} + +impl CreatePipeline { + /// Converts this request into a [`NewPipeline`] model for database insertion. + /// + /// # Arguments + /// + /// * `workspace_id` - The ID of the workspace this pipeline belongs to. + /// * `account_id` - The ID of the account creating the pipeline. + #[inline] + pub fn into_model(self, workspace_id: Uuid, account_id: Uuid) -> NewPipeline { + NewPipeline { + workspace_id, + account_id, + name: self.name, + description: self.description, + ..Default::default() + } + } +} + +/// Request payload to update an existing pipeline. +/// +/// All fields are optional; only provided fields will be updated. +/// The definition field accepts a strictly typed WorkflowDefinition. +#[must_use] +#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(rename_all = "camelCase")] +pub struct UpdatePipeline { + /// New name for the pipeline (3-100 characters). + #[validate(length(min = 3, max = 100))] + pub name: Option, + /// New description for the pipeline (max 500 characters). + #[validate(length(max = 500))] + pub description: Option, + /// New status for the pipeline. + pub status: Option, + /// New definition for the pipeline (strictly typed workflow definition). + #[schemars(with = "Option")] + pub definition: Option, +} + +impl UpdatePipeline { + /// Converts this request into an [`UpdatePipelineModel`] for database update. 
+ pub fn into_model(self) -> UpdatePipelineModel { + UpdatePipelineModel { + name: self.name, + description: self.description.map(Some), + status: self.status, + definition: self.definition.map(|d| { + serde_json::to_value(d).expect("WorkflowDefinition serialization should not fail") + }), + ..Default::default() + } + } +} + +/// Query parameters for filtering pipelines. +#[must_use] +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(rename_all = "camelCase")] +pub struct PipelineFilter { + /// Filter by pipeline status. + pub status: Option, + /// Search by pipeline name (trigram similarity). + #[validate(length(max = 100))] + pub search: Option, +} diff --git a/crates/nvisy-server/src/handler/response/annotations.rs b/crates/nvisy-server/src/handler/response/annotations.rs index a77d0bd..f56077e 100644 --- a/crates/nvisy-server/src/handler/response/annotations.rs +++ b/crates/nvisy-server/src/handler/response/annotations.rs @@ -1,7 +1,7 @@ //! Document annotation response types. use jiff::Timestamp; -use nvisy_postgres::model::DocumentAnnotation; +use nvisy_postgres::model::FileAnnotation; use nvisy_postgres::types::AnnotationType; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -33,10 +33,10 @@ pub struct Annotation { pub type AnnotationsPage = Page; impl Annotation { - pub fn from_model(annotation: DocumentAnnotation) -> Self { + pub fn from_model(annotation: FileAnnotation) -> Self { Self { id: annotation.id, - file_id: annotation.document_file_id, + file_id: annotation.file_id, account_id: annotation.account_id, content: annotation.content, annotation_type: annotation.annotation_type, diff --git a/crates/nvisy-server/src/handler/response/comments.rs b/crates/nvisy-server/src/handler/response/comments.rs deleted file mode 100644 index cfc84ba..0000000 --- a/crates/nvisy-server/src/handler/response/comments.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! Document comment response types. - -use jiff::Timestamp; -use nvisy_postgres::model; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::Page; - -/// Represents a document comment. -#[must_use] -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct Comment { - /// ID of the comment. - pub comment_id: Uuid, - /// ID of the file this comment belongs to. - pub file_id: Uuid, - /// ID of the account that created the comment. - pub account_id: Uuid, - /// Parent comment ID for threaded replies. - pub parent_comment_id: Option, - /// Account being replied to (@mention). - pub reply_to_account_id: Option, - /// Comment text content. - pub content: Option, - /// Timestamp when the comment was created. - pub created_at: Timestamp, - /// Timestamp when the comment was last updated. - pub updated_at: Timestamp, -} - -/// Paginated list of comments. 
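// ---------------------------------------------------------------------------
// Illustrative only (not part of this diff): a minimal sketch of the serde round
// trip used for the pipeline `definition` column above. `DemoWorkflow` is a
// stand-in for `nvisy_runtime::definition::Workflow`, assumed here only to
// implement Serialize, Deserialize and Default, as the handlers rely on.
// ---------------------------------------------------------------------------
use serde::{Deserialize, Serialize};

#[derive(Debug, Default, Serialize, Deserialize)]
struct DemoWorkflow {
    nodes: Vec<String>,
}

fn definition_roundtrip() -> serde_json::Result<()> {
    let wf = DemoWorkflow {
        nodes: vec!["extract".into(), "embed".into()],
    };

    // UpdatePipeline::into_model: typed workflow -> serde_json::Value for the JSONB column.
    let stored: serde_json::Value = serde_json::to_value(&wf)?;

    // Pipeline::from_model: Value -> typed workflow, falling back to Default::default()
    // if the stored JSON no longer matches the current schema.
    let loaded: DemoWorkflow = serde_json::from_value(stored).unwrap_or_default();
    println!("{loaded:?}");
    Ok(())
}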
-pub type CommentsPage = Page; - -impl Comment { - pub fn from_model(comment: model::DocumentComment) -> Self { - Self { - comment_id: comment.id, - file_id: comment.file_id, - account_id: comment.account_id, - parent_comment_id: comment.parent_comment_id, - reply_to_account_id: comment.reply_to_account_id, - content: comment.get_content(), - created_at: comment.created_at.into(), - updated_at: comment.updated_at.into(), - } - } -} diff --git a/crates/nvisy-server/src/handler/response/documents.rs b/crates/nvisy-server/src/handler/response/documents.rs deleted file mode 100644 index 0973ad9..0000000 --- a/crates/nvisy-server/src/handler/response/documents.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! Document response types. - -use jiff::Timestamp; -use nvisy_postgres::model; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::Page; - -/// Represents a document with full details. -#[must_use] -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct Document { - /// ID of the document. - pub document_id: Uuid, - /// ID of the workspace that the document belongs to. - pub workspace_id: Uuid, - /// ID of the account that owns the document. - pub account_id: Uuid, - /// Display name of the document. - pub display_name: String, - /// Description of the document. - pub description: Option, - /// Tags associated with the document. - pub tags: Vec, - /// Timestamp when the document was created. - pub created_at: Timestamp, - /// Timestamp when the document was last updated. - pub updated_at: Timestamp, -} - -/// Paginated list of documents. -pub type DocumentsPage = Page; - -impl Document { - pub fn from_model(document: model::Document) -> Self { - Self { - tags: document.tags(), - document_id: document.id, - workspace_id: document.workspace_id, - account_id: document.account_id, - display_name: document.display_name, - description: document.description, - created_at: document.created_at.into(), - updated_at: document.updated_at.into(), - } - } -} diff --git a/crates/nvisy-server/src/handler/response/files.rs b/crates/nvisy-server/src/handler/response/files.rs index 94f0bcd..89e8587 100644 --- a/crates/nvisy-server/src/handler/response/files.rs +++ b/crates/nvisy-server/src/handler/response/files.rs @@ -1,47 +1,45 @@ -//! Document file response types. +//! File response types. use jiff::Timestamp; -use nvisy_postgres::model::DocumentFile; -use nvisy_postgres::types::{ContentSegmentation, ProcessingStatus}; +use nvisy_postgres::model::File as FileModel; +use nvisy_postgres::types::FileSource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::Page; -/// Knowledge-related fields for file responses. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct FileKnowledge { - /// Whether the file is indexed for knowledge extraction. - pub is_indexed: bool, - - /// Content segmentation strategy. - pub content_segmentation: ContentSegmentation, - - /// Whether visual elements are supported. - pub visual_support: bool, -} - -/// Represents an uploaded file. +/// Represents a file in responses. #[must_use] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct File { /// Unique file identifier. - pub file_id: Uuid, + pub id: Uuid, + /// Workspace this file belongs to. + pub workspace_id: Uuid, /// Display name. pub display_name: String, + /// Original filename when uploaded. 
+ pub original_filename: String, + /// File extension (without dot). + pub file_extension: String, + /// MIME type. + #[serde(skip_serializing_if = "Option::is_none")] + pub mime_type: Option, /// File size in bytes. pub file_size: i64, - /// Processing status. - pub status: ProcessingStatus, - /// Processing priority (1-10). - pub processing_priority: i32, /// Classification tags. pub tags: Vec, - /// Knowledge extraction settings. - pub file_knowledge: FileKnowledge, + /// How the file was created (uploaded, imported, generated). + pub source: FileSource, + /// Account ID of the user who uploaded/created the file. + pub uploaded_by: Uuid, + /// Version number (1 for original, higher for newer versions). + pub version_number: i32, + /// Parent file ID if this is a newer version. + #[serde(skip_serializing_if = "Option::is_none")] + pub parent_id: Option, /// Creation timestamp. pub created_at: Timestamp, /// Last update timestamp. @@ -49,19 +47,20 @@ pub struct File { } impl File { - pub fn from_model(file: DocumentFile) -> Self { + pub fn from_model(file: FileModel) -> Self { Self { - file_id: file.id, + id: file.id, + workspace_id: file.workspace_id, display_name: file.display_name, + original_filename: file.original_filename, + file_extension: file.file_extension, + mime_type: file.mime_type, file_size: file.file_size_bytes, - status: file.processing_status, - processing_priority: file.processing_priority, tags: file.tags.into_iter().flatten().collect(), - file_knowledge: FileKnowledge { - is_indexed: file.is_indexed, - content_segmentation: file.content_segmentation, - visual_support: file.visual_support, - }, + source: file.source, + uploaded_by: file.account_id, + version_number: file.version_number, + parent_id: file.parent_id, created_at: file.created_at.into(), updated_at: file.updated_at.into(), } diff --git a/crates/nvisy-server/src/handler/response/mod.rs b/crates/nvisy-server/src/handler/response/mod.rs index e4d1d3a..377db11 100644 --- a/crates/nvisy-server/src/handler/response/mod.rs +++ b/crates/nvisy-server/src/handler/response/mod.rs @@ -8,8 +8,6 @@ mod accounts; mod activities; mod annotations; mod authentications; -mod comments; -mod documents; mod errors; mod files; mod integrations; @@ -17,6 +15,7 @@ mod invites; mod members; mod monitors; mod notifications; +mod pipelines; mod runs; mod tokens; mod webhooks; @@ -26,8 +25,6 @@ pub use accounts::*; pub use activities::*; pub use annotations::*; pub use authentications::*; -pub use comments::*; -pub use documents::*; pub use errors::*; pub use files::*; pub use integrations::*; @@ -35,6 +32,7 @@ pub use invites::*; pub use members::*; pub use monitors::*; pub use notifications::*; +pub use pipelines::*; pub use runs::*; pub use tokens::*; pub use webhooks::*; diff --git a/crates/nvisy-server/src/handler/response/pipelines.rs b/crates/nvisy-server/src/handler/response/pipelines.rs new file mode 100644 index 0000000..c60bd5a --- /dev/null +++ b/crates/nvisy-server/src/handler/response/pipelines.rs @@ -0,0 +1,94 @@ +//! Pipeline response types. + +use jiff::Timestamp; +use nvisy_postgres::model; +use nvisy_postgres::types::PipelineStatus; +use nvisy_runtime::definition::Workflow; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::Page; + +/// Pipeline response. +#[must_use] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Pipeline { + /// Unique pipeline identifier. 
+ pub pipeline_id: Uuid, + /// Workspace this pipeline belongs to. + pub workspace_id: Uuid, + /// Account that created this pipeline. + pub account_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline lifecycle status. + pub status: PipelineStatus, + /// Pipeline definition (workflow graph). + #[schemars(with = "serde_json::Value")] + pub definition: Workflow, + /// Timestamp when the pipeline was created. + pub created_at: Timestamp, + /// Timestamp when the pipeline was last updated. + pub updated_at: Timestamp, +} + +impl Pipeline { + /// Creates a new instance of [`Pipeline`] from the database model. + pub fn from_model(pipeline: model::Pipeline) -> Self { + let definition: Workflow = serde_json::from_value(pipeline.definition).unwrap_or_default(); + Self { + pipeline_id: pipeline.id, + workspace_id: pipeline.workspace_id, + account_id: pipeline.account_id, + name: pipeline.name, + description: pipeline.description, + status: pipeline.status, + definition, + created_at: pipeline.created_at.into(), + updated_at: pipeline.updated_at.into(), + } + } +} + +/// Paginated list of pipelines. +pub type PipelinesPage = Page; + +/// Summary response for pipeline (used in lists). +#[must_use] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct PipelineSummary { + /// Unique pipeline identifier. + pub pipeline_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline lifecycle status. + pub status: PipelineStatus, + /// Timestamp when the pipeline was created. + pub created_at: Timestamp, + /// Timestamp when the pipeline was last updated. + pub updated_at: Timestamp, +} + +impl PipelineSummary { + /// Creates a new instance of [`PipelineSummary`] from the database model. + pub fn from_model(pipeline: model::Pipeline) -> Self { + Self { + pipeline_id: pipeline.id, + name: pipeline.name, + description: pipeline.description, + status: pipeline.status, + created_at: pipeline.created_at.into(), + updated_at: pipeline.updated_at.into(), + } + } +} + +/// Paginated list of pipeline summaries. +pub type PipelineSummariesPage = Page; diff --git a/crates/nvisy-server/src/handler/response/webhooks.rs b/crates/nvisy-server/src/handler/response/webhooks.rs index c27f724..be79265 100644 --- a/crates/nvisy-server/src/handler/response/webhooks.rs +++ b/crates/nvisy-server/src/handler/response/webhooks.rs @@ -71,6 +71,35 @@ impl Webhook { } } +/// Webhook creation response that includes the secret (visible only once). +/// +/// The secret is used for HMAC-SHA256 signature verification of webhook payloads. +/// It is only returned when the webhook is first created and cannot be retrieved +/// again. Store it securely. +#[must_use] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct WebhookCreated { + /// The created webhook details. + #[serde(flatten)] + pub webhook: Webhook, + /// HMAC-SHA256 signing secret for webhook verification. + /// + /// **Important**: This is the only time the secret will be shown. + /// Store it securely as it cannot be retrieved again. + pub secret: String, +} + +impl WebhookCreated { + pub fn from_model(webhook: model::WorkspaceWebhook) -> Self { + let secret = webhook.secret.clone(); + Self { + webhook: Webhook::from_model(webhook), + secret, + } + } +} + /// Paginated response for workspace webhooks. 
pub type WebhooksPage = Page; diff --git a/crates/nvisy-server/src/handler/webhooks.rs b/crates/nvisy-server/src/handler/webhooks.rs index 37da975..14576f8 100644 --- a/crates/nvisy-server/src/handler/webhooks.rs +++ b/crates/nvisy-server/src/handler/webhooks.rs @@ -19,7 +19,9 @@ use crate::handler::request::{ CreateWebhook, CursorPagination, TestWebhook, UpdateWebhook as UpdateWebhookRequest, WebhookPathParams, WorkspacePathParams, }; -use crate::handler::response::{ErrorResponse, Webhook, WebhookResult, WebhooksPage}; +use crate::handler::response::{ + ErrorResponse, Webhook, WebhookCreated, WebhookResult, WebhooksPage, +}; use crate::handler::{ErrorKind, Result}; use crate::service::ServiceState; @@ -41,7 +43,7 @@ async fn create_webhook( AuthState(auth_state): AuthState, Path(path_params): Path, ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { +) -> Result<(StatusCode, Json)> { tracing::debug!(target: TRACING_TARGET, "Creating workspace webhook"); let mut conn = pg_client.get_connection().await?; @@ -63,13 +65,21 @@ async fn create_webhook( "Webhook created", ); - Ok((StatusCode::CREATED, Json(Webhook::from_model(webhook)))) + // Return WebhookCreated which includes the secret (visible only once) + Ok(( + StatusCode::CREATED, + Json(WebhookCreated::from_model(webhook)), + )) } fn create_webhook_docs(op: TransformOperation) -> TransformOperation { op.summary("Create webhook") - .description("Creates a new webhook for the workspace.") - .response::<201, Json>() + .description( + "Creates a new webhook for the workspace. The response includes the signing secret \ + which is used for HMAC-SHA256 verification of webhook payloads. **Important**: The \ + secret is only shown once upon creation and cannot be retrieved again.", + ) + .response::<201, Json>() .response::<400, Json>() .response::<401, Json>() .response::<403, Json>() diff --git a/crates/nvisy-server/src/handler/websocket.rs b/crates/nvisy-server/src/handler/websocket.rs deleted file mode 100644 index 9e3e64c..0000000 --- a/crates/nvisy-server/src/handler/websocket.rs +++ /dev/null @@ -1,820 +0,0 @@ -//! WebSocket handler for real-time workspace communication via NATS. - -use std::ops::ControlFlow; -use std::sync::Arc; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::Duration; - -use aide::axum::ApiRouter; -use aide::transform::TransformOperation; -use axum::extract::State; -use axum::extract::ws::{Message, Utf8Bytes, WebSocket, WebSocketUpgrade}; -use axum::response::Response; -use futures::{SinkExt, StreamExt}; -use nvisy_nats::NatsClient; -use nvisy_nats::stream::{WorkspaceEventPublisher, WorkspaceWsMessage}; -use nvisy_postgres::PgClient; -use nvisy_postgres::query::{AccountRepository, WorkspaceRepository}; -use uuid::Uuid; - -use crate::extract::{AuthProvider, AuthState, Json, Path, Permission}; -use crate::handler::request::WorkspacePathParams; -use crate::handler::response::ErrorResponse; -use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; - -/// Tracing target for workspace websocket operations. -const TRACING_TARGET: &str = "nvisy_server::handler::workspace_websocket"; - -/// Maximum size of a WebSocket message in bytes (1 MB). -const MAX_MESSAGE_SIZE: usize = 1_024 * 1_024; - -/// Timeout for fetching messages from NATS stream. -const NATS_FETCH_TIMEOUT: Duration = Duration::from_millis(100); - -/// Maximum time to wait for graceful connection shutdown. 
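// ---------------------------------------------------------------------------
// Illustrative only (not part of this diff): a minimal sketch of how a webhook
// consumer might verify payloads with the HMAC-SHA256 secret returned by
// `WebhookCreated` above. The signature header name and hex encoding are
// assumptions; adjust to whatever the delivery side actually sends. Uses the
// `hmac`, `sha2`, and `hex` crates.
// ---------------------------------------------------------------------------
use hmac::{Hmac, Mac};
use sha2::Sha256;

type HmacSha256 = Hmac<Sha256>;

/// Returns true when `signature_hex` matches the HMAC-SHA256 of `body` under `secret`.
fn verify_webhook_signature(secret: &str, body: &[u8], signature_hex: &str) -> bool {
    let Ok(mut mac) = HmacSha256::new_from_slice(secret.as_bytes()) else {
        return false;
    };
    mac.update(body);

    let Ok(expected) = hex::decode(signature_hex) else {
        return false;
    };

    // verify_slice performs a constant-time comparison.
    mac.verify_slice(&expected).is_ok()
}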
-const GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5); - -/// Context for a WebSocket connection. -#[derive(Debug, Clone)] -struct WsContext { - /// Unique connection identifier for logging/debugging. - connection_id: Uuid, - /// The workspace this connection belongs to. - workspace_id: Uuid, - /// The authenticated account ID. - account_id: Uuid, -} - -impl WsContext { - /// Creates a new WebSocket connection context. - fn new(workspace_id: Uuid, account_id: Uuid) -> Self { - Self { - connection_id: Uuid::new_v4(), - workspace_id, - account_id, - } - } -} - -/// Metrics for a WebSocket connection. -#[derive(Debug, Default)] -struct ConnectionMetrics { - messages_sent: AtomicU64, - messages_received: AtomicU64, - messages_published: AtomicU64, - messages_dropped: AtomicU64, - errors: AtomicU64, -} - -impl ConnectionMetrics { - fn new() -> Arc { - Arc::new(Self::default()) - } - - fn increment_sent(&self) { - self.messages_sent.fetch_add(1, Ordering::Relaxed); - } - - fn increment_received(&self) { - self.messages_received.fetch_add(1, Ordering::Relaxed); - } - - fn increment_published(&self) { - self.messages_published.fetch_add(1, Ordering::Relaxed); - } - - fn increment_dropped(&self) { - self.messages_dropped.fetch_add(1, Ordering::Relaxed); - } - - fn increment_errors(&self) { - self.errors.fetch_add(1, Ordering::Relaxed); - } - - fn snapshot(&self) -> MetricsSnapshot { - MetricsSnapshot { - sent: self.messages_sent.load(Ordering::Relaxed), - received: self.messages_received.load(Ordering::Relaxed), - published: self.messages_published.load(Ordering::Relaxed), - dropped: self.messages_dropped.load(Ordering::Relaxed), - errors: self.errors.load(Ordering::Relaxed), - } - } -} - -#[derive(Debug)] -struct MetricsSnapshot { - sent: u64, - received: u64, - published: u64, - dropped: u64, - errors: u64, -} - -/// Validate message size to prevent DoS attacks. -fn validate_message_size(ctx: &WsContext, size: usize, metrics: &ConnectionMetrics) -> bool { - if size > MAX_MESSAGE_SIZE { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_size = size, - max_size = MAX_MESSAGE_SIZE, - "message exceeds maximum size, dropping" - ); - metrics.increment_dropped(); - false - } else { - true - } -} - -/// Check if the account has permission to perform the action in the message. 
-async fn check_event_permission( - conn: &mut nvisy_postgres::PgConn, - ctx: &WsContext, - msg: &WorkspaceWsMessage, -) -> Result<()> { - // Determine required permission based on message type - let permission = match msg { - // Read-only events - require ViewDocuments permission - WorkspaceWsMessage::Typing(_) | WorkspaceWsMessage::MemberPresence(_) => { - Permission::ViewDocuments - } - - // Document write events - require UpdateDocuments permission - WorkspaceWsMessage::DocumentUpdate(_) => Permission::UpdateDocuments, - WorkspaceWsMessage::DocumentCreated(_) => Permission::CreateDocuments, - WorkspaceWsMessage::DocumentDeleted(_) => Permission::DeleteDocuments, - - // File events - require appropriate file permissions - WorkspaceWsMessage::FilePreprocessed(_) - | WorkspaceWsMessage::FilePostprocessed(_) - | WorkspaceWsMessage::JobProgress(_) - | WorkspaceWsMessage::JobCompleted(_) - | WorkspaceWsMessage::JobFailed(_) => Permission::ViewFiles, - WorkspaceWsMessage::FileTransformed(_) => Permission::UpdateFiles, - - // Member management - require InviteMembers/RemoveMembers permission - WorkspaceWsMessage::MemberAdded(_) => Permission::InviteMembers, - WorkspaceWsMessage::MemberRemoved(_) => Permission::RemoveMembers, - - // Workspace settings - require UpdateWorkspace permission - WorkspaceWsMessage::WorkspaceUpdated(_) => Permission::UpdateWorkspace, - - // System events - always allowed (sent by server) - WorkspaceWsMessage::Join(_) - | WorkspaceWsMessage::Leave(_) - | WorkspaceWsMessage::Error(_) => { - return Ok(()); - } - }; - - // Fetch workspace membership directly - use nvisy_postgres::query::WorkspaceMemberRepository; - - let member = conn - .find_workspace_member(ctx.workspace_id, ctx.account_id) - .await?; - - // Check if member exists and has the required permission - match member { - Some(m) if permission.is_permitted_by_role(m.member_role) => Ok(()), - Some(m) => { - tracing::debug!( - target: TRACING_TARGET, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - required_permission = ?permission, - current_role = ?m.member_role, - "insufficient permissions for event" - ); - Err(ErrorKind::Forbidden.with_context(format!( - "Insufficient permissions: requires {:?}", - permission.minimum_required_role() - ))) - } - None => { - tracing::debug!( - target: TRACING_TARGET, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - "not a member of workspace" - ); - Err(ErrorKind::Forbidden.with_context("Not a workspace member")) - } - } -} - -/// Processes an incoming WebSocket message from the client. 
-async fn process_client_message( - ctx: &WsContext, - msg: Message, - publisher: &WorkspaceEventPublisher, - conn: &mut nvisy_postgres::PgConn, - metrics: &ConnectionMetrics, -) -> ControlFlow<(), ()> { - match msg { - Message::Text(text) => { - metrics.increment_received(); - - if !validate_message_size(ctx, text.len(), metrics) { - return ControlFlow::Continue(()); - } - - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_length = text.len(), - "received text message" - ); - - match serde_json::from_str::(&text) { - Ok(ws_msg) => { - handle_client_message(ctx, ws_msg, publisher, conn, metrics).await; - ControlFlow::Continue(()) - } - Err(e) => { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to parse message, dropping" - ); - metrics.increment_errors(); - metrics.increment_dropped(); - ControlFlow::Continue(()) - } - } - } - Message::Binary(data) => { - metrics.increment_received(); - - if !validate_message_size(ctx, data.len(), metrics) { - return ControlFlow::Continue(()); - } - - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - data_length = data.len(), - "received binary message (not supported), dropping" - ); - metrics.increment_dropped(); - ControlFlow::Continue(()) - } - Message::Close(close_frame) => { - if let Some(cf) = close_frame { - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - close_code = cf.code, - close_reason = %cf.reason, - "client sent close frame" - ); - } else { - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "client sent close frame" - ); - } - ControlFlow::Break(()) - } - Message::Ping(payload) => { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - payload_len = payload.len(), - "received ping" - ); - ControlFlow::Continue(()) - } - Message::Pong(payload) => { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - payload_len = payload.len(), - "received pong" - ); - ControlFlow::Continue(()) - } - } -} - -/// Handles parsed messages from the client with permission checking. 
-async fn handle_client_message( - ctx: &WsContext, - msg: WorkspaceWsMessage, - publisher: &WorkspaceEventPublisher, - conn: &mut nvisy_postgres::PgConn, - metrics: &ConnectionMetrics, -) { - // Check permissions for this event - if let Err(e) = check_event_permission(conn, ctx, &msg).await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %ctx.account_id, - message_type = ?std::mem::discriminant(&msg), - error = %e, - "permission denied for event, dropping" - ); - metrics.increment_dropped(); - metrics.increment_errors(); - return; - } - - match &msg { - WorkspaceWsMessage::Typing(_) => { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "publishing typing indicator" - ); - - // Publish with fresh timestamp - let msg_with_ts = WorkspaceWsMessage::typing(ctx.account_id, None); - - if let Err(e) = publisher - .publish_message(ctx.workspace_id, msg_with_ts) - .await - { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish typing indicator" - ); - metrics.increment_errors(); - } else { - metrics.increment_published(); - } - } - WorkspaceWsMessage::DocumentUpdate(_) - | WorkspaceWsMessage::DocumentCreated(_) - | WorkspaceWsMessage::DocumentDeleted(_) - | WorkspaceWsMessage::FilePreprocessed(_) - | WorkspaceWsMessage::FileTransformed(_) - | WorkspaceWsMessage::FilePostprocessed(_) - | WorkspaceWsMessage::JobProgress(_) - | WorkspaceWsMessage::JobCompleted(_) - | WorkspaceWsMessage::JobFailed(_) - | WorkspaceWsMessage::MemberPresence(_) - | WorkspaceWsMessage::MemberAdded(_) - | WorkspaceWsMessage::MemberRemoved(_) - | WorkspaceWsMessage::WorkspaceUpdated(_) => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_type = ?std::mem::discriminant(&msg), - "publishing event to NATS" - ); - - if let Err(e) = publisher.publish_message(ctx.workspace_id, msg).await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish event to NATS" - ); - metrics.increment_errors(); - } else { - metrics.increment_published(); - } - } - WorkspaceWsMessage::Join(_) - | WorkspaceWsMessage::Leave(_) - | WorkspaceWsMessage::Error(_) => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_type = ?std::mem::discriminant(&msg), - "ignoring system message from client" - ); - metrics.increment_dropped(); - } - } -} - -/// Handles the WebSocket connection lifecycle with NATS pub/sub. -/// -/// This function: -/// 1. Fetches account details and creates context -/// 2. Creates a unique NATS consumer for this WebSocket connection -/// 3. Publishes a join message to all clients -/// 4. Spawns separate tasks for sending and receiving -/// 5. Uses `tokio::select!` to handle whichever task completes first -/// 6. 
Publishes a leave message and cleans up -async fn handle_workspace_websocket( - socket: WebSocket, - workspace_id: Uuid, - account_id: Uuid, - nats_client: NatsClient, - pg_client: PgClient, -) { - let start_time = std::time::Instant::now(); - let ctx = WsContext::new(workspace_id, account_id); - let metrics = ConnectionMetrics::new(); - - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - "websocket connection established" - ); - - // Get a connection for initial queries - let mut conn = match pg_client.get_connection().await { - Ok(conn) => conn, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to acquire database connection" - ); - return; - } - }; - - // Fetch account display name - let display_name = match conn.find_account_by_id(account_id).await { - Ok(Some(account)) => account.display_name, - Ok(None) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %account_id, - "account not found, aborting connection" - ); - return; - } - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %account_id, - error = %e, - "failed to fetch account, aborting connection" - ); - return; - } - }; - - // Create publisher for this connection - let publisher = match nats_client.workspace_event_publisher().await { - Ok(p) => p, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to create event publisher, aborting connection" - ); - return; - } - }; - - // Create subscriber with unique consumer name for this connection - let consumer_name = format!("ws-{}", ctx.connection_id); - let subscriber = match nats_client - .workspace_event_subscriber_for_workspace(&consumer_name, workspace_id) - .await - { - Ok(s) => s, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to create event subscriber, aborting connection" - ); - return; - } - }; - - // Get message stream - let mut message_stream = match subscriber.subscribe().await { - Ok(stream) => stream, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to subscribe to event stream, aborting connection" - ); - return; - } - }; - - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - consumer_name = %consumer_name, - "NATS subscriber created" - ); - - // Split socket into sender and receiver - let (mut sender, mut receiver) = socket.split(); - - // Create and publish join message - let join_msg = WorkspaceWsMessage::join(ctx.account_id, display_name); - - if let Err(e) = publisher - .publish_message(ctx.workspace_id, join_msg.clone()) - .await - { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish join message" - ); - } else { - metrics.increment_published(); - } - - // Clone context and clients for the receive task - let recv_ctx = ctx.clone(); - let recv_publisher = publisher.clone(); - let recv_pg_client = pg_client.clone(); - let recv_metrics = metrics.clone(); - - // Spawn a task to receive messages from the client - let recv_task = tokio::spawn(async move { - // Get a dedicated connection for the receive task - let mut recv_conn = match recv_pg_client.get_connection().await { - 
Ok(conn) => conn, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %recv_ctx.connection_id, - error = %e, - "failed to acquire database connection for receive task" - ); - return; - } - }; - - while let Some(msg_result) = receiver.next().await { - match msg_result { - Ok(msg) => { - if process_client_message( - &recv_ctx, - msg, - &recv_publisher, - &mut recv_conn, - &recv_metrics, - ) - .await - .is_break() - { - break; - } - } - Err(e) => { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %recv_ctx.connection_id, - error = %e, - "error receiving from websocket" - ); - recv_metrics.increment_errors(); - break; - } - } - } - }); - - // Spawn a task to send messages from NATS to the client - let send_ctx = ctx.clone(); - let send_metrics = metrics.clone(); - let send_task = tokio::spawn(async move { - // Send initial join message to this client - if let Ok(text) = serde_json::to_string(&join_msg) { - if let Err(e) = sender.send(Message::Text(Utf8Bytes::from(text))).await { - tracing::error!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to send join message, aborting connection" - ); - return; - } - send_metrics.increment_sent(); - } - - // Listen for NATS messages and forward to this client - loop { - match message_stream.next_with_timeout(NATS_FETCH_TIMEOUT).await { - Ok(Some(mut nats_msg)) => { - let ws_message = &nats_msg.payload().message; - - // Echo prevention: don't send messages back to the sender - if let Some(sender_id) = ws_message.account_id() - && sender_id == send_ctx.account_id - { - if let Err(e) = nats_msg.ack().await { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to ack echoed message" - ); - } - continue; - } - - // Serialize and send the message - match serde_json::to_string(ws_message) { - Ok(text) => { - if let Err(e) = sender.send(Message::Text(Utf8Bytes::from(text))).await - { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to send message, client disconnected" - ); - break; - } - send_metrics.increment_sent(); - - // Acknowledge the message - if let Err(e) = nats_msg.ack().await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to acknowledge NATS message" - ); - send_metrics.increment_errors(); - } - } - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to serialize message" - ); - send_metrics.increment_errors(); - - // Still ack to prevent redelivery - let _ = nats_msg.ack().await; - } - } - } - Ok(None) => { - // Timeout - continue waiting - continue; - } - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "error receiving from NATS stream" - ); - send_metrics.increment_errors(); - break; - } - } - } - }); - - // Wait for either task to complete with graceful shutdown - let shutdown_result = tokio::time::timeout(GRACEFUL_SHUTDOWN_TIMEOUT, async { - tokio::select! 
{ - _ = recv_task => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "receive task completed" - ); - }, - _ = send_task => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "send task completed" - ); - } - } - }) - .await; - - if shutdown_result.is_err() { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "graceful shutdown timeout exceeded" - ); - } - - // Publish leave message - let leave_msg = WorkspaceWsMessage::leave(ctx.account_id); - if let Err(e) = publisher.publish_message(ctx.workspace_id, leave_msg).await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish leave message" - ); - } - - // Log final metrics - let duration = start_time.elapsed(); - let final_metrics = metrics.snapshot(); - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - duration_ms = duration.as_millis(), - messages_sent = final_metrics.sent, - messages_received = final_metrics.received, - messages_published = final_metrics.published, - messages_dropped = final_metrics.dropped, - errors = final_metrics.errors, - "websocket connection closed" - ); -} - -/// Establishes a WebSocket connection for a workspace. -#[tracing::instrument(skip_all, fields( - account_id = %auth_claims.account_id, - workspace_id = %path_params.workspace_id -))] -async fn workspace_websocket_handler( - State(pg_client): State, - State(nats_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - ws: WebSocketUpgrade, -) -> Result { - let workspace_id = path_params.workspace_id; - let account_id = auth_claims.account_id; - - tracing::debug!( - target: TRACING_TARGET, - account_id = %account_id, - workspace_id = %workspace_id, - "websocket connection requested" - ); - - // Verify workspace exists and user has basic access - let mut conn = pg_client.get_connection().await?; - - // Check if user has minimum permission to view documents - auth_claims - .authorize_workspace(&mut conn, workspace_id, Permission::ViewDocuments) - .await?; - - // Verify the workspace exists - if conn.find_workspace_by_id(workspace_id).await?.is_none() { - return Err(ErrorKind::NotFound.with_resource("workspace")); - } - - tracing::info!( - target: TRACING_TARGET, - account_id = %account_id, - workspace_id = %workspace_id, - "websocket upgrade authorized" - ); - - // Upgrade the connection to WebSocket - Ok(ws.on_upgrade(move |socket| { - handle_workspace_websocket(socket, workspace_id, account_id, nats_client, pg_client) - })) -} - -fn workspace_websocket_handler_docs(op: TransformOperation) -> TransformOperation { - op.summary("Connect to workspace WebSocket") - .description( - "Establishes a WebSocket connection for real-time workspace events and collaboration.", - ) - .response::<101, ()>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Returns a [`Router`] with WebSocket routes for workspaces. 
-/// -/// [`Router`]: axum::routing::Router -pub fn routes() -> ApiRouter { - use aide::axum::routing::*; - - ApiRouter::new() - .api_route( - "/workspaces/{workspaceId}/ws/", - get_with( - workspace_websocket_handler, - workspace_websocket_handler_docs, - ), - ) - .with_path_items(|item| item.tag("WebSocket")) -} diff --git a/crates/nvisy-server/src/lib.rs b/crates/nvisy-server/src/lib.rs index 54b3dff..40b5661 100644 --- a/crates/nvisy-server/src/lib.rs +++ b/crates/nvisy-server/src/lib.rs @@ -7,7 +7,7 @@ mod error; pub mod extract; pub mod handler; pub mod middleware; -pub mod pipeline; pub mod service; +pub mod worker; pub use crate::error::{BoxedError, Error, ErrorKind, Result}; diff --git a/crates/nvisy-server/src/middleware/constants.rs b/crates/nvisy-server/src/middleware/constants.rs index f8c22a0..a386b93 100644 --- a/crates/nvisy-server/src/middleware/constants.rs +++ b/crates/nvisy-server/src/middleware/constants.rs @@ -6,8 +6,8 @@ /// and prevent denial-of-service attacks via large payloads. pub const DEFAULT_MAX_BODY_SIZE: usize = 4 * 1024 * 1024; -/// Maximum file size for uploads: 100MB. +/// Maximum file size for uploads: 12MB. /// /// Used in file upload handlers to enforce file size limits /// before accepting file data into memory. -pub const DEFAULT_MAX_FILE_BODY_SIZE: usize = 100 * 1024 * 1024; +pub const DEFAULT_MAX_FILE_BODY_SIZE: usize = 12 * 1024 * 1024; diff --git a/crates/nvisy-server/src/middleware/specification.rs b/crates/nvisy-server/src/middleware/specification.rs index 8606489..0a0d58c 100644 --- a/crates/nvisy-server/src/middleware/specification.rs +++ b/crates/nvisy-server/src/middleware/specification.rs @@ -161,19 +161,14 @@ fn api_docs(api: TransformOpenApi) -> TransformOpenApi { description: Some("Workspace creation and management".into()), ..Default::default() }) - .tag(Tag { - name: "Documents".into(), - description: Some("Document upload, processing, and retrieval".into()), - ..Default::default() - }) .tag(Tag { name: "Files".into(), description: Some("File upload, download, and management".into()), ..Default::default() }) .tag(Tag { - name: "Comments".into(), - description: Some("Document and file annotations".into()), + name: "Annotations".into(), + description: Some("File annotations".into()), ..Default::default() }) .tag(Tag { @@ -201,9 +196,4 @@ fn api_docs(api: TransformOpenApi) -> TransformOpenApi { description: Some("Webhook configuration".into()), ..Default::default() }) - .tag(Tag { - name: "WebSocket".into(), - description: Some("Real-time communication".into()), - ..Default::default() - }) } diff --git a/crates/nvisy-server/src/pipeline/job_handler.rs b/crates/nvisy-server/src/pipeline/job_handler.rs deleted file mode 100644 index df42ea6..0000000 --- a/crates/nvisy-server/src/pipeline/job_handler.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! Job handler trait for stage-specific processing logic. - -use std::future::Future; - -use nvisy_nats::stream::{DocumentJob, Stage}; - -use super::PipelineState; -use crate::Result; - -/// Trait for implementing stage-specific job processing logic. -/// -/// Each processing stage implements this trait to define how jobs -/// are handled. The framework takes care of subscription, concurrency, -/// shutdown, and error handling. 
-/// -/// # Example -/// -/// ```ignore -/// pub struct MyHandler; -/// -/// impl JobHandler for MyHandler { -/// type Stage = MyStageData; -/// const TRACING_TARGET: &'static str = "my_worker::stage"; -/// const WORKER_NAME: &'static str = "my_stage"; -/// -/// async fn handle_job(state: &PipelineState, job: &DocumentJob) -> Result<()> { -/// // Process the job -/// Ok(()) -/// } -/// } -/// ``` -pub trait JobHandler: Send + Sync + 'static { - /// The processing stage this handler operates on. - type Stage: Stage; - - /// Tracing target for this handler's log messages. - const TRACING_TARGET: &'static str; - - /// Human-readable name for this worker (used in logs). - const WORKER_NAME: &'static str; - - /// Process a single job. - /// - /// This is the only method that stage-specific implementations need to define. - /// The framework handles message acknowledgment, concurrency control, and - /// error logging. - fn handle_job( - state: &PipelineState, - job: &DocumentJob, - ) -> impl Future> + Send; - - /// Optional: Log additional context when a job starts. - /// - /// Override this to add stage-specific fields to the "Processing job" log. - /// Default implementation logs nothing extra. - #[inline] - fn log_job_start(_job: &DocumentJob) { - // Default: no extra logging - } -} diff --git a/crates/nvisy-server/src/pipeline/mod.rs b/crates/nvisy-server/src/pipeline/mod.rs deleted file mode 100644 index a0d1863..0000000 --- a/crates/nvisy-server/src/pipeline/mod.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! Document processing pipeline. -//! -//! This module provides a generic worker framework for document processing stages. -//! -//! ## Architecture -//! -//! - [`JobHandler`] - Trait for implementing stage-specific job processing -//! - [`Worker`] - Generic worker that handles subscription, concurrency, and shutdown -//! - [`WorkerHandles`] - Manages all three processing workers -//! -//! ## Stages -//! -//! - **Preprocessing**: Format validation, OCR, thumbnail generation, embeddings -//! - **Processing**: VLM-based transformations, annotations, predefined tasks -//! - **Postprocessing**: Format conversion, compression, cleanup - -/// Tracing target for pipeline events. -const TRACING_TARGET: &str = "nvisy_server::pipeline"; - -mod job_handler; -mod postprocessing; -mod preprocessing; -mod processing; -mod state; -mod worker; - -pub use job_handler::JobHandler; -pub use postprocessing::PostprocessingHandler; -pub use preprocessing::PreprocessingHandler; -pub use processing::ProcessingHandler; -pub use state::{DEFAULT_MAX_CONCURRENT_JOBS, PipelineConfig, PipelineState}; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; -use uuid::Uuid; -pub use worker::Worker; - -use crate::{Error, Result}; - -/// Type aliases for concrete worker types. -pub type PreprocessingWorker = Worker; -pub type ProcessingWorker = Worker; -pub type PostprocessingWorker = Worker; - -/// Handles for background workers. -/// -/// Holds join handles for all spawned workers, allowing graceful shutdown -/// and status monitoring. -pub struct WorkerHandles { - preprocessing: JoinHandle>, - processing: JoinHandle>, - postprocessing: JoinHandle>, - cancel_token: CancellationToken, -} - -impl WorkerHandles { - /// Spawns all document processing workers. - /// - /// Creates preprocessing, processing, and postprocessing workers with - /// the given state and spawns them as background tasks. Each worker - /// gets a unique consumer name in the format `{uuid}-{stage}`. 
- /// - /// All workers share a single semaphore for global concurrency control. - pub fn spawn(state: &PipelineState) -> Self { - let cancel_token = CancellationToken::new(); - let instance_id = Uuid::now_v7(); - let semaphore = state.config.create_semaphore(); - - tracing::info!( - target: TRACING_TARGET, - instance_id = %instance_id, - max_concurrent_jobs = state.config.max_concurrent_jobs, - "Starting document processing workers" - ); - - let preprocessing = Worker::::new( - state.clone(), - format!("{}-preprocessing", instance_id), - cancel_token.clone(), - semaphore.clone(), - ) - .spawn(); - - let processing = Worker::::new( - state.clone(), - format!("{}-processing", instance_id), - cancel_token.clone(), - semaphore.clone(), - ) - .spawn(); - - let postprocessing = Worker::::new( - state.clone(), - format!("{}-postprocessing", instance_id), - cancel_token.clone(), - semaphore, - ) - .spawn(); - - tracing::debug!( - target: TRACING_TARGET, - "All workers spawned successfully" - ); - - Self { - preprocessing, - processing, - postprocessing, - cancel_token, - } - } - - /// Requests graceful shutdown of all workers. - /// - /// Workers will finish processing their current job before stopping. - /// Use [`abort_all`](Self::abort_all) for immediate cancellation. - pub fn shutdown(&self) { - tracing::info!( - target: TRACING_TARGET, - "Initiating graceful shutdown of document processing workers" - ); - self.cancel_token.cancel(); - } - - /// Aborts all worker tasks immediately. - /// - /// This cancels workers without waiting for graceful shutdown. - /// Prefer [`shutdown`](Self::shutdown) for clean termination. - pub fn abort_all(&self) { - tracing::warn!( - target: TRACING_TARGET, - "Aborting all document processing workers immediately" - ); - self.cancel_token.cancel(); - self.preprocessing.abort(); - self.processing.abort(); - self.postprocessing.abort(); - } - - /// Checks if all workers are still running. - pub fn all_running(&self) -> bool { - !self.preprocessing.is_finished() - && !self.processing.is_finished() - && !self.postprocessing.is_finished() - } - - /// Checks if any worker has finished (possibly due to error). - pub fn any_finished(&self) -> bool { - self.preprocessing.is_finished() - || self.processing.is_finished() - || self.postprocessing.is_finished() - } - - /// Waits for all workers to complete. - /// - /// Returns the first error encountered, if any. - pub async fn wait_all(self) -> Result<()> { - tracing::debug!( - target: TRACING_TARGET, - "Waiting for all workers to complete" - ); - - let (pre, proc, post) = - tokio::join!(self.preprocessing, self.processing, self.postprocessing); - - pre.map_err(|e| Error::internal("pipeline", e.to_string()))??; - proc.map_err(|e| Error::internal("pipeline", e.to_string()))??; - post.map_err(|e| Error::internal("pipeline", e.to_string()))??; - - tracing::info!( - target: TRACING_TARGET, - "All document processing workers stopped" - ); - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/postprocessing.rs b/crates/nvisy-server/src/pipeline/postprocessing.rs deleted file mode 100644 index 628743e..0000000 --- a/crates/nvisy-server/src/pipeline/postprocessing.rs +++ /dev/null @@ -1,93 +0,0 @@ -//! Postprocessing handler for document download pipeline. -//! -//! Handles jobs triggered by download requests: -//! - Format conversion to requested format -//! - Compression settings -//! - Annotation flattening (burning into document) -//! 
- Cleanup of temporary artifacts - -use nvisy_nats::stream::{DocumentJob, PostprocessingData}; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -const TRACING_TARGET: &str = "nvisy_server::pipeline::postprocessing"; - -/// Postprocessing job handler. -pub struct PostprocessingHandler; - -impl JobHandler for PostprocessingHandler { - type Stage = PostprocessingData; - - const TRACING_TARGET: &'static str = TRACING_TARGET; - const WORKER_NAME: &'static str = "postprocessing"; - - fn log_job_start(job: &DocumentJob) { - tracing::debug!( - target: TRACING_TARGET, - target_format = ?job.data().target_format, - "Postprocessing job context" - ); - } - - async fn handle_job(_state: &PipelineState, job: &DocumentJob) -> Result<()> { - let data = job.data(); - - // TODO: Update database status to "processing" - // TODO: Fetch document from object store - - // Step 1: Flatten annotations if requested - if let Some(true) = data.flatten_annotations { - tracing::debug!( - target: TRACING_TARGET, - "Flattening annotations into document" - ); - // TODO: Burn annotations into document - // - Fetch annotations from database - // - Render them permanently into document - } - - // Step 2: Convert format if specified - if let Some(ref target_format) = data.target_format { - tracing::debug!( - target: TRACING_TARGET, - target_format = %target_format, - source_format = %job.file_extension, - "Converting document format" - ); - // TODO: Implement format conversion - // - PDF <-> DOCX, PNG, etc. - // - Use appropriate conversion libraries - } - - // Step 3: Apply compression if specified - if let Some(ref compression_level) = data.compression_level { - tracing::debug!( - target: TRACING_TARGET, - compression_level = ?compression_level, - "Applying compression" - ); - // TODO: Implement compression - // - Compress images in document - // - Optimize file size based on level - } - - // Step 4: Run cleanup tasks - if let Some(ref cleanup_tasks) = data.cleanup_tasks { - tracing::debug!( - target: TRACING_TARGET, - task_count = cleanup_tasks.len(), - "Running cleanup tasks" - ); - // TODO: Implement cleanup - // - Remove temporary files - // - Clean intermediate processing artifacts - } - - // TODO: Store processed document to object store - // TODO: Update database with final file info - // TODO: Update database status to "completed" - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/preprocessing.rs b/crates/nvisy-server/src/pipeline/preprocessing.rs deleted file mode 100644 index bce4f01..0000000 --- a/crates/nvisy-server/src/pipeline/preprocessing.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Preprocessing handler for document upload pipeline. -//! -//! Handles jobs triggered by file uploads: -//! - Format detection and validation -//! - Metadata extraction and fixes -//! - OCR for scanned documents -//! - Thumbnail generation -//! - Embedding generation for semantic search - -use nvisy_nats::stream::{DocumentJob, PreprocessingData}; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -const TRACING_TARGET: &str = "nvisy_server::pipeline::preprocessing"; - -/// Preprocessing job handler. 
-pub struct PreprocessingHandler; - -impl JobHandler for PreprocessingHandler { - type Stage = PreprocessingData; - - const TRACING_TARGET: &'static str = TRACING_TARGET; - const WORKER_NAME: &'static str = "preprocessing"; - - async fn handle_job(_state: &PipelineState, job: &DocumentJob) -> Result<()> { - let data = job.data(); - - // TODO: Update database status to "processing" - - // Step 1: Validate metadata - if data.validate_metadata { - tracing::debug!( - target: TRACING_TARGET, - "Validating file metadata" - ); - // TODO: Implement metadata validation - // - Format detection - // - File integrity checks - // - Metadata extraction and fixes - } - - // Step 2: Run OCR - if data.run_ocr { - tracing::debug!(target: TRACING_TARGET, "Running OCR"); - // TODO: Implement OCR - // - Detect if document needs OCR (scanned vs native text) - // - Extract text using OCR service - // - Store extracted text in database - } - - // Step 3: Generate embeddings - if data.generate_embeddings { - tracing::debug!(target: TRACING_TARGET, "Generating embeddings"); - // TODO: Implement embedding generation - // - Split document into chunks - // - Generate embeddings using nvisy-rig - // - Store embeddings for semantic search - } - - // Step 4: Generate thumbnails - if let Some(true) = data.generate_thumbnails { - tracing::debug!(target: TRACING_TARGET, "Generating thumbnails"); - // TODO: Implement thumbnail generation - // - Render first page(s) as images - // - Store thumbnails in object store - } - - // TODO: Update database status to "completed" - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/processing.rs b/crates/nvisy-server/src/pipeline/processing.rs deleted file mode 100644 index 76490cb..0000000 --- a/crates/nvisy-server/src/pipeline/processing.rs +++ /dev/null @@ -1,93 +0,0 @@ -//! Processing handler for document editing pipeline. -//! -//! Handles jobs triggered by edit requests: -//! - VLM-based document transformations -//! - Annotation processing -//! - Predefined tasks (redaction, translation, summarization, etc.) - -use nvisy_nats::stream::{DocumentJob, ProcessingData}; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -const TRACING_TARGET: &str = "nvisy_server::pipeline::processing"; - -/// Processing job handler. 
-pub struct ProcessingHandler; - -impl JobHandler for ProcessingHandler { - type Stage = ProcessingData; - - const TRACING_TARGET: &'static str = TRACING_TARGET; - const WORKER_NAME: &'static str = "processing"; - - fn log_job_start(job: &DocumentJob) { - tracing::debug!( - target: TRACING_TARGET, - task_count = job.data().tasks.len(), - "Processing job context" - ); - } - - async fn handle_job(_state: &PipelineState, job: &DocumentJob) -> Result<()> { - let data = job.data(); - - // TODO: Update database status to "processing" - // TODO: Fetch document from object store - - // Step 1: Process main prompt if provided - if !data.prompt.is_empty() { - tracing::debug!( - target: TRACING_TARGET, - prompt_length = data.prompt.len(), - has_context = data.context.is_some(), - "Executing VLM prompt" - ); - // TODO: Implement VLM processing - // - Send document + prompt to nvisy-rig - // - Apply transformations based on VLM output - } - - // Step 2: Process annotations if specified - if let Some(ref annotation_ids) = data.annotation_ids { - tracing::debug!( - target: TRACING_TARGET, - annotation_count = annotation_ids.len(), - "Processing annotations" - ); - // TODO: Fetch annotations from database - // TODO: Apply each annotation using VLM - } - - // Step 3: Execute predefined tasks - for task in &data.tasks { - tracing::debug!( - target: TRACING_TARGET, - task = ?task, - "Executing predefined task" - ); - // TODO: Implement task execution - // - Redact: Find and redact sensitive patterns - // - Translate: Translate document to target language - // - Summarize: Generate document summary - // - ExtractInfo: Extract structured information - // - etc. - } - - // Step 4: Handle reference files if provided - if let Some(ref reference_ids) = data.reference_file_ids { - tracing::debug!( - target: TRACING_TARGET, - reference_count = reference_ids.len(), - "Using reference files for context" - ); - // TODO: Fetch reference files - // TODO: Include in VLM context for style matching, etc. - } - - // TODO: Store processed document back to object store - // TODO: Update database status to "completed" - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/state.rs b/crates/nvisy-server/src/pipeline/state.rs deleted file mode 100644 index bac8a64..0000000 --- a/crates/nvisy-server/src/pipeline/state.rs +++ /dev/null @@ -1,72 +0,0 @@ -//! Pipeline state and configuration. - -use std::sync::Arc; - -use clap::Args; -use nvisy_nats::NatsClient; -use nvisy_postgres::PgClient; -use serde::{Deserialize, Serialize}; -use tokio::sync::Semaphore; - -use crate::service::ServiceState; - -/// Default maximum concurrent jobs. -pub const DEFAULT_MAX_CONCURRENT_JOBS: usize = 10; - -/// Configuration for the document processing pipeline. -#[derive(Debug, Clone, Serialize, Deserialize, Args)] -pub struct PipelineConfig { - /// Maximum concurrent jobs workers can process simultaneously. - #[arg(long, env = "PIPELINE_MAX_CONCURRENT_JOBS", default_value_t = DEFAULT_MAX_CONCURRENT_JOBS)] - pub max_concurrent_jobs: usize, -} - -impl Default for PipelineConfig { - fn default() -> Self { - Self { - max_concurrent_jobs: DEFAULT_MAX_CONCURRENT_JOBS, - } - } -} - -impl PipelineConfig { - /// Creates a new pipeline configuration with default values. - pub fn new() -> Self { - Self::default() - } - - /// Sets the maximum concurrent jobs. 
- pub fn with_max_concurrent_jobs(mut self, max_concurrent_jobs: usize) -> Self { - self.max_concurrent_jobs = max_concurrent_jobs; - self - } - - /// Creates a semaphore for limiting concurrent job processing. - pub fn create_semaphore(&self) -> Arc { - Arc::new(Semaphore::new(self.max_concurrent_jobs)) - } -} - -/// Application state for pipeline workers. -/// -/// Contains the services needed by document processing workers. -#[derive(Clone)] -pub struct PipelineState { - /// PostgreSQL database client. - pub postgres: PgClient, - /// NATS messaging client. - pub nats: NatsClient, - /// Pipeline configuration. - pub config: PipelineConfig, -} - -impl PipelineState { - /// Creates a new pipeline state from service state and configuration. - pub fn new(state: &ServiceState, config: PipelineConfig) -> Self { - Self { - postgres: state.postgres.clone(), - nats: state.nats.clone(), - config, - } - } -} diff --git a/crates/nvisy-server/src/pipeline/worker.rs b/crates/nvisy-server/src/pipeline/worker.rs deleted file mode 100644 index 1c4035a..0000000 --- a/crates/nvisy-server/src/pipeline/worker.rs +++ /dev/null @@ -1,190 +0,0 @@ -//! Generic document processing worker. - -use std::marker::PhantomData; -use std::sync::Arc; - -use nvisy_nats::stream::{DocumentJob, DocumentJobSubscriber, Stage, TypedMessage}; -use tokio::sync::Semaphore; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -/// Tracing target for worker infrastructure. -const TRACING_TARGET: &str = "nvisy_server::pipeline"; - -/// Generic document processing worker. -/// -/// Handles all the boilerplate for subscribing to a NATS stream, -/// processing jobs concurrently with semaphore-based limiting, -/// and graceful shutdown via cancellation token. -/// -/// The actual job processing logic is delegated to the `H: JobHandler` implementation. -pub struct Worker { - state: PipelineState, - consumer_name: String, - cancel_token: CancellationToken, - semaphore: Arc, - _marker: PhantomData, -} - -impl Worker { - /// Creates a new worker with the given handler type. - pub fn new( - state: PipelineState, - consumer_name: impl Into, - cancel_token: CancellationToken, - semaphore: Arc, - ) -> Self { - Self { - state, - consumer_name: consumer_name.into(), - cancel_token, - semaphore, - _marker: PhantomData, - } - } - - /// Spawns the worker as a background task. - pub fn spawn(self) -> JoinHandle> { - tokio::spawn(async move { self.run().await }) - } - - /// Runs the worker loop, processing jobs as they arrive. - async fn run(self) -> Result<()> { - tracing::info!( - target: TRACING_TARGET, - worker = H::WORKER_NAME, - consumer = %self.consumer_name, - "Starting worker" - ); - - let subscriber: DocumentJobSubscriber = self - .state - .nats - .document_job_subscriber(&self.consumer_name) - .await?; - - tracing::info!( - target: TRACING_TARGET, - consumer = %self.consumer_name, - stage = ::NAME, - "Subscribed to jobs" - ); - - let mut stream = subscriber.subscribe().await?; - - loop { - tokio::select! { - biased; - - () = self.cancel_token.cancelled() => { - tracing::info!( - target: TRACING_TARGET, - worker = H::WORKER_NAME, - "Shutdown requested, stopping worker" - ); - break; - } - - result = stream.next() => { - if !self.handle_stream_result(result).await { - break; - } - } - } - } - - Ok(()) - } - - /// Handles a stream result, returning false if the worker should stop. 
- async fn handle_stream_result( - &self, - result: nvisy_nats::Result>>>, - ) -> bool { - let msg = match result { - Ok(Some(msg)) => msg, - Ok(None) => { - tracing::trace!(target: TRACING_TARGET, "No messages available"); - return true; - } - Err(err) => { - tracing::error!( - target: TRACING_TARGET, - error = %err, - "Failed to receive message" - ); - return true; - } - }; - - // Acquire semaphore permit for concurrency control - let permit = match self.semaphore.clone().acquire_owned().await { - Ok(permit) => permit, - Err(_) => { - tracing::error!( - target: TRACING_TARGET, - "Semaphore closed, stopping worker" - ); - return false; - } - }; - - let state = self.state.clone(); - let job = msg.payload().clone(); - let job_id = job.id; - let file_id = job.file_id; - - // Ack immediately to prevent redelivery while processing - let mut msg = msg; - if let Err(err) = msg.ack().await { - tracing::error!( - target: TRACING_TARGET, - job_id = %job_id, - error = %err, - "Failed to ack message" - ); - } - - tokio::spawn(async move { - // Hold permit until job completes - let _permit = permit; - - tracing::info!( - target: TRACING_TARGET, - job_id = %job_id, - file_id = %file_id, - stage = ::NAME, - "Processing job" - ); - - // Allow handler to log extra context - H::log_job_start(&job); - - match H::handle_job(&state, &job).await { - Ok(()) => { - tracing::info!( - target: TRACING_TARGET, - job_id = %job_id, - file_id = %file_id, - "Job completed" - ); - } - Err(err) => { - tracing::error!( - target: TRACING_TARGET, - job_id = %job_id, - file_id = %file_id, - error = %err, - "Job failed" - ); - // TODO: Implement retry logic or dead letter queue - } - } - }); - - true - } -} diff --git a/crates/nvisy-server/src/service/config.rs b/crates/nvisy-server/src/service/config.rs index 19b565c..1e14dbc 100644 --- a/crates/nvisy-server/src/service/config.rs +++ b/crates/nvisy-server/src/service/config.rs @@ -4,7 +4,6 @@ use clap::Args; use clap::Parser; use nvisy_nats::{NatsClient, NatsConfig}; use nvisy_postgres::{PgClient, PgClientMigrationExt, PgConfig}; -use nvisy_rig::RigConfig; use serde::{Deserialize, Serialize}; use crate::service::security::{SessionKeys, SessionKeysConfig}; @@ -29,10 +28,6 @@ pub struct ServiceConfig { /// Authentication key paths configuration. #[cfg_attr(any(test, feature = "config"), command(flatten))] pub session_config: SessionKeysConfig, - - /// AI services configuration. - #[cfg_attr(any(test, feature = "config"), command(flatten))] - pub rig_config: RigConfig, } impl ServiceConfig { diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index 95251f7..a84be8e 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -1,24 +1,23 @@ //! Application state and dependency injection. mod cache; -mod compression; mod config; mod integration; mod security; +mod webhook; use nvisy_nats::NatsClient; use nvisy_postgres::PgClient; -use nvisy_rig::RigService; use nvisy_webhook::WebhookService; use crate::Result; pub use crate::service::cache::HealthCache; -pub use crate::service::compression::{ArchiveFormat, ArchiveService}; pub use crate::service::config::ServiceConfig; pub use crate::service::integration::IntegrationProvider; pub use crate::service::security::{ PasswordHasher, PasswordStrength, SessionKeys, SessionKeysConfig, UserAgentParser, }; +pub use crate::service::webhook::WebhookEmitter; /// Application state. 
/// @@ -33,17 +32,14 @@ pub struct ServiceState { pub nats: NatsClient, pub webhook: WebhookService, - // AI services: - pub rig: RigService, - // Internal services: pub health_cache: HealthCache, - pub archive_service: ArchiveService, pub integration_provider: IntegrationProvider, pub password_hasher: PasswordHasher, pub password_strength: PasswordStrength, pub session_keys: SessionKeys, pub user_agent_parser: UserAgentParser, + pub webhook_emitter: WebhookEmitter, } impl ServiceState { @@ -57,31 +53,20 @@ impl ServiceState { let postgres = service_config.connect_postgres().await?; let nats = service_config.connect_nats().await?; - // Initialize AI services - let rig = RigService::new( - service_config.rig_config.clone(), - postgres.clone(), - nats.clone(), - ) - .await - .map_err(|e| { - crate::Error::internal("rig", "Failed to initialize rig service").with_source(e) - })?; + let webhook_emitter = WebhookEmitter::new(postgres.clone(), nats.clone()); let service_state = Self { postgres, nats, webhook: webhook_service, - rig, - health_cache: HealthCache::new(), - archive_service: ArchiveService::new(), integration_provider: IntegrationProvider::new(), password_hasher: PasswordHasher::new(), password_strength: PasswordStrength::new(), session_keys: service_config.load_session_keys().await?, user_agent_parser: UserAgentParser::new(), + webhook_emitter, }; Ok(service_state) @@ -103,14 +88,11 @@ impl_di!(postgres: PgClient); impl_di!(nats: NatsClient); impl_di!(webhook: WebhookService); -// AI services: -impl_di!(rig: RigService); - // Internal services: impl_di!(health_cache: HealthCache); -impl_di!(archive_service: ArchiveService); impl_di!(integration_provider: IntegrationProvider); impl_di!(password_hasher: PasswordHasher); impl_di!(password_strength: PasswordStrength); impl_di!(session_keys: SessionKeys); impl_di!(user_agent_parser: UserAgentParser); +impl_di!(webhook_emitter: WebhookEmitter); diff --git a/crates/nvisy-server/src/service/webhook/emitter.rs b/crates/nvisy-server/src/service/webhook/emitter.rs new file mode 100644 index 0000000..941f8ec --- /dev/null +++ b/crates/nvisy-server/src/service/webhook/emitter.rs @@ -0,0 +1,440 @@ +//! Webhook event emitter for publishing domain events to NATS. + +use std::collections::HashMap; +use std::time::Duration; + +use nvisy_nats::NatsClient; +use nvisy_nats::stream::{EventPublisher, WebhookStream}; +use nvisy_postgres::PgClient; +use nvisy_postgres::query::WorkspaceWebhookRepository; +use nvisy_postgres::types::WebhookEvent; +use nvisy_webhook::{WebhookContext, WebhookRequest}; +use url::Url; +use uuid::Uuid; + +use crate::Result; + +/// Type alias for webhook publisher. +type WebhookPublisher = EventPublisher; + +/// Tracing target for webhook event emission. +const TRACING_TARGET: &str = "nvisy_server::service::webhook"; + +/// Default timeout for webhook delivery. +const DEFAULT_DELIVERY_TIMEOUT: Duration = Duration::from_secs(30); + +/// Webhook event emitter for publishing domain events. +/// +/// This service queries webhooks subscribed to specific events and publishes +/// requests to NATS for asynchronous delivery. +#[derive(Clone)] +pub struct WebhookEmitter { + pg_client: PgClient, + nats_client: NatsClient, +} + +impl WebhookEmitter { + /// Create a new webhook emitter. + pub fn new(pg_client: PgClient, nats_client: NatsClient) -> Self { + Self { + pg_client, + nats_client, + } + } + + /// Emit a webhook event for a workspace. + /// + /// This method: + /// 1. Queries all active webhooks subscribed to the event type + /// 2. 
Creates a `WebhookRequest` for each webhook + /// 3. Publishes the requests to NATS for asynchronous delivery + /// + /// # Arguments + /// + /// * `workspace_id` - The workspace where the event occurred + /// * `event` - The type of event that occurred + /// * `resource_id` - The ID of the affected resource + /// * `triggered_by` - The account ID that triggered the event (if any) + /// * `data` - Additional event-specific data + #[tracing::instrument( + skip(self, data), + fields( + workspace_id = %workspace_id, + event = %event, + resource_id = %resource_id, + ) + )] + pub async fn emit( + &self, + workspace_id: Uuid, + event: WebhookEvent, + resource_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + // Find all active webhooks subscribed to this event + let mut conn = self.pg_client.get_connection().await?; + let webhooks = conn.find_webhooks_for_event(workspace_id, event).await?; + + if webhooks.is_empty() { + tracing::debug!( + target: TRACING_TARGET, + "No webhooks subscribed to event" + ); + return Ok(0); + } + + tracing::debug!( + target: TRACING_TARGET, + webhook_count = webhooks.len(), + "Found webhooks subscribed to event" + ); + + // Create webhook requests + let event_subject = event.as_subject(); + let event_str = event.to_string(); + let resource_type = event.category().to_string(); + + let requests: Vec = webhooks + .into_iter() + .filter_map(|webhook| { + // Parse URL - skip invalid URLs + let url: Url = match webhook.url.parse() { + Ok(u) => u, + Err(err) => { + tracing::warn!( + target: TRACING_TARGET, + webhook_id = %webhook.id, + url = %webhook.url, + error = %err, + "Skipping webhook with invalid URL" + ); + return None; + } + }; + + // Build context + let mut context = WebhookContext::new(webhook.id, workspace_id, resource_id) + .with_resource_type(&resource_type); + + if let Some(account_id) = triggered_by { + context = context.with_account(account_id); + } + + if let Some(ref metadata) = data { + context = context.with_metadata(metadata.clone()); + } + + // Build request + let mut request = + WebhookRequest::new(url, &event_str, format!("Event: {}", event_str), context) + .with_timeout(DEFAULT_DELIVERY_TIMEOUT) + .with_secret(webhook.secret); + + // Add custom headers from webhook config + if !webhook.headers.is_null() + && let Some(obj) = webhook.headers.as_object() + { + let header_map: HashMap = obj + .iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string()))) + .collect(); + if !header_map.is_empty() { + request = request.with_headers(header_map); + } + } + + Some(request) + }) + .collect(); + + let request_count = requests.len(); + + if request_count == 0 { + return Ok(0); + } + + // Publish requests to NATS + let publisher: WebhookPublisher = self.nats_client.event_publisher().await?; + + for request in &requests { + // Use workspace_id.event_subject as the routing subject + let subject = format!("{}.{}", request.context.workspace_id, event_subject); + publisher.publish_to(&subject, request).await?; + } + + tracing::info!( + target: TRACING_TARGET, + request_count, + "Published webhook requests" + ); + + Ok(request_count) + } + + /// Emit a document created event. + #[inline] + pub async fn emit_document_created( + &self, + workspace_id: Uuid, + document_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::DocumentCreated, + document_id, + triggered_by, + data, + ) + .await + } + + /// Emit a document updated event. 
+ #[inline] + pub async fn emit_document_updated( + &self, + workspace_id: Uuid, + document_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::DocumentUpdated, + document_id, + triggered_by, + data, + ) + .await + } + + /// Emit a document deleted event. + #[inline] + pub async fn emit_document_deleted( + &self, + workspace_id: Uuid, + document_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::DocumentDeleted, + document_id, + triggered_by, + data, + ) + .await + } + + /// Emit a file created event. + #[inline] + pub async fn emit_file_created( + &self, + workspace_id: Uuid, + file_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::FileCreated, + file_id, + triggered_by, + data, + ) + .await + } + + /// Emit a file updated event. + #[inline] + pub async fn emit_file_updated( + &self, + workspace_id: Uuid, + file_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::FileUpdated, + file_id, + triggered_by, + data, + ) + .await + } + + /// Emit a file deleted event. + #[inline] + pub async fn emit_file_deleted( + &self, + workspace_id: Uuid, + file_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::FileDeleted, + file_id, + triggered_by, + data, + ) + .await + } + + /// Emit a member added event. + #[inline] + pub async fn emit_member_added( + &self, + workspace_id: Uuid, + member_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::MemberAdded, + member_id, + triggered_by, + data, + ) + .await + } + + /// Emit a member updated event. + #[inline] + pub async fn emit_member_updated( + &self, + workspace_id: Uuid, + member_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::MemberUpdated, + member_id, + triggered_by, + data, + ) + .await + } + + /// Emit a member deleted event. + #[inline] + pub async fn emit_member_deleted( + &self, + workspace_id: Uuid, + member_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::MemberDeleted, + member_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration created event. + #[inline] + pub async fn emit_integration_created( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationCreated, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration updated event. + #[inline] + pub async fn emit_integration_updated( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationUpdated, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration deleted event. + #[inline] + pub async fn emit_integration_deleted( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationDeleted, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration synced event. 
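Taken together, these typed helpers (plus the integration-sync variants that follow) give request handlers a single call to fan an event out to every subscribed webhook. Below is a minimal sketch of that call pattern; the handler name and the way `ServiceState` is reached are illustrative, and `None` is passed for the event-specific `data` to stay agnostic about its payload type:

```rust
use uuid::Uuid;

// Sketch only: fan out a DocumentCreated event after a successful create.
// `ServiceState` exposes `webhook_emitter` via the dependency-injection
// macros earlier in this diff; everything else here is illustrative.
async fn notify_document_created(
    state: &crate::service::ServiceState,
    workspace_id: Uuid,
    document_id: Uuid,
    account_id: Uuid,
) -> crate::Result<()> {
    // Returns how many delivery requests were published to NATS
    // (zero when no webhook subscribes to the event).
    let published = state
        .webhook_emitter
        .emit_document_created(workspace_id, document_id, Some(account_id), None)
        .await?;

    tracing::debug!(published, "queued webhook deliveries");
    Ok(())
}
```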
+ #[inline] + pub async fn emit_integration_synced( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationSynced, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration desynced event. + #[inline] + pub async fn emit_integration_desynced( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationDesynced, + integration_id, + triggered_by, + data, + ) + .await + } +} diff --git a/crates/nvisy-server/src/service/webhook/mod.rs b/crates/nvisy-server/src/service/webhook/mod.rs new file mode 100644 index 0000000..7c3ddf6 --- /dev/null +++ b/crates/nvisy-server/src/service/webhook/mod.rs @@ -0,0 +1,7 @@ +//! Webhook event emission service. +//! +//! Provides helpers for emitting domain events to webhooks via NATS JetStream. + +mod emitter; + +pub use emitter::WebhookEmitter; diff --git a/crates/nvisy-server/src/worker/mod.rs b/crates/nvisy-server/src/worker/mod.rs new file mode 100644 index 0000000..4e558cf --- /dev/null +++ b/crates/nvisy-server/src/worker/mod.rs @@ -0,0 +1,5 @@ +//! Background workers for async processing. + +mod webhook; + +pub use webhook::WebhookWorker; diff --git a/crates/nvisy-server/src/worker/webhook.rs b/crates/nvisy-server/src/worker/webhook.rs new file mode 100644 index 0000000..d3758a4 --- /dev/null +++ b/crates/nvisy-server/src/worker/webhook.rs @@ -0,0 +1,178 @@ +//! Webhook delivery worker. +//! +//! Consumes webhook requests from NATS and delivers them to external endpoints. + +use std::time::Duration; + +use nvisy_nats::NatsClient; +use nvisy_nats::stream::{EventSubscriber, WebhookStream}; +use nvisy_webhook::{WebhookRequest, WebhookService}; +use tokio_util::sync::CancellationToken; + +use crate::Result; + +/// Type alias for webhook subscriber. +type WebhookSubscriber = EventSubscriber; + +/// Tracing target for webhook worker operations. +const TRACING_TARGET: &str = "nvisy_server::worker::webhook"; + +/// Webhook delivery worker. +/// +/// This worker subscribes to the `WEBHOOKS` NATS stream and delivers +/// webhook payloads to external endpoints with HMAC-SHA256 signatures. +pub struct WebhookWorker { + nats_client: NatsClient, + webhook_service: WebhookService, +} + +impl WebhookWorker { + /// Create a new webhook worker. + pub fn new(nats_client: NatsClient, webhook_service: WebhookService) -> Self { + Self { + nats_client, + webhook_service, + } + } + + /// Run the webhook worker until cancelled. + /// + /// This method will continuously consume webhook requests from NATS and + /// deliver them to the configured endpoints. Logs lifecycle events + /// (start, stop, errors) internally. + pub async fn run(&self, cancel: CancellationToken) -> Result<()> { + tracing::info!( + target: TRACING_TARGET, + "Starting webhook worker" + ); + + let result = self.run_inner(cancel).await; + + match &result { + Ok(()) => { + tracing::info!( + target: TRACING_TARGET, + "Webhook worker stopped" + ); + } + Err(err) => { + tracing::error!( + target: TRACING_TARGET, + error = %err, + "Webhook worker failed" + ); + } + } + + result + } + + /// Internal run loop. + async fn run_inner(&self, cancel: CancellationToken) -> Result<()> { + let subscriber: WebhookSubscriber = self.nats_client.webhook_subscriber().await?; + + let mut stream = subscriber.subscribe().await?; + + loop { + tokio::select! 
{ + _ = cancel.cancelled() => { + tracing::info!( + target: TRACING_TARGET, + "Webhook worker shutdown requested" + ); + break; + } + result = stream.next_with_timeout(Duration::from_secs(5)) => { + match result { + Ok(Some(mut message)) => { + let request = message.payload(); + + if let Err(err) = self.deliver(request).await { + tracing::error!( + target: TRACING_TARGET, + error = %err, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + "Failed to deliver webhook" + ); + // Nack the message for redelivery + if let Err(nack_err) = message.nack().await { + tracing::error!( + target: TRACING_TARGET, + error = %nack_err, + "Failed to nack message" + ); + } + } else { + // Ack successful delivery + if let Err(ack_err) = message.ack().await { + tracing::error!( + target: TRACING_TARGET, + error = %ack_err, + "Failed to ack message" + ); + } + } + } + Ok(None) => { + // Timeout, continue loop + } + Err(err) => { + tracing::error!( + target: TRACING_TARGET, + error = %err, + "Error receiving message from stream" + ); + // Brief pause before retrying + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + } + } + + Ok(()) + } + + /// Deliver a webhook request. + /// + /// The `WebhookService` handles HMAC-SHA256 signing automatically + /// when `request.secret` is present. + async fn deliver(&self, request: &WebhookRequest) -> Result<()> { + tracing::debug!( + target: TRACING_TARGET, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + event = %request.event, + "Delivering webhook" + ); + + let response = self.webhook_service.deliver(request).await.map_err(|err| { + crate::error::Error::external("webhook", format!("Delivery failed: {}", err)) + })?; + + if response.is_success() { + tracing::info!( + target: TRACING_TARGET, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + status_code = response.status_code, + "Webhook delivered successfully" + ); + Ok(()) + } else { + tracing::warn!( + target: TRACING_TARGET, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + status_code = response.status_code, + "Webhook delivery returned non-success status" + ); + // Return error to trigger nack/retry + Err(crate::error::Error::external( + "webhook", + format!("Delivery returned status {}", response.status_code), + )) + } + } +} diff --git a/crates/nvisy-webhook/Cargo.toml b/crates/nvisy-webhook/Cargo.toml index 21df97c..8f7845e 100644 --- a/crates/nvisy-webhook/Cargo.toml +++ b/crates/nvisy-webhook/Cargo.toml @@ -1,11 +1,15 @@ [package] name = "nvisy-webhook" +description = "Webhook delivery types and traits for nvisy services" +readme = "./README.md" +keywords = ["webhook", "http", "delivery", "notifications", "events"] +categories = ["api-bindings", "network-programming"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-webhook/README.md b/crates/nvisy-webhook/README.md index cf498b8..c54f20b 100644 --- a/crates/nvisy-webhook/README.md +++ b/crates/nvisy-webhook/README.md @@ -1,5 +1,7 @@ # nvisy-webhook +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Webhook delivery types and traits for 
nvisy services. ## Features @@ -21,3 +23,18 @@ let service = WebhookService::new(my_provider); let request = WebhookRequest::new(url, event, payload, webhook_id, workspace_id); let response = service.deliver(&request).await?; ``` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-webhook/src/request.rs b/crates/nvisy-webhook/src/request.rs index cef1185..417aeab 100644 --- a/crates/nvisy-webhook/src/request.rs +++ b/crates/nvisy-webhook/src/request.rs @@ -9,11 +9,13 @@ use url::Url; use uuid::Uuid; /// A webhook delivery request. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] pub struct WebhookRequest { /// Unique identifier for this request. pub request_id: Uuid, /// The webhook endpoint URL. + #[cfg_attr(feature = "schema", schemars(with = "String"))] pub url: Url, /// The event type that triggered this webhook delivery. pub event: String, @@ -24,7 +26,12 @@ pub struct WebhookRequest { /// Custom headers to include in the request. pub headers: HashMap, /// Optional request timeout (uses client default if not set). + #[serde(default, skip_serializing_if = "Option::is_none")] + #[cfg_attr(feature = "schema", schemars(with = "Option"))] pub timeout: Option, + /// HMAC-SHA256 signing secret for request authentication. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub secret: Option, } impl WebhookRequest { @@ -43,6 +50,7 @@ impl WebhookRequest { context, headers: HashMap::new(), timeout: None, + secret: None, } } @@ -74,6 +82,12 @@ impl WebhookRequest { self } + /// Sets the signing secret for HMAC-SHA256 authentication. + pub fn with_secret(mut self, secret: impl Into) -> Self { + self.secret = Some(secret.into()); + self + } + /// Converts this request into a payload for serialization. 
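On the receiving end, consumers of these webhooks check the `X-Webhook-Signature: sha256=<hex>` header that the reqwest client (further down in this diff) attaches whenever `secret` is set. The sketch below uses the `hmac`, `sha2`, and `hex` crates; the exact bytes covered by the signature are defined by `sign_payload`, so the `"{timestamp}.{body}"` layout is an assumption for illustration only:

```rust
use hmac::{Hmac, Mac};
use sha2::Sha256;

type HmacSha256 = Hmac<Sha256>;

/// Returns true if `signature_hex` (the value after "sha256=") matches the
/// HMAC-SHA256 of the received payload under the shared secret.
///
/// The signed input here ("{timestamp}.{body}") is an assumption for this
/// sketch; match it to whatever `sign_payload` actually covers.
fn verify_signature(secret: &str, timestamp: &str, body: &[u8], signature_hex: &str) -> bool {
    let mut mac = match HmacSha256::new_from_slice(secret.as_bytes()) {
        Ok(mac) => mac,
        Err(_) => return false,
    };
    mac.update(timestamp.as_bytes());
    mac.update(b".");
    mac.update(body);

    match hex::decode(signature_hex) {
        // verify_slice performs a constant-time comparison.
        Ok(expected) => mac.verify_slice(&expected).is_ok(),
        Err(_) => false,
    }
}
```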
pub fn into_payload(self) -> WebhookPayload { WebhookPayload { diff --git a/crates/nvisy-webhook/src/reqwest/client.rs b/crates/nvisy-webhook/src/reqwest/client.rs index a55b47f..139de98 100644 --- a/crates/nvisy-webhook/src/reqwest/client.rs +++ b/crates/nvisy-webhook/src/reqwest/client.rs @@ -148,6 +148,13 @@ impl WebhookProvider for ReqwestClient { .header("X-Webhook-Request-Id", request.request_id.to_string()) .timeout(timeout); + // Add HMAC-SHA256 signature if secret is present + if let Some(ref secret) = request.secret { + let signature = Self::sign_payload(secret, timestamp, &payload_bytes); + http_request = + http_request.header("X-Webhook-Signature", format!("sha256={}", signature)); + } + // Add custom headers for (name, value) in &request.headers { http_request = http_request.header(name, value); diff --git a/crates/nvisy-webhook/src/reqwest/error.rs b/crates/nvisy-webhook/src/reqwest/error.rs index fa0ecc5..cbff2af 100644 --- a/crates/nvisy-webhook/src/reqwest/error.rs +++ b/crates/nvisy-webhook/src/reqwest/error.rs @@ -21,20 +21,20 @@ impl From for crate::Error { match err { Error::Reqwest(e) => { if e.is_timeout() { - crate::Error::timeout() + crate::Error::new(crate::ErrorKind::Timeout) .with_message(e.to_string()) .with_source(e) } else if e.is_connect() { - crate::Error::network_error() + crate::Error::new(crate::ErrorKind::NetworkError) .with_message("Connection failed") .with_source(e) } else { - crate::Error::network_error() + crate::Error::new(crate::ErrorKind::NetworkError) .with_message(e.to_string()) .with_source(e) } } - Error::Serde(e) => crate::Error::serialization() + Error::Serde(e) => crate::Error::new(crate::ErrorKind::Serialization) .with_message(e.to_string()) .with_source(e), } diff --git a/deny.toml b/deny.toml index 90db265..67c9163 100644 --- a/deny.toml +++ b/deny.toml @@ -26,11 +26,15 @@ ignore = [ # https://github.com/nats-io/nats.rs/pull/1492 # https://github.com/programatik29/axum-server/pull/178 "RUSTSEC-2025-0134", + + # RSA Marvin Attack timing sidechannel vulnerability - no patch available + # Comes from opendal/sqlx dependencies for cloud storage and MySQL + "RUSTSEC-2023-0071", ] [licenses] # Confidence threshold for detecting a license from a license text (higher = stricter) -confidence-threshold = 0.9 +confidence-threshold = 0.8 # Private licenses are not allowed private = { ignore = false, registries = [] } # Warn if an allowed license is not used in the dependency graph @@ -39,6 +43,7 @@ unused-allowed-license = "warn" # List of explicitly allowed licenses (single licenses only) allow = [ "MIT", + "MIT-0", "Apache-2.0", "Apache-2.0 WITH LLVM-exception", "BSD-2-Clause", @@ -87,4 +92,6 @@ unknown-git = "deny" # List of URLs for allowed crate registries allow-registry = ["https://github.com/rust-lang/crates.io-index"] # List of URLs for allowed Git repositories -allow-git = [] +allow-git = [ + "https://github.com/nvisycom/runtime.git", +] diff --git a/docker/README.md b/docker/README.md index 1cadf86..2b23273 100644 --- a/docker/README.md +++ b/docker/README.md @@ -39,58 +39,6 @@ docker compose up -d --build | NATS | 4222, 8222 | Message queue (JetStream) | | Server | 8080 | Nvisy API | -## Optional Integrations (Development) - -The development compose file includes optional services that can be enabled using Docker Compose profiles. These are useful for testing integrations locally. 
- -### Available Profiles - -| Profile | Services | Description | -| -------------- | -------------- | ------------------------------------ | -| `minio` | MinIO | S3-compatible object storage | -| `n8n` | N8n | Workflow automation platform | -| `integrations` | MinIO + N8n | All optional integration services | - -### Optional Services - -| Service | Port(s) | Console URL | Description | -| ------- | ----------- | ----------------------- | ---------------------------- | -| MinIO | 9000, 9001 | http://localhost:9001 | S3-compatible object storage | -| N8n | 5678 | http://localhost:5678 | Workflow automation | - -### Usage - -Start core services only (PostgreSQL + NATS): - -```bash -docker compose -f docker-compose.dev.yml up -d -``` - -Start with MinIO: - -```bash -docker compose -f docker-compose.dev.yml --profile minio up -d -``` - -Start with N8n: - -```bash -docker compose -f docker-compose.dev.yml --profile n8n up -d -``` - -Start with all integrations: - -```bash -docker compose -f docker-compose.dev.yml --profile integrations up -d -``` - -### Default Credentials - -| Service | Username | Password | Environment Variables | -| ------- | ------------ | ------------ | ------------------------------------------------ | -| MinIO | `minioadmin` | `minioadmin` | `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD` | -| N8n | `admin` | `admin` | `N8N_BASIC_AUTH_USER`, `N8N_BASIC_AUTH_PASSWORD` | - ## Commands ```bash diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 6b2f75b..a287d72 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -44,67 +44,9 @@ services: networks: - nvisy-dev - # Optional: MinIO (S3-compatible object storage) - minio: - image: minio/minio:latest - container_name: nvisy-minio-dev - profiles: - - minio - - integrations - restart: unless-stopped - ports: - - "${MINIO_API_PORT:-9000}:9000" - - "${MINIO_CONSOLE_PORT:-9001}:9001" - environment: - MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin} - MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin} - command: server /data --console-address ":9001" - volumes: - - minio_data:/data - healthcheck: - test: ["CMD", "mc", "ready", "local"] - interval: 5s - timeout: 5s - retries: 5 - start_period: 5s - networks: - - nvisy-dev - - # Optional: N8n (workflow automation) - n8n: - image: n8nio/n8n:latest - container_name: nvisy-n8n-dev - profiles: - - n8n - - integrations - restart: unless-stopped - ports: - - "${N8N_PORT:-5678}:5678" - environment: - N8N_HOST: ${N8N_HOST:-localhost} - N8N_PORT: 5678 - N8N_PROTOCOL: ${N8N_PROTOCOL:-http} - WEBHOOK_URL: ${N8N_WEBHOOK_URL:-http://localhost:5678} - GENERIC_TIMEZONE: ${TZ:-UTC} - N8N_BASIC_AUTH_ACTIVE: ${N8N_BASIC_AUTH_ACTIVE:-true} - N8N_BASIC_AUTH_USER: ${N8N_BASIC_AUTH_USER:-admin} - N8N_BASIC_AUTH_PASSWORD: ${N8N_BASIC_AUTH_PASSWORD:-admin} - volumes: - - n8n_data:/home/node/.n8n - healthcheck: - test: ["CMD-SHELL", "wget -qO- http://localhost:5678/healthz || exit 1"] - interval: 10s - timeout: 5s - retries: 5 - start_period: 30s - networks: - - nvisy-dev - volumes: postgres_data: nats_data: - minio_data: - n8n_data: networks: nvisy-dev: diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..4f10c27 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,84 @@ +# Architecture + +## Crate Structure + +| Crate | Responsibility | +|-------|----------------| +| `nvisy-server` | HTTP API, handlers, middleware, auth | +| `nvisy-postgres` | Database models, queries, migrations | +| `nvisy-nats` | Messaging, job 
queues, object storage | +| `nvisy-rig` | LLM orchestration, RAG, chat agents | +| `nvisy-webhook` | External event delivery | +| `nvisy-core` | Shared types and utilities | +| `nvisy-cli` | Command-line interface | + +## Technology Stack + +| Layer | Technology | Purpose | +|-------|------------|---------| +| Language | Rust | Memory safety, performance, concurrency | +| Database | PostgreSQL + pgvector | Relational data + vector embeddings | +| Messaging | NATS | Pub/sub, job queues, object storage | +| AI Framework | rig-core | LLM orchestration | +| HTTP Server | Axum + Tower | API endpoints and middleware | +| Real-time | SSE | Streaming AI responses | +| Auth | JWT | Stateless authentication | + +## Data Model + +### Core Entities + +| Entity | Purpose | +|--------|---------| +| Account | User authentication and profile | +| Workspace | Collaborative space for documents | +| Document | Logical grouping of related files | +| File | Individual uploaded file with metadata | +| Version | Parsed representation at a point in time | +| Section | Hierarchical content structure | +| Chunk | Indexed segment with vector embedding | +| Entity | Extracted person, company, date, amount | +| ChatSession | AI conversation context | + +### Hierarchy + +- **Workspace** contains Documents +- **Document** contains Files and Versions +- **Version** contains Sections +- **Section** contains Chunks +- **Chunk** contains Entities, Claims, and References + +### Content Types + +| Type | Examples | Processing | +|------|----------|------------| +| Document | PDF, DOCX, TXT | Text extraction, structure parsing | +| Image | PNG, JPG, SVG | OCR, visual analysis | +| Spreadsheet | XLSX, CSV | Table normalization, schema inference | +| Presentation | PPTX, KEY | Slide extraction, structure parsing | +| Audio | MP3, WAV | Transcription with timestamps | +| Video | MP4, MOV | Transcription, frame extraction | +| Archive | ZIP, TAR | Recursive extraction and processing | +| Data | JSON, XML | Schema inference, normalization | + +## Canonical Representation + +All source files normalize to a common schema containing: + +- **Sections**: Hierarchical structure +- **Entities**: People, companies, dates, amounts +- **Tables**: Structured data +- **Claims**: Assertions that can be verified +- **References**: Links to other documents/sections +- **Provenance**: Source file, extraction method, confidence + +## Chunking Strategy + +Effective cross-file intelligence depends on chunking quality. + +Requirements: +- **Semantic chunks**: Based on meaning, not fixed token sizes +- **Stable chunk IDs**: Enable diffs, history, and references +- **Hierarchical chunks**: Document → Section → Paragraph → Sentence + +Each chunk maintains: stable content-addressable ID, hierarchical location, vector embedding, extracted entities, token count, and byte range in source. diff --git a/docs/INTELLIGENCE.md b/docs/INTELLIGENCE.md new file mode 100644 index 0000000..0c11028 --- /dev/null +++ b/docs/INTELLIGENCE.md @@ -0,0 +1,67 @@ +# Intelligence Layer + +## Cross-Document Linking + +Related content across files must be explicitly linked. This is relationship modeling, not retrieval. + +| Technique | Purpose | +|-----------|---------| +| Entity resolution | Same person/company across files | +| Concept embeddings | Same idea, different wording | +| Citation graphs | What references what | +| Contradiction detection | Conflicting statements across documents | + +## Hybrid Search + +Vector search alone is insufficient for cross-file queries. 
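+
+As a rough sketch only (the dataclass and planner below are illustrative assumptions, not an existing Nvisy API), a single cross-file query can carry all three layers listed in the table that follows:
+
+```python
+from dataclasses import dataclass, field
+from datetime import date
+
+
+@dataclass
+class HybridQuery:
+    """One query object combining the three retrieval layers."""
+
+    text: str                                  # embedded for vector similarity
+    after: date | None = None                  # symbolic filter: dates
+    doc_types: set[str] = field(default_factory=set)  # symbolic filter: types
+    related_to: str | None = None              # graph traversal seed (entity id)
+
+
+def plan(query: HybridQuery) -> list[str]:
+    """Cheap symbolic filters first, vector ranking next, graph expansion last."""
+    steps = []
+    if query.after or query.doc_types:
+        steps.append("symbolic: narrow candidate chunks by date/type metadata")
+    steps.append("vector: rank remaining chunks by similarity to the query embedding")
+    if query.related_to:
+        steps.append("graph: expand survivors along entity and citation edges")
+    return steps
+
+
+# "Show me all NDA clauses after 2021 that conflict with policy X"
+print(plan(HybridQuery(
+    text="liability clauses conflicting with policy X",
+    after=date(2022, 1, 1),
+    doc_types={"NDA"},
+    related_to="policy-x",
+)))
+```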
+ +| Layer | Purpose | Example | +|-------|---------|---------| +| Vector search | Semantic similarity | "Find clauses about liability" | +| Symbolic filters | Dates, types, authors | "After 2021", "Type: NDA" | +| Graph traversal | Relationships | "Related to Company X" | + +A query like "Show me all NDA clauses after 2021 that conflict with policy X" requires all three layers. + +## Temporal Intelligence + +| Capability | Description | +|------------|-------------| +| Versioned representations | Track document evolution | +| Semantic diffing | Changes in meaning, not just text | +| Temporal queries | "What changed since last quarter?" | +| Change attribution | Who changed what and when | + +## Grounded Reasoning + +Every assertion links to evidence: file, section, exact text, and relevance score. Without this, enterprise users cannot validate conclusions. + +## Cross-File Reasoning Patterns + +Reusable patterns across any document set: + +| Pattern | Question | Example | +|---------|----------|---------| +| Consistency | Do all docs use the same definition? | "Is 'confidential' defined consistently?" | +| Coverage | Is X addressed somewhere? | "Do all contracts have termination clauses?" | +| Conflict | Do any statements contradict? | "Are there conflicting liability terms?" | +| Redundancy | Are we repeating ourselves? | "Is the same clause duplicated?" | +| Completeness | What's missing? | "Which required sections are absent?" | +| Drift | Has X changed from the standard? | "How does this differ from the template?" | + +## Entity Resolution + +The same entity appears differently across files. + +| Challenge | Example | +|-----------|---------| +| Name variations | "IBM", "International Business Machines", "Big Blue" | +| Role changes | "John Smith (CEO)" vs "John Smith (Board Member)" | +| Temporal | "Acme Corp" acquired by "MegaCorp" in 2022 | +| Abbreviations | "NDA", "Non-Disclosure Agreement" | + +Resolution process: extraction → clustering → disambiguation → linking → propagation. + +## Knowledge Graph + +Entities link to Sections. Sections reference Sections. Documents relate to Documents. This graph grows over time and cannot be replicated by tools that process files in isolation. diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md new file mode 100644 index 0000000..9846425 --- /dev/null +++ b/docs/PROVIDERS.md @@ -0,0 +1,321 @@ +# Provider Architecture + +Data providers enable reading from and writing to external systems (storage, databases, vector stores). This document defines the architecture for implementing providers in Python while maintaining type safety with the Rust core. + +## Design Principles + +1. **Rust owns the API boundary** - All HTTP schemas defined in Rust, Python conforms to them +2. **Python owns integrations** - Provider implementations leverage Python's ecosystem +3. **Type safety across the boundary** - Schemas generated from Rust, validated in Python +4. **Async-first** - No synchronous APIs, no blocking calls +5. 
**Minimal coupling** - Providers are independent, share only core protocols + +## Architecture + +``` +┌────────────────────────────────────────────────────┐ +│ Rust Core │ +│ │ +│ OpenAPI Schema ◄── schemars ◄── Rust Types │ +│ │ │ │ +│ ▼ ▼ │ +│ JSON Schema files nvisy-dal traits │ +│ │ │ │ +└────────┼────────────────────────────┼──────────────┘ + │ │ + ▼ ▼ +┌────────────────────────────────────────────────────┐ +│ Python Providers │ +│ │ +│ datamodel-codegen ──► Pydantic Models │ +│ │ │ +│ ▼ │ +│ Provider Protocols │ +│ │ │ +│ ▼ │ +│ Provider Implementations │ +│ │ +└────────────────────────────────────────────────────┘ +``` + +## Schema Flow + +### 1. Define in Rust + +Schemas are defined once in Rust using `schemars`: + +```rust +#[derive(JsonSchema, Serialize, Deserialize)] +pub struct ObjectContext { + pub prefix: Option, + pub continuation_token: Option, + pub limit: Option, +} +``` + +### 2. Export to JSON Schema + +Build script exports schemas to `schemas/`: + +``` +schemas/ +├── contexts/ +│ ├── object.json +│ ├── relational.json +│ └── vector.json +├── credentials/ +│ ├── s3.json +│ ├── gcs.json +│ └── ... +└── datatypes/ + ├── blob.json + ├── document.json + └── ... +``` + +### 3. Generate Python Models + +Python models generated from JSON Schema at build time: + +```bash +uv run datamodel-codegen \ + --input schemas/ \ + --output packages/nvisy-dal-core/nvisy_dal_core/generated/ +``` + +### 4. Validate at Runtime + +Generated models used in provider implementations with Pydantic validation. + +## Provider Interface + +Providers implement async protocols for reading and writing data. + +### Input Protocol + +```python +@runtime_checkable +class DataInput(Protocol[T_co, Ctx_contra]): + """Protocol for reading data from external sources.""" + + async def read(self, ctx: Ctx_contra) -> AsyncIterator[T_co]: + """Yield items from the source based on context.""" + ... +``` + +### Output Protocol + +```python +@runtime_checkable +class DataOutput(Protocol[T_contra, Ctx_contra]): + """Protocol for writing data to external sinks.""" + + async def write(self, ctx: Ctx_contra, items: Sequence[T_contra]) -> None: + """Write a batch of items to the sink.""" + ... +``` + +### Provider Protocol + +```python +@runtime_checkable +class Provider(Protocol[Cred, Params]): + """Protocol for provider lifecycle management.""" + + @classmethod + async def connect(cls, credentials: Cred, params: Params) -> Self: + """Establish connection to the external service.""" + ... + + async def disconnect(self) -> None: + """Release resources and close connections.""" + ... 
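+
+
+# Hedged usage sketch, not part of the protocol definitions above: how a caller
+# might drive a provider that also satisfies DataInput. `read()` is assumed to
+# be implemented as an async generator, the usual way to satisfy AsyncIterator.
+async def read_all(provider_cls, credentials, params, ctx):
+    provider = await provider_cls.connect(credentials, params)
+    try:
+        return [item async for item in provider.read(ctx)]
+    finally:
+        await provider.disconnect()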
+``` + +## Package Structure + +Single package with optional dependencies per provider: + +``` +packages/nvisy-dal/ +├── pyproject.toml +├── py.typed # PEP 561 marker +└── src/ + └── nvisy_dal/ + ├── __init__.py + ├── protocols.py # DataInput, DataOutput, Provider + ├── errors.py # DalError, error kinds + ├── _generated/ # From JSON Schema (committed) + │ ├── __init__.py + │ ├── contexts.py + │ └── datatypes.py + └── providers/ + ├── __init__.py + ├── s3.py + ├── gcs.py + ├── azure.py + ├── postgres.py + ├── mysql.py + ├── qdrant.py + └── pinecone.py +``` + +### Layout Rationale + +- **Single package** - Internal code, not publishing separately to PyPI +- **`src/` layout** - Prevents accidental imports from project root during development +- **Flat providers** - One module per provider, no nested input/output structure +- **`_generated/` committed** - Reproducible builds, `_` prefix indicates internal +- **Optional deps** - `pip install nvisy-dal[s3,postgres]` for selective installation + +### Dependencies + +```toml +# pyproject.toml +[project] +name = "nvisy-dal" +dependencies = [ + "pydantic>=2.0", +] + +[project.optional-dependencies] +s3 = ["boto3>=1.35", "types-boto3"] +gcs = ["google-cloud-storage>=2.18"] +azure = ["azure-storage-blob>=12.23"] +postgres = ["asyncpg>=0.30"] +mysql = ["aiomysql>=0.2"] +qdrant = ["qdrant-client>=1.12"] +pinecone = ["pinecone-client>=5.0"] +all = ["nvisy-dal[s3,gcs,azure,postgres,mysql,qdrant,pinecone]"] +dev = ["nvisy-dal[all]", "pytest>=8.0", "pytest-asyncio>=0.24", "moto>=5.0"] +``` + +## Python Standards + +### Tooling + +| Tool | Purpose | +|------|---------| +| `uv` | Package management, virtualenv, lockfile | +| `ruff` | Linting + formatting (replaces black, isort, flake8) | +| `pyright` | Type checking in strict mode | +| `pytest` | Testing with `pytest-asyncio` | + +### Configuration + +All config in `pyproject.toml`: + +```toml +[project] +requires-python = ">=3.12" + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["ALL"] +ignore = ["D", "ANN101", "ANN102", "COM812", "ISC001"] + +[tool.ruff.lint.isort] +known-first-party = ["nvisy_dal"] + +[tool.pyright] +pythonVersion = "3.12" +typeCheckingMode = "strict" + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +``` + +### Code Style + +- Type hints on all public APIs +- Protocols over ABCs (structural typing) +- `Final` for constants, `ClassVar` for class attributes +- `Sequence` over `list` in parameters (covariance) +- `Mapping` over `dict` in parameters +- `async def` always, no sync wrappers +- Context managers for resource cleanup +- `structlog` for structured logging + +### Error Handling + +```python +from enum import StrEnum +from typing import final + +class ErrorKind(StrEnum): + """Classification of provider errors.""" + + CONNECTION = "connection" + NOT_FOUND = "not_found" + INVALID_INPUT = "invalid_input" + TIMEOUT = "timeout" + PROVIDER = "provider" + +@final +class DalError(Exception): + """Base error for all provider operations.""" + + __slots__ = ("message", "kind", "source") + + def __init__( + self, + message: str, + kind: ErrorKind = ErrorKind.PROVIDER, + source: BaseException | None = None, + ) -> None: + super().__init__(message) + self.message = message + self.kind = kind + self.source = source +``` + +## PyO3 Bridge + +The bridge module in `nvisy-dal` handles: + +1. **Runtime management** - Python interpreter lifecycle +2. **Async bridging** - Rust futures ↔ Python coroutines +3. 
**Type conversion** - Via `pythonize` using shared JSON Schema +4. **Error propagation** - Python exceptions → Rust errors +5. **GIL coordination** - Release during I/O for concurrency + +### Guarantees + +- Provider methods are called with validated inputs (Pydantic) +- Outputs conform to expected schema (Pydantic serialization) +- Errors include Python traceback for debugging +- GIL released during all I/O operations + +## Testing Strategy + +### Unit Tests (Python) + +- Mock external services (`moto` for AWS, `responses` for HTTP) +- Test protocol conformance +- Test error handling paths + +### Integration Tests (Rust) + +- Test PyO3 bridge with real Python runtime +- Verify type conversion round-trips +- Test async behavior across boundary + +### Contract Tests + +- Validate generated Python models against Rust schemas +- Run on CI after schema changes + +## Adding a Provider + +1. Define credentials/params schema in Rust (`crates/nvisy-dal/src/schemas/`) +2. Export JSON Schema (`make schemas`) +3. Regenerate Python models (`make codegen`) +4. Add optional dependency to `pyproject.toml` +5. Create provider module in `src/nvisy_dal/providers/` +6. Implement `DataInput` and/or `DataOutput` protocols +7. Add unit tests with mocked external service +8. Register in PyO3 bridge diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..ba28667 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,36 @@ +# Nvisy Documentation + +## Overview + +Nvisy transforms uploaded files into structured, normalized representations that enable cross-file intelligence. The knowledge graph—not the files—is the primary asset. + +## Problem + +Document intelligence tools typically treat files as the unit of work. This prevents cross-file reasoning, entity resolution across documents, and institutional memory accumulation. + +## Design Principles + +| Principle | Description | +|-----------|-------------| +| Structure over blobs | Every file converts to machine-readable structure with content and metadata | +| Canonical representation | Single internal schema normalizes all source formats | +| Grounded reasoning | Every conclusion links to source: file, section, exact text, confidence | +| Isolation & trust | Tenant-aware embeddings, permission-filtered retrieval, audit logs | +| Time awareness | Versioned representations, semantic diffing, temporal queries | + +## Core Capabilities + +| Capability | Description | +|------------|-------------| +| Reading | Parse and normalize any supported file format | +| Search | Hybrid search combining vector, symbolic, and graph queries | +| Comparison | Identify differences, conflicts, and drift across documents | +| Extraction | Pull entities, tables, claims, and structured data | + +## Documentation + +| Document | Description | +|----------|-------------| +| [Architecture](./ARCHITECTURE.md) | System design, data model, and technology stack | +| [Intelligence](./INTELLIGENCE.md) | Cross-file reasoning, search, and extraction | +| [Providers](./PROVIDERS.md) | Data provider architecture with PyO3 | diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md new file mode 100644 index 0000000..f87f1ad --- /dev/null +++ b/docs/ROADMAP.md @@ -0,0 +1,63 @@ +# DAL Provider Roadmap + +Status of provider implementations for the Data Abstraction Layer. 
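+
+Each item below corresponds to a module to be added under `src/nvisy_dal/providers/`, implementing the `DataInput`/`DataOutput` protocols described in [Providers](./PROVIDERS.md). As a rough sketch of the shape such a module takes (the class, backend, and context handling here are assumptions, not an existing provider):
+
+```python
+# Toy provider sketch: an in-memory "object store" standing in for a real backend.
+from collections.abc import AsyncIterator
+from dataclasses import dataclass, field
+
+
+@dataclass
+class MemoryObjectInput:
+    """Structurally satisfies the DataInput protocol via duck typing."""
+
+    objects: dict[str, bytes] = field(default_factory=dict)
+
+    async def read(self, ctx) -> AsyncIterator[tuple[str, bytes]]:
+        """Yield (key, payload) pairs, honoring an optional ctx.prefix filter."""
+        prefix = getattr(ctx, "prefix", None) or ""
+        for key, payload in self.objects.items():
+            if key.startswith(prefix):
+                yield key, payload
+```
+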
+ +## Completed + +- [x] Core protocols and error types + +## Relational Databases + +- [ ] PostgreSQL provider +- [ ] MySQL provider +- [ ] SQLite provider +- [ ] SQL Server provider +- [ ] Oracle provider + +## Object Storage + +- [ ] S3 provider +- [ ] GCS provider +- [ ] Azure Blob provider +- [ ] MinIO provider +- [ ] Cloudflare R2 provider + +## Vector Databases + +- [ ] Pinecone provider +- [ ] Qdrant provider +- [ ] Weaviate provider +- [ ] Milvus provider +- [ ] Chroma provider +- [ ] pgvector provider + +## Document Databases + +- [ ] MongoDB provider +- [ ] DynamoDB provider +- [ ] Firestore provider +- [ ] CouchDB provider + +## Key-Value Stores + +- [ ] Redis provider +- [ ] Memcached provider +- [ ] etcd provider + +## Message Queues + +- [ ] Kafka provider +- [ ] RabbitMQ provider +- [ ] NATS provider +- [ ] SQS provider + +## Graph Databases + +- [ ] Neo4j provider +- [ ] Neptune provider + +## Search Engines + +- [ ] Elasticsearch provider +- [ ] OpenSearch provider +- [ ] Algolia provider diff --git a/migrations/2025-05-21-222840_workspaces/up.sql b/migrations/2025-05-21-222840_workspaces/up.sql index 79d47dd..646dc84 100644 --- a/migrations/2025-05-21-222840_workspaces/up.sql +++ b/migrations/2025-05-21-222840_workspaces/up.sql @@ -498,9 +498,11 @@ CREATE TABLE workspace_webhooks ( -- Event configuration events WEBHOOK_EVENT[] NOT NULL DEFAULT '{}', headers JSONB NOT NULL DEFAULT '{}', + secret TEXT NOT NULL DEFAULT encode(gen_random_bytes(32), 'hex'), CONSTRAINT workspace_webhooks_events_not_empty CHECK (array_length(events, 1) > 0), CONSTRAINT workspace_webhooks_headers_size CHECK (length(headers::TEXT) BETWEEN 2 AND 4096), + CONSTRAINT workspace_webhooks_secret_length CHECK (length(secret) = 64), -- Webhook status status WEBHOOK_STATUS NOT NULL DEFAULT 'active', diff --git a/migrations/2025-05-27-011852_documents/down.sql b/migrations/2025-05-27-011852_documents/down.sql index 821a0f2..b9ff2d3 100644 --- a/migrations/2025-05-27-011852_documents/down.sql +++ b/migrations/2025-05-27-011852_documents/down.sql @@ -1,23 +1,20 @@ -- Drop all objects created in the documents migration -- Drop in reverse order of creation to avoid dependency issues --- Drop functions -DROP FUNCTION IF EXISTS find_duplicate_files(_document_id UUID); +-- Drop tables (indexes dropped automatically with tables) +DROP TABLE IF EXISTS file_annotations; +DROP TABLE IF EXISTS file_chunks; --- Drop views -DROP VIEW IF EXISTS processing_queue; -DROP VIEW IF EXISTS document_processing_summary; +-- Drop trigger before the function it depends on +DROP TRIGGER IF EXISTS files_set_version_trigger ON files; --- Drop tables (indexes and remaining triggers dropped automatically with tables) -DROP TABLE IF EXISTS document_annotations; -DROP TABLE IF EXISTS document_comments; -DROP TABLE IF EXISTS document_chunks; -DROP TABLE IF EXISTS document_files; -DROP TABLE IF EXISTS documents; +-- Drop files table +DROP TABLE IF EXISTS files; + +-- Drop functions (after triggers that depend on them) +DROP FUNCTION IF EXISTS find_duplicate_files(UUID); +DROP FUNCTION IF EXISTS set_file_version_number(); -- Drop enum types DROP TYPE IF EXISTS ANNOTATION_TYPE; -DROP TYPE IF EXISTS CONTENT_SEGMENTATION; -DROP TYPE IF EXISTS REQUIRE_MODE; -DROP TYPE IF EXISTS PROCESSING_STATUS; -DROP TYPE IF EXISTS DOCUMENT_STATUS; +DROP TYPE IF EXISTS FILE_SOURCE; diff --git a/migrations/2025-05-27-011852_documents/up.sql b/migrations/2025-05-27-011852_documents/up.sql index 9a2474d..37da9d5 100644 --- 
a/migrations/2025-05-27-011852_documents/up.sql +++ b/migrations/2025-05-27-011852_documents/up.sql @@ -1,142 +1,43 @@ --- This migration creates tables for documents, files, processing pipeline, and security features +-- This migration creates tables for files, chunks, and annotations --- Create documents table - Document containers/folders -CREATE TABLE documents ( - -- Primary identifiers - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, - account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - - -- Core attributes - display_name TEXT NOT NULL DEFAULT 'Untitled', - description TEXT DEFAULT NULL, - tags TEXT[] NOT NULL DEFAULT '{}', - - CONSTRAINT documents_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), - CONSTRAINT documents_description_length_max CHECK (length(description) <= 2048), - CONSTRAINT documents_tags_count_max CHECK (array_length(tags, 1) IS NULL OR array_length(tags, 1) <= 32), - - -- Configuration - metadata JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT documents_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 16384), - - -- Lifecycle timestamps - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - deleted_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT documents_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT documents_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT documents_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) -); - --- Set up automatic updated_at trigger -SELECT setup_updated_at('documents'); - --- Create indexes for documents -CREATE INDEX documents_workspace_idx - ON documents (workspace_id, created_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX documents_account_recent_idx - ON documents (account_id, updated_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX documents_tags_search_idx - ON documents USING gin (tags) - WHERE array_length(tags, 1) > 0 AND deleted_at IS NULL; - -CREATE INDEX documents_metadata_search_idx - ON documents USING gin (metadata) - WHERE deleted_at IS NULL; - -CREATE INDEX documents_display_name_trgm_idx - ON documents USING gin (display_name gin_trgm_ops) - WHERE deleted_at IS NULL; - --- Add table and column comments -COMMENT ON TABLE documents IS - 'Document containers for organizing and managing file collections with metadata.'; - -COMMENT ON COLUMN documents.id IS 'Unique document identifier'; -COMMENT ON COLUMN documents.workspace_id IS 'Parent workspace reference'; -COMMENT ON COLUMN documents.account_id IS 'Creating account reference'; -COMMENT ON COLUMN documents.display_name IS 'Human-readable document name (1-255 chars)'; -COMMENT ON COLUMN documents.description IS 'Document description (up to 2048 chars)'; -COMMENT ON COLUMN documents.tags IS 'Classification tags (max 32)'; -COMMENT ON COLUMN documents.metadata IS 'Extended metadata (JSON, 2B-16KB)'; -COMMENT ON COLUMN documents.created_at IS 'Creation timestamp'; -COMMENT ON COLUMN documents.updated_at IS 'Last modification timestamp'; -COMMENT ON COLUMN documents.deleted_at IS 'Soft deletion timestamp'; - --- Create file processing status enum -CREATE TYPE PROCESSING_STATUS AS ENUM ( - 'pending', -- File is queued for processing - 'processing', -- File is currently being processed - 'ready', -- Processing completed, file is ready for use - 'canceled' -- Processing 
was canceled -); - -COMMENT ON TYPE PROCESSING_STATUS IS - 'File processing pipeline status for tracking processing workflows.'; - --- Create processing requirements enum -CREATE TYPE REQUIRE_MODE AS ENUM ( - 'none', -- No special processing required - 'optical', -- Requires OCR to extract text from images - 'language', -- Requires VLM for advanced content understanding - 'both' -- Requires both OCR and VLM processing +-- Create file source enum +CREATE TYPE FILE_SOURCE AS ENUM ( + 'uploaded', -- File was manually uploaded by a user + 'imported', -- File was imported from an external source + 'generated' -- File was generated by the system (pipeline output) ); -COMMENT ON TYPE REQUIRE_MODE IS - 'Processing requirements for input files based on content type.'; +COMMENT ON TYPE FILE_SOURCE IS + 'Indicates how a file was created in the system.'; --- Create content segmentation enum -CREATE TYPE CONTENT_SEGMENTATION AS ENUM ( - 'none', -- No segmentation applied - 'semantic', -- Semantic-based segmentation - 'fixed' -- Fixed-size segmentation -); - -COMMENT ON TYPE CONTENT_SEGMENTATION IS - 'Content segmentation strategy for document processing.'; - --- Create document files table - Source files for processing -CREATE TABLE document_files ( +-- Create files table (renamed from document_files, standalone without documents container) +CREATE TABLE files ( -- Primary identifiers id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- References - workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, - document_id UUID DEFAULT NULL REFERENCES documents (id) ON DELETE CASCADE, + workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - parent_id UUID DEFAULT NULL REFERENCES document_files (id) ON DELETE SET NULL, + parent_id UUID DEFAULT NULL REFERENCES files (id) ON DELETE SET NULL, + + -- Version tracking (parent_id links to previous version, version_number tracks sequence) + version_number INTEGER NOT NULL DEFAULT 1, + + CONSTRAINT files_version_number_min CHECK (version_number >= 1), -- File metadata display_name TEXT NOT NULL DEFAULT 'Untitled', original_filename TEXT NOT NULL DEFAULT 'Untitled', file_extension TEXT NOT NULL DEFAULT 'txt', + mime_type TEXT DEFAULT NULL, tags TEXT[] NOT NULL DEFAULT '{}', + source FILE_SOURCE NOT NULL DEFAULT 'uploaded', - CONSTRAINT document_files_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), - CONSTRAINT document_files_original_filename_length CHECK (length(original_filename) BETWEEN 1 AND 255), - CONSTRAINT document_files_file_extension_format CHECK (file_extension ~ '^[a-zA-Z0-9]{1,20}$'), - CONSTRAINT document_files_tags_count_max CHECK (array_length(tags, 1) IS NULL OR array_length(tags, 1) <= 32), - - -- Processing configuration - require_mode REQUIRE_MODE NOT NULL DEFAULT 'none', - processing_priority INTEGER NOT NULL DEFAULT 5, - processing_status PROCESSING_STATUS NOT NULL DEFAULT 'pending', - - CONSTRAINT document_files_processing_priority_range CHECK (processing_priority BETWEEN 1 AND 10), - - -- Knowledge extraction configuration - is_indexed BOOLEAN NOT NULL DEFAULT FALSE, - content_segmentation CONTENT_SEGMENTATION NOT NULL DEFAULT 'semantic', - visual_support BOOLEAN NOT NULL DEFAULT FALSE, + CONSTRAINT files_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), + CONSTRAINT files_original_filename_length CHECK (length(original_filename) BETWEEN 1 AND 255), + CONSTRAINT files_file_extension_format 
CHECK (file_extension ~ '^[a-zA-Z0-9]{1,20}$'), + CONSTRAINT files_mime_type_format CHECK (mime_type IS NULL OR mime_type ~ '^[a-zA-Z0-9\-]+/[a-zA-Z0-9\-\.\+]+$'), + CONSTRAINT files_tags_count_max CHECK (array_length(tags, 1) IS NULL OR array_length(tags, 1) <= 32), -- Storage and integrity file_size_bytes BIGINT NOT NULL, @@ -144,89 +45,114 @@ CREATE TABLE document_files ( storage_path TEXT NOT NULL, storage_bucket TEXT NOT NULL, - CONSTRAINT document_files_file_size_min CHECK (file_size_bytes >= 0), - CONSTRAINT document_files_file_hash_sha256_length CHECK (octet_length(file_hash_sha256) = 32), - CONSTRAINT document_files_storage_path_not_empty CHECK (trim(storage_path) <> ''), - CONSTRAINT document_files_storage_bucket_not_empty CHECK (trim(storage_bucket) <> ''), + CONSTRAINT files_file_size_min CHECK (file_size_bytes >= 0), + CONSTRAINT files_file_hash_sha256_length CHECK (octet_length(file_hash_sha256) = 32), + CONSTRAINT files_storage_path_not_empty CHECK (trim(storage_path) <> ''), + CONSTRAINT files_storage_bucket_not_empty CHECK (trim(storage_bucket) <> ''), -- Configuration metadata JSONB NOT NULL DEFAULT '{}', - CONSTRAINT document_files_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 8192), + CONSTRAINT files_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 65536), -- Lifecycle timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, deleted_at TIMESTAMPTZ DEFAULT NULL, - CONSTRAINT document_files_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT document_files_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT document_files_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) + CONSTRAINT files_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT files_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), + CONSTRAINT files_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) ); -- Set up automatic updated_at trigger -SELECT setup_updated_at('document_files'); +SELECT setup_updated_at('files'); --- Create indexes for document files -CREATE INDEX document_files_processing_status_idx - ON document_files (document_id, processing_status, created_at DESC) +-- Create indexes for files +CREATE INDEX files_workspace_idx + ON files (workspace_id, created_at DESC) WHERE deleted_at IS NULL; -CREATE INDEX document_files_processing_queue_idx - ON document_files (processing_status, processing_priority DESC, created_at ASC) - WHERE processing_status = 'pending' AND deleted_at IS NULL; +CREATE INDEX files_account_idx + ON files (account_id, created_at DESC) + WHERE deleted_at IS NULL; -CREATE INDEX document_files_hash_dedup_idx - ON document_files (file_hash_sha256, file_size_bytes) +CREATE INDEX files_hash_dedup_idx + ON files (file_hash_sha256, file_size_bytes) WHERE deleted_at IS NULL; -CREATE INDEX document_files_tags_search_idx - ON document_files USING gin (tags) +CREATE INDEX files_tags_search_idx + ON files USING gin (tags) WHERE array_length(tags, 1) > 0 AND deleted_at IS NULL; -CREATE INDEX document_files_indexed_idx - ON document_files (is_indexed, content_segmentation) - WHERE is_indexed = TRUE AND deleted_at IS NULL; +CREATE INDEX files_display_name_trgm_idx + ON files USING gin (display_name gin_trgm_ops) + WHERE deleted_at IS NULL; + +CREATE INDEX files_version_chain_idx + ON files (parent_id, version_number DESC) + WHERE parent_id IS NOT 
NULL AND deleted_at IS NULL; -CREATE INDEX document_files_display_name_trgm_idx - ON document_files USING gin (display_name gin_trgm_ops) +CREATE INDEX files_source_idx + ON files (source, workspace_id) WHERE deleted_at IS NULL; +-- Trigger function to auto-set version_number based on parent +CREATE OR REPLACE FUNCTION set_file_version_number() +RETURNS TRIGGER AS $$ +BEGIN + -- If parent_id is set, calculate version as parent's version + 1 + IF NEW.parent_id IS NOT NULL THEN + SELECT version_number + 1 INTO NEW.version_number + FROM files + WHERE id = NEW.parent_id; + ELSE + -- No parent means version 1 + NEW.version_number := 1; + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER files_set_version_trigger + BEFORE INSERT ON files + FOR EACH ROW + EXECUTE FUNCTION set_file_version_number(); + +COMMENT ON FUNCTION set_file_version_number() IS + 'Automatically sets version_number based on parent file version.'; + -- Add table and column comments -COMMENT ON TABLE document_files IS - 'Source files for document processing with pipeline management.'; - -COMMENT ON COLUMN document_files.id IS 'Unique file identifier'; -COMMENT ON COLUMN document_files.workspace_id IS 'Parent workspace reference (required)'; -COMMENT ON COLUMN document_files.document_id IS 'Parent document reference (optional)'; -COMMENT ON COLUMN document_files.account_id IS 'Uploading account reference'; -COMMENT ON COLUMN document_files.display_name IS 'Display name (1-255 chars)'; -COMMENT ON COLUMN document_files.original_filename IS 'Original upload filename (1-255 chars)'; -COMMENT ON COLUMN document_files.file_extension IS 'File extension (1-20 alphanumeric)'; -COMMENT ON COLUMN document_files.tags IS 'Classification tags (max 32)'; -COMMENT ON COLUMN document_files.require_mode IS 'Processing mode required'; -COMMENT ON COLUMN document_files.processing_priority IS 'Priority 1-10 (10=highest)'; -COMMENT ON COLUMN document_files.processing_status IS 'Current processing status'; -COMMENT ON COLUMN document_files.is_indexed IS 'Whether file content has been indexed for search'; -COMMENT ON COLUMN document_files.content_segmentation IS 'Content segmentation strategy'; -COMMENT ON COLUMN document_files.visual_support IS 'Whether to enable visual content processing'; -COMMENT ON COLUMN document_files.file_size_bytes IS 'File size in bytes'; -COMMENT ON COLUMN document_files.file_hash_sha256 IS 'SHA256 content hash'; -COMMENT ON COLUMN document_files.storage_path IS 'Storage system path'; -COMMENT ON COLUMN document_files.storage_bucket IS 'Storage bucket/container'; -COMMENT ON COLUMN document_files.metadata IS 'Extended metadata (JSON, 2B-8KB)'; -COMMENT ON COLUMN document_files.parent_id IS 'Parent file reference for hierarchical relationships'; -COMMENT ON COLUMN document_files.created_at IS 'Upload timestamp'; -COMMENT ON COLUMN document_files.updated_at IS 'Last modification timestamp'; -COMMENT ON COLUMN document_files.deleted_at IS 'Soft deletion timestamp'; - --- Create document chunks table - Text chunks with vector embeddings for semantic search -CREATE TABLE document_chunks ( +COMMENT ON TABLE files IS + 'Files stored in the system with version tracking and deduplication.'; + +COMMENT ON COLUMN files.id IS 'Unique file identifier'; +COMMENT ON COLUMN files.workspace_id IS 'Parent workspace reference'; +COMMENT ON COLUMN files.account_id IS 'Uploading/creating account reference'; +COMMENT ON COLUMN files.parent_id IS 'Parent file reference for version chains'; +COMMENT ON COLUMN files.version_number 
IS 'Version number (1 for original, increments via parent_id chain)'; +COMMENT ON COLUMN files.display_name IS 'Display name (1-255 chars)'; +COMMENT ON COLUMN files.original_filename IS 'Original upload filename (1-255 chars)'; +COMMENT ON COLUMN files.file_extension IS 'File extension (1-20 alphanumeric)'; +COMMENT ON COLUMN files.mime_type IS 'MIME type of the file'; +COMMENT ON COLUMN files.tags IS 'Classification tags (max 32)'; +COMMENT ON COLUMN files.source IS 'How the file was created (uploaded, imported, generated)'; +COMMENT ON COLUMN files.file_size_bytes IS 'File size in bytes'; +COMMENT ON COLUMN files.file_hash_sha256 IS 'SHA256 content hash'; +COMMENT ON COLUMN files.storage_path IS 'Storage system path'; +COMMENT ON COLUMN files.storage_bucket IS 'Storage bucket/container'; +COMMENT ON COLUMN files.metadata IS 'Extended metadata (JSON)'; +COMMENT ON COLUMN files.created_at IS 'Upload timestamp'; +COMMENT ON COLUMN files.updated_at IS 'Last modification timestamp'; +COMMENT ON COLUMN files.deleted_at IS 'Soft deletion timestamp'; + +-- Create file chunks table - Text chunks with vector embeddings for semantic search +CREATE TABLE file_chunks ( -- Primary identifiers id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- References - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, + file_id UUID NOT NULL REFERENCES files (id) ON DELETE CASCADE, -- Chunk position and content info chunk_index INTEGER NOT NULL DEFAULT 0, @@ -234,135 +160,63 @@ CREATE TABLE document_chunks ( content_size INTEGER NOT NULL DEFAULT 0, token_count INTEGER NOT NULL DEFAULT 0, - CONSTRAINT document_chunks_chunk_index_min CHECK (chunk_index >= 0), - CONSTRAINT document_chunks_content_sha256_length CHECK (octet_length(content_sha256) = 32), - CONSTRAINT document_chunks_content_size_min CHECK (content_size >= 0), - CONSTRAINT document_chunks_token_count_min CHECK (token_count >= 0), + CONSTRAINT file_chunks_chunk_index_min CHECK (chunk_index >= 0), + CONSTRAINT file_chunks_content_sha256_length CHECK (octet_length(content_sha256) = 32), + CONSTRAINT file_chunks_content_size_min CHECK (content_size >= 0), + CONSTRAINT file_chunks_token_count_min CHECK (token_count >= 0), -- Vector embedding (1536 dimensions for OpenAI ada-002, adjust as needed) embedding VECTOR(1536) NOT NULL, embedding_model TEXT NOT NULL, - CONSTRAINT document_chunks_embedding_model_format CHECK (embedding_model ~ '^[a-zA-Z0-9_\-:/\.]+$'), + CONSTRAINT file_chunks_embedding_model_format CHECK (embedding_model ~ '^[a-zA-Z0-9_\-:/\.]+$'), -- Chunk metadata (positions, page numbers, etc.) 
metadata JSONB NOT NULL DEFAULT '{}', - CONSTRAINT document_chunks_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), + CONSTRAINT file_chunks_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), -- Lifecycle timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - CONSTRAINT document_chunks_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT file_chunks_updated_after_created CHECK (updated_at >= created_at), -- Unique constraint on file + chunk index - CONSTRAINT document_chunks_file_chunk_unique UNIQUE (file_id, chunk_index) + CONSTRAINT file_chunks_file_chunk_unique UNIQUE (file_id, chunk_index) ); -- Set up automatic updated_at trigger -SELECT setup_updated_at('document_chunks'); +SELECT setup_updated_at('file_chunks'); --- Create indexes for document chunks -CREATE INDEX document_chunks_file_idx - ON document_chunks (file_id, chunk_index ASC); +-- Create indexes for file chunks +CREATE INDEX file_chunks_file_idx + ON file_chunks (file_id, chunk_index ASC); -CREATE INDEX document_chunks_embedded_idx - ON document_chunks (file_id) +CREATE INDEX file_chunks_embedded_idx + ON file_chunks (file_id) WHERE embedding IS NOT NULL; --- Create HNSW index for vector similarity search (L2 distance) -CREATE INDEX document_chunks_embedding_idx - ON document_chunks USING hnsw (embedding vector_cosine_ops) +-- Create HNSW index for vector similarity search (cosine distance) +CREATE INDEX file_chunks_embedding_idx + ON file_chunks USING hnsw (embedding vector_cosine_ops) WHERE embedding IS NOT NULL; -- Add table and column comments -COMMENT ON TABLE document_chunks IS - 'Text chunks extracted from document files with vector embeddings for semantic search.'; - -COMMENT ON COLUMN document_chunks.id IS 'Unique chunk identifier'; -COMMENT ON COLUMN document_chunks.file_id IS 'Parent document file reference'; -COMMENT ON COLUMN document_chunks.chunk_index IS 'Sequential index of chunk within file (0-based)'; -COMMENT ON COLUMN document_chunks.content_sha256 IS 'SHA-256 hash of chunk content'; -COMMENT ON COLUMN document_chunks.content_size IS 'Size of chunk content in bytes'; -COMMENT ON COLUMN document_chunks.token_count IS 'Approximate token count for the chunk'; -COMMENT ON COLUMN document_chunks.embedding IS 'Vector embedding (1536 dimensions)'; -COMMENT ON COLUMN document_chunks.embedding_model IS 'Model used to generate the embedding'; -COMMENT ON COLUMN document_chunks.metadata IS 'Extended metadata (positions, page numbers, etc.)'; -COMMENT ON COLUMN document_chunks.created_at IS 'Chunk creation timestamp'; -COMMENT ON COLUMN document_chunks.updated_at IS 'Last modification timestamp'; - --- Create document comments table - User discussions and annotations -CREATE TABLE document_comments ( - -- Primary identifiers - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - - -- Thread references - parent_comment_id UUID DEFAULT NULL REFERENCES document_comments (id) ON DELETE CASCADE, - reply_to_account_id UUID DEFAULT NULL REFERENCES accounts (id) ON DELETE SET NULL, - - -- Comment content - content TEXT NOT NULL, - - CONSTRAINT document_comments_content_length CHECK (length(trim(content)) BETWEEN 1 AND 10000), - - -- Metadata - metadata JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT document_comments_metadata_size CHECK 
(length(metadata::TEXT) BETWEEN 2 AND 4096), - - -- Lifecycle timestamps - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - deleted_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT document_comments_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT document_comments_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT document_comments_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) -); - --- Set up automatic updated_at trigger -SELECT setup_updated_at('document_comments'); - --- Create indexes for document comments -CREATE INDEX document_comments_file_idx - ON document_comments (file_id, created_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX document_comments_account_idx - ON document_comments (account_id, created_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX document_comments_thread_idx - ON document_comments (parent_comment_id, created_at ASC) - WHERE parent_comment_id IS NOT NULL AND deleted_at IS NULL; - -CREATE INDEX document_comments_reply_to_idx - ON document_comments (reply_to_account_id, created_at DESC) - WHERE reply_to_account_id IS NOT NULL AND deleted_at IS NULL; - -CREATE INDEX document_comments_metadata_idx - ON document_comments USING gin (metadata) - WHERE deleted_at IS NULL; - --- Add table and column comments -COMMENT ON TABLE document_comments IS - 'User comments and discussions on files, supporting threaded conversations and @mentions.'; - -COMMENT ON COLUMN document_comments.id IS 'Unique comment identifier'; -COMMENT ON COLUMN document_comments.file_id IS 'Parent file reference'; -COMMENT ON COLUMN document_comments.account_id IS 'Comment author reference'; -COMMENT ON COLUMN document_comments.parent_comment_id IS 'Parent comment for threaded replies (NULL for top-level)'; -COMMENT ON COLUMN document_comments.reply_to_account_id IS 'Account being replied to (@mention)'; -COMMENT ON COLUMN document_comments.content IS 'Comment text content (1-10000 chars)'; -COMMENT ON COLUMN document_comments.metadata IS 'Extended metadata (JSON, 2B-4KB)'; -COMMENT ON COLUMN document_comments.created_at IS 'Comment creation timestamp'; -COMMENT ON COLUMN document_comments.updated_at IS 'Last edit timestamp'; -COMMENT ON COLUMN document_comments.deleted_at IS 'Soft deletion timestamp'; +COMMENT ON TABLE file_chunks IS + 'Text chunks extracted from files with vector embeddings for semantic search.'; + +COMMENT ON COLUMN file_chunks.id IS 'Unique chunk identifier'; +COMMENT ON COLUMN file_chunks.file_id IS 'Parent file reference'; +COMMENT ON COLUMN file_chunks.chunk_index IS 'Sequential index of chunk within file (0-based)'; +COMMENT ON COLUMN file_chunks.content_sha256 IS 'SHA-256 hash of chunk content'; +COMMENT ON COLUMN file_chunks.content_size IS 'Size of chunk content in bytes'; +COMMENT ON COLUMN file_chunks.token_count IS 'Approximate token count for the chunk'; +COMMENT ON COLUMN file_chunks.embedding IS 'Vector embedding (1536 dimensions)'; +COMMENT ON COLUMN file_chunks.embedding_model IS 'Model used to generate the embedding'; +COMMENT ON COLUMN file_chunks.metadata IS 'Extended metadata (positions, page numbers, etc.)'; +COMMENT ON COLUMN file_chunks.created_at IS 'Chunk creation timestamp'; +COMMENT ON COLUMN file_chunks.updated_at IS 'Last modification timestamp'; -- Create annotation type enum CREATE TYPE ANNOTATION_TYPE AS ENUM ( @@ -371,111 +225,70 @@ CREATE TYPE ANNOTATION_TYPE AS ENUM ( ); COMMENT ON 
TYPE ANNOTATION_TYPE IS - 'Type classification for document annotations.'; + 'Type classification for file annotations.'; --- Create document annotations table - Annotations for document content -CREATE TABLE document_annotations ( +-- Create file annotations table +CREATE TABLE file_annotations ( -- Primary identifiers id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- References - document_file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, + file_id UUID NOT NULL REFERENCES files (id) ON DELETE CASCADE, account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, -- Annotation content content TEXT NOT NULL, annotation_type ANNOTATION_TYPE NOT NULL DEFAULT 'annotation', - CONSTRAINT document_annotations_content_length CHECK (length(trim(content)) BETWEEN 1 AND 10000), + CONSTRAINT file_annotations_content_length CHECK (length(trim(content)) BETWEEN 1 AND 10000), - -- Metadata + -- Metadata (position, page, bounds, etc.) metadata JSONB NOT NULL DEFAULT '{}', - CONSTRAINT document_annotations_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), + CONSTRAINT file_annotations_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), -- Lifecycle timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, deleted_at TIMESTAMPTZ DEFAULT NULL, - CONSTRAINT document_annotations_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT document_annotations_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT document_annotations_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) + CONSTRAINT file_annotations_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT file_annotations_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), + CONSTRAINT file_annotations_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) ); -- Set up automatic updated_at trigger -SELECT setup_updated_at('document_annotations'); +SELECT setup_updated_at('file_annotations'); --- Create indexes for document annotations -CREATE INDEX document_annotations_file_idx - ON document_annotations (document_file_id, created_at DESC) +-- Create indexes for file annotations +CREATE INDEX file_annotations_file_idx + ON file_annotations (file_id, created_at DESC) WHERE deleted_at IS NULL; -CREATE INDEX document_annotations_account_idx - ON document_annotations (account_id, created_at DESC) +CREATE INDEX file_annotations_account_idx + ON file_annotations (account_id, created_at DESC) WHERE deleted_at IS NULL; -CREATE INDEX document_annotations_type_idx - ON document_annotations (annotation_type, document_file_id) +CREATE INDEX file_annotations_type_idx + ON file_annotations (annotation_type, file_id) WHERE deleted_at IS NULL; -- Add table and column comments -COMMENT ON TABLE document_annotations IS - 'User annotations and highlights on document content.'; - -COMMENT ON COLUMN document_annotations.id IS 'Unique annotation identifier'; -COMMENT ON COLUMN document_annotations.document_file_id IS 'Parent document file reference'; -COMMENT ON COLUMN document_annotations.account_id IS 'Annotation author reference'; -COMMENT ON COLUMN document_annotations.content IS 'Annotation text content (1-10000 chars)'; -COMMENT ON COLUMN document_annotations.annotation_type IS 'Type of annotation (note, highlight, etc.)'; -COMMENT ON COLUMN document_annotations.metadata IS 'Extended metadata including 
position/location (JSON, 2B-4KB)'; -COMMENT ON COLUMN document_annotations.created_at IS 'Annotation creation timestamp'; -COMMENT ON COLUMN document_annotations.updated_at IS 'Last edit timestamp'; -COMMENT ON COLUMN document_annotations.deleted_at IS 'Soft deletion timestamp'; - --- Create document processing summary view -CREATE VIEW document_processing_summary AS -SELECT - d.id, - d.display_name, - d.workspace_id, - COUNT(df.id) FILTER (WHERE df.deleted_at IS NULL) AS input_files_count, - d.created_at, - d.updated_at -FROM documents d - LEFT JOIN document_files df ON d.id = df.document_id -WHERE d.deleted_at IS NULL -GROUP BY d.id, d.display_name, d.workspace_id, d.created_at, d.updated_at; - -COMMENT ON VIEW document_processing_summary IS - 'Overview of document processing status, metrics, and costs.'; - --- Create processing queue view -CREATE VIEW processing_queue AS -SELECT - df.id, - df.document_id, - d.display_name AS document_name, - d.workspace_id, - df.display_name AS file_name, - df.require_mode, - df.processing_priority, - df.processing_status, - df.file_size_bytes, - df.created_at, - EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - df.created_at)) AS queue_time_seconds -FROM document_files df - JOIN documents d ON df.document_id = d.id -WHERE df.processing_status IN ('pending', 'processing') - AND df.deleted_at IS NULL - AND d.deleted_at IS NULL -ORDER BY df.processing_priority DESC, df.created_at ASC; - -COMMENT ON VIEW processing_queue IS - 'Files queued for processing, ordered by priority and age.'; +COMMENT ON TABLE file_annotations IS + 'User annotations and highlights on file content.'; + +COMMENT ON COLUMN file_annotations.id IS 'Unique annotation identifier'; +COMMENT ON COLUMN file_annotations.file_id IS 'Parent file reference'; +COMMENT ON COLUMN file_annotations.account_id IS 'Annotation author reference'; +COMMENT ON COLUMN file_annotations.content IS 'Annotation text content (1-10000 chars)'; +COMMENT ON COLUMN file_annotations.annotation_type IS 'Type of annotation (annotation, highlight)'; +COMMENT ON COLUMN file_annotations.metadata IS 'Extended metadata including position/location (JSON)'; +COMMENT ON COLUMN file_annotations.created_at IS 'Annotation creation timestamp'; +COMMENT ON COLUMN file_annotations.updated_at IS 'Last edit timestamp'; +COMMENT ON COLUMN file_annotations.deleted_at IS 'Soft deletion timestamp'; -- Create duplicate detection function -CREATE OR REPLACE FUNCTION find_duplicate_files(_document_id UUID DEFAULT NULL) +CREATE OR REPLACE FUNCTION find_duplicate_files(_workspace_id UUID DEFAULT NULL) RETURNS TABLE ( file_hash TEXT, file_size BIGINT, @@ -486,18 +299,18 @@ LANGUAGE plpgsql AS $$ BEGIN RETURN QUERY SELECT - ENCODE(df.file_hash_sha256, 'hex'), - df.file_size_bytes, + ENCODE(f.file_hash_sha256, 'hex'), + f.file_size_bytes, COUNT(*), - ARRAY_AGG(df.id) - FROM document_files df - WHERE (_document_id IS NULL OR df.document_id = _document_id) - AND df.deleted_at IS NULL - GROUP BY df.file_hash_sha256, df.file_size_bytes + ARRAY_AGG(f.id) + FROM files f + WHERE (_workspace_id IS NULL OR f.workspace_id = _workspace_id) + AND f.deleted_at IS NULL + GROUP BY f.file_hash_sha256, f.file_size_bytes HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC; END; $$; COMMENT ON FUNCTION find_duplicate_files(UUID) IS - 'Finds duplicate files by hash and size. Optionally scoped to a specific document.'; + 'Finds duplicate files by hash and size. 
Optionally scoped to a specific workspace.'; diff --git a/migrations/2026-01-09-002114_studio/down.sql b/migrations/2026-01-09-002114_studio/down.sql deleted file mode 100644 index 18160a6..0000000 --- a/migrations/2026-01-09-002114_studio/down.sql +++ /dev/null @@ -1,10 +0,0 @@ --- Revert studio migration - --- Drop tables in reverse order (respecting foreign key dependencies) -DROP TABLE IF EXISTS studio_operations; -DROP TABLE IF EXISTS studio_tool_calls; -DROP TABLE IF EXISTS studio_sessions; - --- Drop enums -DROP TYPE IF EXISTS STUDIO_TOOL_STATUS; -DROP TYPE IF EXISTS STUDIO_SESSION_STATUS; diff --git a/migrations/2026-01-09-002114_studio/up.sql b/migrations/2026-01-09-002114_studio/up.sql deleted file mode 100644 index 584f089..0000000 --- a/migrations/2026-01-09-002114_studio/up.sql +++ /dev/null @@ -1,202 +0,0 @@ --- Studio: LLM-powered document editing sessions and operations tracking - --- Studio session lifecycle status -CREATE TYPE STUDIO_SESSION_STATUS AS ENUM ( - 'active', - 'paused', - 'archived' -); - -COMMENT ON TYPE STUDIO_SESSION_STATUS IS - 'Lifecycle status for studio editing sessions.'; - --- Studio sessions table definition -CREATE TABLE studio_sessions ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, - account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - primary_file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - - -- Session attributes - display_name TEXT NOT NULL DEFAULT 'Untitled Session', - session_status STUDIO_SESSION_STATUS NOT NULL DEFAULT 'active', - - CONSTRAINT studio_sessions_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), - - -- Model configuration (model name, temperature, max tokens, etc.) 
- model_config JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT studio_sessions_model_config_size CHECK (length(model_config::TEXT) BETWEEN 2 AND 8192), - - -- Usage statistics - message_count INTEGER NOT NULL DEFAULT 0, - token_count INTEGER NOT NULL DEFAULT 0, - - CONSTRAINT studio_sessions_message_count_min CHECK (message_count >= 0), - CONSTRAINT studio_sessions_token_count_min CHECK (token_count >= 0), - - -- Lifecycle timestamps - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - - CONSTRAINT studio_sessions_updated_after_created CHECK (updated_at >= created_at) -); - --- Triggers for studio_sessions table -SELECT setup_updated_at('studio_sessions'); - --- Indexes for studio_sessions table -CREATE INDEX studio_sessions_workspace_idx - ON studio_sessions (workspace_id, created_at DESC); - -CREATE INDEX studio_sessions_account_idx - ON studio_sessions (account_id, created_at DESC); - -CREATE INDEX studio_sessions_file_idx - ON studio_sessions (primary_file_id); - -CREATE INDEX studio_sessions_status_idx - ON studio_sessions (session_status, workspace_id) - WHERE session_status = 'active'; - --- Comments for studio_sessions table -COMMENT ON TABLE studio_sessions IS - 'LLM-assisted document editing sessions.'; - -COMMENT ON COLUMN studio_sessions.id IS 'Unique session identifier'; -COMMENT ON COLUMN studio_sessions.workspace_id IS 'Reference to the workspace'; -COMMENT ON COLUMN studio_sessions.account_id IS 'Account that created the session'; -COMMENT ON COLUMN studio_sessions.primary_file_id IS 'Primary file being edited in this session'; -COMMENT ON COLUMN studio_sessions.display_name IS 'User-friendly session name (1-255 chars)'; -COMMENT ON COLUMN studio_sessions.session_status IS 'Session lifecycle status (active, paused, archived)'; -COMMENT ON COLUMN studio_sessions.model_config IS 'LLM configuration (model, temperature, etc.)'; -COMMENT ON COLUMN studio_sessions.message_count IS 'Total number of messages exchanged in this session'; -COMMENT ON COLUMN studio_sessions.token_count IS 'Total tokens used in this session'; -COMMENT ON COLUMN studio_sessions.created_at IS 'Timestamp when session was created'; -COMMENT ON COLUMN studio_sessions.updated_at IS 'Timestamp when session was last modified'; - --- Tool execution status -CREATE TYPE STUDIO_TOOL_STATUS AS ENUM ( - 'pending', - 'running', - 'completed', - 'cancelled' -); - -COMMENT ON TYPE STUDIO_TOOL_STATUS IS - 'Execution status for studio tool calls.'; - --- Studio tool calls table definition -CREATE TABLE studio_tool_calls ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - session_id UUID NOT NULL REFERENCES studio_sessions (id) ON DELETE CASCADE, - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, - - -- Tool attributes - tool_name TEXT NOT NULL, - tool_input JSONB NOT NULL DEFAULT '{}', - tool_output JSONB NOT NULL DEFAULT '{}', - tool_status STUDIO_TOOL_STATUS NOT NULL DEFAULT 'pending', - - CONSTRAINT studio_tool_calls_tool_name_length CHECK (length(trim(tool_name)) BETWEEN 1 AND 128), - CONSTRAINT studio_tool_calls_tool_input_size CHECK (length(tool_input::TEXT) BETWEEN 2 AND 65536), - CONSTRAINT studio_tool_calls_tool_output_size CHECK (length(tool_output::TEXT) BETWEEN 2 AND 65536), - - -- Timing - started_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - completed_at TIMESTAMPTZ DEFAULT NULL, - - 
CONSTRAINT studio_tool_calls_completed_after_started CHECK (completed_at IS NULL OR completed_at >= started_at) -); - --- Indexes for studio_tool_calls table -CREATE INDEX studio_tool_calls_session_idx - ON studio_tool_calls (session_id, started_at DESC); - -CREATE INDEX studio_tool_calls_file_idx - ON studio_tool_calls (file_id, started_at DESC); - -CREATE INDEX studio_tool_calls_status_idx - ON studio_tool_calls (tool_status, started_at DESC) - WHERE tool_status IN ('pending', 'running'); - -CREATE INDEX studio_tool_calls_tool_name_idx - ON studio_tool_calls (tool_name); - --- Comments for studio_tool_calls table -COMMENT ON TABLE studio_tool_calls IS - 'Tool invocations for debugging and usage tracking. Input/output contain references, not document content.'; - -COMMENT ON COLUMN studio_tool_calls.id IS 'Unique tool call identifier'; -COMMENT ON COLUMN studio_tool_calls.session_id IS 'Reference to the studio session'; -COMMENT ON COLUMN studio_tool_calls.file_id IS 'Reference to the file being operated on'; -COMMENT ON COLUMN studio_tool_calls.chunk_id IS 'Optional reference to a specific chunk'; -COMMENT ON COLUMN studio_tool_calls.tool_name IS 'Name of the tool (merge, split, redact, translate, etc.)'; -COMMENT ON COLUMN studio_tool_calls.tool_input IS 'Tool parameters as JSON (references, not content)'; -COMMENT ON COLUMN studio_tool_calls.tool_output IS 'Tool result as JSON (references, not content)'; -COMMENT ON COLUMN studio_tool_calls.tool_status IS 'Execution status (pending, running, completed, cancelled)'; -COMMENT ON COLUMN studio_tool_calls.started_at IS 'Timestamp when tool call was created/started'; -COMMENT ON COLUMN studio_tool_calls.completed_at IS 'Timestamp when tool execution completed'; - --- Studio operations table definition -CREATE TABLE studio_operations ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - tool_call_id UUID NOT NULL REFERENCES studio_tool_calls (id) ON DELETE CASCADE, - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, - - -- Operation attributes - operation_type TEXT NOT NULL, - operation_diff JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT studio_operations_operation_type_length CHECK (length(trim(operation_type)) BETWEEN 1 AND 64), - CONSTRAINT studio_operations_operation_diff_size CHECK (length(operation_diff::TEXT) BETWEEN 2 AND 131072), - - -- Application state - applied BOOLEAN NOT NULL DEFAULT FALSE, - reverted BOOLEAN NOT NULL DEFAULT FALSE, - - CONSTRAINT studio_operations_revert_requires_applied CHECK (NOT reverted OR applied), - - -- Timing - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - applied_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT studio_operations_applied_after_created CHECK (applied_at IS NULL OR applied_at >= created_at) -); - --- Indexes for studio_operations table -CREATE INDEX studio_operations_tool_call_idx - ON studio_operations (tool_call_id); - -CREATE INDEX studio_operations_file_idx - ON studio_operations (file_id, created_at DESC); - -CREATE INDEX studio_operations_pending_idx - ON studio_operations (file_id, applied) - WHERE NOT applied; - --- Comments for studio_operations table -COMMENT ON TABLE studio_operations IS - 'Document operations (diffs) produced by tool calls. 
Stores positions, not content.'; - -COMMENT ON COLUMN studio_operations.id IS 'Unique operation identifier'; -COMMENT ON COLUMN studio_operations.tool_call_id IS 'Reference to the tool call that produced this operation'; -COMMENT ON COLUMN studio_operations.file_id IS 'Reference to the file being modified'; -COMMENT ON COLUMN studio_operations.chunk_id IS 'Optional reference to a specific chunk'; -COMMENT ON COLUMN studio_operations.operation_type IS 'Type of operation (insert, replace, delete, format, merge, split, etc.)'; -COMMENT ON COLUMN studio_operations.operation_diff IS 'The diff specification as JSON (positions, not content)'; -COMMENT ON COLUMN studio_operations.applied IS 'Whether this operation has been applied to the document'; -COMMENT ON COLUMN studio_operations.reverted IS 'Whether this operation was reverted by the user'; -COMMENT ON COLUMN studio_operations.created_at IS 'Timestamp when operation was created'; -COMMENT ON COLUMN studio_operations.applied_at IS 'Timestamp when operation was applied'; diff --git a/migrations/2026-01-19-045012_pipelines/down.sql b/migrations/2026-01-19-045012_pipelines/down.sql new file mode 100644 index 0000000..fdd315c --- /dev/null +++ b/migrations/2026-01-19-045012_pipelines/down.sql @@ -0,0 +1,11 @@ +-- Revert pipeline tables + +DROP VIEW IF EXISTS pipeline_run_history; +DROP VIEW IF EXISTS active_pipeline_runs; + +DROP TABLE IF EXISTS pipeline_runs; +DROP TABLE IF EXISTS pipelines; + +DROP TYPE IF EXISTS PIPELINE_TRIGGER_TYPE; +DROP TYPE IF EXISTS PIPELINE_RUN_STATUS; +DROP TYPE IF EXISTS PIPELINE_STATUS; diff --git a/migrations/2026-01-19-045012_pipelines/up.sql b/migrations/2026-01-19-045012_pipelines/up.sql new file mode 100644 index 0000000..edb8fb8 --- /dev/null +++ b/migrations/2026-01-19-045012_pipelines/up.sql @@ -0,0 +1,231 @@ +-- Pipeline: Workflow definitions and execution tracking +-- This migration creates tables for user-defined processing pipelines + +-- Pipeline status enum +CREATE TYPE PIPELINE_STATUS AS ENUM ( + 'draft', -- Pipeline is being configured + 'enabled', -- Pipeline is ready to run + 'disabled' -- Pipeline is disabled +); + +COMMENT ON TYPE PIPELINE_STATUS IS + 'Lifecycle status for pipeline definitions.'; + +-- Pipeline run status enum +CREATE TYPE PIPELINE_RUN_STATUS AS ENUM ( + 'queued', -- Run is waiting to start + 'running', -- Run is in progress + 'completed', -- Run finished successfully + 'failed', -- Run failed with error + 'cancelled' -- Run was cancelled by user +); + +COMMENT ON TYPE PIPELINE_RUN_STATUS IS + 'Execution status for pipeline runs.'; + +-- Pipeline run trigger type enum +CREATE TYPE PIPELINE_TRIGGER_TYPE AS ENUM ( + 'manual', -- Manually triggered by user + 'source', -- Triggered by source connector (upload, webhook, etc.) 
+ 'scheduled' -- Triggered by schedule (future) +); + +COMMENT ON TYPE PIPELINE_TRIGGER_TYPE IS + 'How a pipeline run was initiated.'; + +-- Pipeline definitions table +CREATE TABLE pipelines ( + -- Primary identifier + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + + -- References + workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, + account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, + + -- Core attributes + name TEXT NOT NULL, + description TEXT DEFAULT NULL, + status PIPELINE_STATUS NOT NULL DEFAULT 'draft', + + CONSTRAINT pipelines_name_length CHECK (length(trim(name)) BETWEEN 1 AND 255), + CONSTRAINT pipelines_description_length CHECK (description IS NULL OR length(description) <= 4096), + + -- Pipeline definition (flexible JSONB structure) + -- Contains: steps[], input_schema, output_schema, variables, etc. + definition JSONB NOT NULL DEFAULT '{"steps": []}', + + CONSTRAINT pipelines_definition_size CHECK (length(definition::TEXT) BETWEEN 2 AND 1048576), + + -- Configuration + metadata JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipelines_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 65536), + + -- Lifecycle timestamps + created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + deleted_at TIMESTAMPTZ DEFAULT NULL, + + CONSTRAINT pipelines_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT pipelines_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at) +); + +-- Triggers +SELECT setup_updated_at('pipelines'); + +-- Indexes +CREATE INDEX pipelines_workspace_idx + ON pipelines (workspace_id, created_at DESC) + WHERE deleted_at IS NULL; + +CREATE INDEX pipelines_account_idx + ON pipelines (account_id, created_at DESC) + WHERE deleted_at IS NULL; + +CREATE INDEX pipelines_status_idx + ON pipelines (status, workspace_id) + WHERE deleted_at IS NULL; + +CREATE INDEX pipelines_name_trgm_idx + ON pipelines USING gin (name gin_trgm_ops) + WHERE deleted_at IS NULL; + +-- Comments +COMMENT ON TABLE pipelines IS + 'User-defined processing pipeline definitions with step configurations.'; + +COMMENT ON COLUMN pipelines.id IS 'Unique pipeline identifier'; +COMMENT ON COLUMN pipelines.workspace_id IS 'Parent workspace reference'; +COMMENT ON COLUMN pipelines.account_id IS 'Creator account reference'; +COMMENT ON COLUMN pipelines.name IS 'Pipeline name (1-255 chars)'; +COMMENT ON COLUMN pipelines.description IS 'Pipeline description (up to 4096 chars)'; +COMMENT ON COLUMN pipelines.status IS 'Pipeline lifecycle status'; +COMMENT ON COLUMN pipelines.definition IS 'Pipeline definition JSON (steps, input/output schemas, etc.)'; +COMMENT ON COLUMN pipelines.metadata IS 'Extended metadata'; +COMMENT ON COLUMN pipelines.created_at IS 'Creation timestamp'; +COMMENT ON COLUMN pipelines.updated_at IS 'Last modification timestamp'; +COMMENT ON COLUMN pipelines.deleted_at IS 'Soft deletion timestamp'; + +-- Pipeline runs table (execution instances) +CREATE TABLE pipeline_runs ( + -- Primary identifier + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + + -- References + pipeline_id UUID NOT NULL REFERENCES pipelines (id) ON DELETE CASCADE, + workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, + account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, + + -- Run attributes + trigger_type PIPELINE_TRIGGER_TYPE NOT NULL DEFAULT 'manual', + status PIPELINE_RUN_STATUS NOT NULL DEFAULT 'queued', + + -- Input/output configuration 
for this run + input_config JSONB NOT NULL DEFAULT '{}', + output_config JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipeline_runs_input_config_size CHECK (length(input_config::TEXT) BETWEEN 2 AND 262144), + CONSTRAINT pipeline_runs_output_config_size CHECK (length(output_config::TEXT) BETWEEN 2 AND 262144), + + -- Snapshot of pipeline definition at run time (for reproducibility) + definition_snapshot JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipeline_runs_definition_snapshot_size CHECK (length(definition_snapshot::TEXT) BETWEEN 2 AND 1048576), + + -- Error details (if failed) + error JSONB DEFAULT NULL, + + CONSTRAINT pipeline_runs_error_size CHECK (error IS NULL OR length(error::TEXT) <= 65536), + + -- Metrics + metrics JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipeline_runs_metrics_size CHECK (length(metrics::TEXT) BETWEEN 2 AND 65536), + + -- Timing + started_at TIMESTAMPTZ DEFAULT NULL, + completed_at TIMESTAMPTZ DEFAULT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + + CONSTRAINT pipeline_runs_started_after_created CHECK (started_at IS NULL OR started_at >= created_at), + CONSTRAINT pipeline_runs_completed_after_started CHECK (completed_at IS NULL OR (started_at IS NOT NULL AND completed_at >= started_at)) +); + +-- Indexes +CREATE INDEX pipeline_runs_pipeline_idx + ON pipeline_runs (pipeline_id, created_at DESC); + +CREATE INDEX pipeline_runs_workspace_idx + ON pipeline_runs (workspace_id, created_at DESC); + +CREATE INDEX pipeline_runs_account_idx + ON pipeline_runs (account_id, created_at DESC); + +CREATE INDEX pipeline_runs_status_idx + ON pipeline_runs (status, created_at DESC) + WHERE status IN ('queued', 'running'); + +CREATE INDEX pipeline_runs_trigger_idx + ON pipeline_runs (trigger_type, workspace_id); + +-- Comments +COMMENT ON TABLE pipeline_runs IS + 'Pipeline execution instances with status tracking and metrics.'; + +COMMENT ON COLUMN pipeline_runs.id IS 'Unique run identifier'; +COMMENT ON COLUMN pipeline_runs.pipeline_id IS 'Reference to pipeline definition'; +COMMENT ON COLUMN pipeline_runs.workspace_id IS 'Parent workspace reference'; +COMMENT ON COLUMN pipeline_runs.account_id IS 'Account that triggered the run'; +COMMENT ON COLUMN pipeline_runs.trigger_type IS 'How the run was initiated'; +COMMENT ON COLUMN pipeline_runs.status IS 'Current execution status'; +COMMENT ON COLUMN pipeline_runs.input_config IS 'Runtime input configuration'; +COMMENT ON COLUMN pipeline_runs.output_config IS 'Runtime output configuration'; +COMMENT ON COLUMN pipeline_runs.definition_snapshot IS 'Pipeline definition snapshot at run time'; +COMMENT ON COLUMN pipeline_runs.error IS 'Error details if run failed'; +COMMENT ON COLUMN pipeline_runs.metrics IS 'Run metrics (duration, resources, etc.)'; +COMMENT ON COLUMN pipeline_runs.started_at IS 'When execution started'; +COMMENT ON COLUMN pipeline_runs.completed_at IS 'When execution completed'; +COMMENT ON COLUMN pipeline_runs.created_at IS 'When run was created/queued'; + +-- View for active pipeline runs +CREATE VIEW active_pipeline_runs AS +SELECT + pr.id, + pr.pipeline_id, + p.name AS pipeline_name, + pr.workspace_id, + pr.account_id, + pr.trigger_type, + pr.status, + pr.started_at, + pr.created_at, + EXTRACT(EPOCH FROM (COALESCE(pr.completed_at, current_timestamp) - pr.started_at)) AS duration_seconds +FROM pipeline_runs pr + JOIN pipelines p ON pr.pipeline_id = p.id +WHERE pr.status IN ('queued', 'running') +ORDER BY pr.created_at DESC; + +COMMENT ON VIEW active_pipeline_runs IS + 'Currently active pipeline 
runs with progress information.'; + +-- View for pipeline run history +CREATE VIEW pipeline_run_history AS +SELECT + pr.id, + pr.pipeline_id, + p.name AS pipeline_name, + pr.workspace_id, + pr.trigger_type, + pr.status, + pr.started_at, + pr.completed_at, + EXTRACT(EPOCH FROM (pr.completed_at - pr.started_at)) AS duration_seconds, + pr.error IS NOT NULL AS has_error, + pr.created_at +FROM pipeline_runs pr + JOIN pipelines p ON pr.pipeline_id = p.id +WHERE pr.status IN ('completed', 'failed', 'cancelled') +ORDER BY pr.completed_at DESC; + +COMMENT ON VIEW pipeline_run_history IS + 'Completed pipeline runs for history and analytics.'; diff --git a/packages/nvisy-dal/README.md b/packages/nvisy-dal/README.md new file mode 100644 index 0000000..84e4f50 --- /dev/null +++ b/packages/nvisy-dal/README.md @@ -0,0 +1,116 @@ +# nvisy-dal + +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + +Data abstraction layer for external integrations. Provides unified async interfaces for storage, databases, and vector stores. + +## Installation + +```bash +# Core package +uv add nvisy-dal + +# With specific providers +uv add "nvisy-dal[postgres,s3,pinecone]" + +# All providers +uv add "nvisy-dal[all]" +``` + +## Available Providers + +| Provider | Extra | Description | +|----------|-------|-------------| +| PostgreSQL | `postgres` | Relational database via asyncpg | +| MySQL | `mysql` | Relational database via aiomysql | +| S3 | `s3` | Object storage (AWS S3, MinIO) | +| GCS | `gcs` | Google Cloud Storage | +| Azure Blob | `azure` | Azure Blob Storage | +| Qdrant | `qdrant` | Vector database | +| Pinecone | `pinecone` | Vector database | + +## Usage + +```python +from nvisy_dal import Provider, DataInput, DataOutput +from nvisy_dal.providers.postgres import PostgresProvider, PostgresCredentials, PostgresParams + +# Connect to provider +provider = await PostgresProvider.connect( + credentials=PostgresCredentials( + host="localhost", + port=5432, + user="postgres", + password="password", + database="mydb", + ), + params=PostgresParams(table="users"), +) + +# Read data +async for record in provider.read(ctx): + print(record) + +# Write data +await provider.write(ctx, records) + +# Disconnect +await provider.disconnect() +``` + +## Architecture + +This package is the Python half of the nvisy DAL system: + +- **Rust (nvisy-dal crate)**: Streaming, observability, unified interface, server integration +- **Python (nvisy-dal package)**: Provider implementations, client libraries, external integrations + +The Rust layer loads this package via PyO3 to delegate actual provider calls to Python. + +## Protocols + +All providers implement these core protocols: + +```python +class Provider(Protocol[Cred, Params]): + @classmethod + async def connect(cls, credentials: Cred, params: Params) -> Self: ... + async def disconnect(self) -> None: ... + +class DataInput(Protocol[T, Ctx]): + async def read(self, ctx: Ctx) -> AsyncIterator[T]: ... + +class DataOutput(Protocol[T, Ctx]): + async def write(self, ctx: Ctx, items: Sequence[T]) -> None: ... +``` + +## Development + +```bash +# Install dev dependencies +uv sync --extra dev + +# Run tests +uv run pytest + +# Type check +uv run pyright + +# Lint +uv run ruff check . +``` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. 
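+
+## Implementing a Provider
+
+Providers are plain classes that satisfy the protocols described in the Protocols section above; there is no base class to inherit from. The sketch below is illustrative only — the `MemoryProvider` class and its credential/param/context models are hypothetical and not part of this package — and shows the minimal shape the Rust layer expects from `connect`, `read`, `write`, and `disconnect`:
+
+```python
+from collections.abc import AsyncIterator, Sequence
+from typing import Self
+
+from pydantic import BaseModel
+
+
+class MemoryCredentials(BaseModel):
+    """Placeholder credentials; a real provider would hold connection secrets."""
+
+
+class MemoryParams(BaseModel):
+    """Static configuration chosen at connect time."""
+
+    collection: str = "default"
+
+
+class MemoryContext(BaseModel):
+    """Per-call context, e.g. an optional read limit."""
+
+    limit: int | None = None
+
+
+class MemoryProvider:
+    """In-memory provider used only to illustrate the protocol surface."""
+
+    def __init__(self, params: MemoryParams) -> None:
+        self._params = params
+        self._items: list[dict[str, object]] = []
+
+    @classmethod
+    async def connect(cls, credentials: MemoryCredentials, params: MemoryParams) -> Self:
+        # A real provider would open a client or connection pool here.
+        return cls(params)
+
+    async def disconnect(self) -> None:
+        # Release anything acquired in connect().
+        self._items.clear()
+
+    async def read(self, ctx: MemoryContext) -> AsyncIterator[dict[str, object]]:
+        # Yield items lazily, honoring the per-call limit (None means "all").
+        for item in self._items[: ctx.limit]:
+            yield item
+
+    async def write(self, ctx: MemoryContext, items: Sequence[dict[str, object]]) -> None:
+        # Append a batch of items to the in-memory store.
+        self._items.extend(items)
+```
+
+Because the protocols are structural (`typing.Protocol`), any class with these methods type-checks as a `Provider`, `DataInput`, and `DataOutput` without explicit registration.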
+ +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/packages/nvisy-dal/py.typed b/packages/nvisy-dal/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/nvisy-dal/pyproject.toml b/packages/nvisy-dal/pyproject.toml new file mode 100644 index 0000000..9440352 --- /dev/null +++ b/packages/nvisy-dal/pyproject.toml @@ -0,0 +1,51 @@ +[project] +name = "nvisy-dal" +version = "0.1.0" +description = "Data abstraction layer for external integrations" +requires-python = ">=3.12" +dependencies = ["pydantic>=2.10"] + +[project.optional-dependencies] +s3 = ["boto3>=1.35", "boto3-stubs[s3]"] +gcs = ["google-cloud-storage>=2.18"] +azure = ["azure-storage-blob>=12.23"] +postgres = ["asyncpg>=0.30", "asyncpg-stubs>=0.30"] +mysql = ["aiomysql>=0.2"] +qdrant = ["qdrant-client>=1.12"] +pinecone = ["pinecone>=5.0"] +all = ["nvisy-dal[s3,gcs,azure,postgres,mysql,qdrant,pinecone]"] +dev = ["nvisy-dal[all]", "pytest>=8.0", "pytest-asyncio>=0.24", "moto>=5.0"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/nvisy_dal"] + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["ALL"] +ignore = ["D", "COM812", "ISC001"] + +[tool.ruff.lint.isort] +known-first-party = ["nvisy_dal"] + +[tool.basedpyright] +pythonVersion = "3.12" +typeCheckingMode = "strict" +# Third-party libraries (boto3, pinecone) have incomplete type stubs with **kwargs: Unknown +reportUnknownMemberType = "warning" + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" + +[dependency-groups] +dev = [ + "pyright>=1.1.408", + "ruff>=0.14.14", +] diff --git a/packages/nvisy-dal/pyrightconfig.json b/packages/nvisy-dal/pyrightconfig.json new file mode 100644 index 0000000..8fd8643 --- /dev/null +++ b/packages/nvisy-dal/pyrightconfig.json @@ -0,0 +1,4 @@ +{ + "venvPath": ".", + "venv": ".venv" +} diff --git a/packages/nvisy-dal/src/nvisy_dal/__init__.py b/packages/nvisy-dal/src/nvisy_dal/__init__.py new file mode 100644 index 0000000..0eb72d3 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/__init__.py @@ -0,0 +1,12 @@ +"""Data abstraction layer for external integrations.""" + +from nvisy_dal.errors import DalError, ErrorKind +from nvisy_dal.protocols import DataInput, DataOutput, Provider + +__all__ = [ + "DalError", + "DataInput", + "DataOutput", + "ErrorKind", + "Provider", +] diff --git a/packages/nvisy-dal/src/nvisy_dal/errors.py b/packages/nvisy-dal/src/nvisy_dal/errors.py new file mode 100644 index 0000000..4a40404 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/errors.py @@ -0,0 +1,37 @@ +"""Error types for provider operations.""" + +from enum import StrEnum +from typing import final, override + + +class ErrorKind(StrEnum): + """Classification of provider errors.""" + + CONNECTION = "connection" + NOT_FOUND = "not_found" + INVALID_INPUT = "invalid_input" + TIMEOUT = "timeout" + PROVIDER = "provider" + + +@final +class DalError(Exception): + """Base error for all provider operations.""" + + __slots__ = ("kind", "message", "source") + + def __init__( + self, + message: str, + *, + kind: ErrorKind = ErrorKind.PROVIDER, + source: BaseException | 
None = None, + ) -> None: + super().__init__(message) + self.message = message + self.kind = kind + self.source = source + + @override + def __repr__(self) -> str: + return f"DalError({self.message!r}, kind={self.kind!r})" diff --git a/packages/nvisy-dal/src/nvisy_dal/generated/__init__.py b/packages/nvisy-dal/src/nvisy_dal/generated/__init__.py new file mode 100644 index 0000000..92878c8 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/generated/__init__.py @@ -0,0 +1,48 @@ +"""Generated types from Rust JSON schemas. + +This module contains Pydantic models generated from the Rust schema definitions. +Do not edit manually - regenerate with `make codegen`. +""" + +from nvisy_dal.generated.contexts import ObjectContext, RelationalContext, VectorContext +from nvisy_dal.generated.datatypes import ( + Document, + Edge, + Embedding, + Graph, + JsonValue, + Message, + Metadata, + Node, + Object, + Record, +) +from nvisy_dal.generated.params import ( + DistanceMetric, + ObjectParams, + RelationalParams, + VectorParams, +) + +__all__ = [ + # Contexts (runtime state) + "ObjectContext", + "RelationalContext", + "VectorContext", + # Params (configuration) + "DistanceMetric", + "ObjectParams", + "RelationalParams", + "VectorParams", + # Data types + "Document", + "Edge", + "Embedding", + "Graph", + "JsonValue", + "Message", + "Metadata", + "Node", + "Object", + "Record", +] diff --git a/packages/nvisy-dal/src/nvisy_dal/generated/contexts.py b/packages/nvisy-dal/src/nvisy_dal/generated/contexts.py new file mode 100644 index 0000000..34af2b6 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/generated/contexts.py @@ -0,0 +1,29 @@ +"""Context types for provider operations. + +Generated from Rust schemas. Do not edit manually. +""" + +from pydantic import BaseModel + + +class ObjectContext(BaseModel, frozen=True): + """Context for object storage operations (S3, GCS, Azure Blob).""" + + prefix: str | None = None + token: str | None = None + limit: int | None = None + + +class RelationalContext(BaseModel, frozen=True): + """Context for relational database operations (Postgres, MySQL).""" + + cursor: str | None = None + tiebreaker: str | None = None + limit: int | None = None + + +class VectorContext(BaseModel, frozen=True): + """Context for vector database operations (Qdrant, Pinecone, pgvector).""" + + token: str | None = None + limit: int | None = None diff --git a/packages/nvisy-dal/src/nvisy_dal/generated/datatypes.py b/packages/nvisy-dal/src/nvisy_dal/generated/datatypes.py new file mode 100644 index 0000000..e8b843b --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/generated/datatypes.py @@ -0,0 +1,77 @@ +"""Data types for provider input/output. + +Generated from Rust schemas. Do not edit manually. 
+""" + +from pydantic import BaseModel, Field + +# JSON-compatible value type (matches serde_json::Value) +type JsonValue = str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"] + +# Metadata associated with data items +type Metadata = dict[str, JsonValue] + + +class Object(BaseModel): + """An object representing a file or binary data (S3, GCS, Azure Blob).""" + + path: str + data: bytes + content_type: str | None = None + metadata: Metadata = Field(default_factory=dict) + + +class Document(BaseModel): + """A document with flexible JSON content.""" + + id: str + content: JsonValue + metadata: Metadata = Field(default_factory=dict) + + +class Embedding(BaseModel): + """A vector embedding with metadata.""" + + id: str + vector: list[float] + metadata: Metadata = Field(default_factory=dict) + + +class Record(BaseModel): + """A record representing a row in a relational table.""" + + columns: dict[str, JsonValue] = Field(default_factory=dict) + + +class Message(BaseModel): + """A message from a queue or stream.""" + + id: str + payload: bytes + headers: dict[str, str] = Field(default_factory=dict) + timestamp: str | None = None + + +class Node(BaseModel): + """A node in a graph.""" + + id: str + labels: list[str] = Field(default_factory=list) + properties: dict[str, JsonValue] = Field(default_factory=dict) + + +class Edge(BaseModel): + """An edge in a graph.""" + + id: str + from_: str = Field(alias="from") + to: str + label: str + properties: dict[str, JsonValue] = Field(default_factory=dict) + + +class Graph(BaseModel): + """A graph containing nodes and edges.""" + + nodes: list[Node] = Field(default_factory=list) + edges: list[Edge] = Field(default_factory=list) diff --git a/packages/nvisy-dal/src/nvisy_dal/generated/params.py b/packages/nvisy-dal/src/nvisy_dal/generated/params.py new file mode 100644 index 0000000..db7b784 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/generated/params.py @@ -0,0 +1,42 @@ +"""Parameter types for provider configuration. + +Generated from Rust schemas. Do not edit manually. 
+""" + +from enum import Enum + +from pydantic import BaseModel, Field + + +class RelationalParams(BaseModel, frozen=True): + """Common parameters for relational database operations.""" + + table: str | None = None + cursor_column: str | None = None + tiebreaker_column: str | None = None + batch_size: int = Field(default=1000) + + +class ObjectParams(BaseModel, frozen=True): + """Common parameters for object storage operations.""" + + bucket: str | None = None + prefix: str | None = None + batch_size: int = Field(default=1000) + + +class DistanceMetric(str, Enum): + """Distance metric for vector similarity search.""" + + COSINE = "cosine" + EUCLIDEAN = "euclidean" + DOT_PRODUCT = "dot_product" + + +class VectorParams(BaseModel, frozen=True): + """Common parameters for vector database operations.""" + + collection: str | None = None + dimension: int | None = None + metric: DistanceMetric = DistanceMetric.COSINE + batch_size: int = Field(default=1000) diff --git a/packages/nvisy-dal/src/nvisy_dal/protocols.py b/packages/nvisy-dal/src/nvisy_dal/protocols.py new file mode 100644 index 0000000..c8a9a42 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/protocols.py @@ -0,0 +1,42 @@ +"""Core protocols for data providers.""" + +from collections.abc import AsyncIterator, Sequence +from typing import Protocol, Self, TypeVar, runtime_checkable + +T_co = TypeVar("T_co", covariant=True) +T_contra = TypeVar("T_contra", contravariant=True) +Ctx_contra = TypeVar("Ctx_contra", contravariant=True) +Cred_contra = TypeVar("Cred_contra", contravariant=True) +Params_contra = TypeVar("Params_contra", contravariant=True) + + +@runtime_checkable +class DataInput(Protocol[T_co, Ctx_contra]): + """Protocol for reading data from external sources.""" + + async def read(self, ctx: Ctx_contra) -> AsyncIterator[T_co]: + """Yield items from the source based on context.""" + ... + + +@runtime_checkable +class DataOutput(Protocol[T_contra, Ctx_contra]): + """Protocol for writing data to external sinks.""" + + async def write(self, ctx: Ctx_contra, items: Sequence[T_contra]) -> None: + """Write a batch of items to the sink.""" + ... + + +@runtime_checkable +class Provider(Protocol[Cred_contra, Params_contra]): + """Protocol for provider lifecycle management.""" + + @classmethod + async def connect(cls, credentials: Cred_contra, params: Params_contra) -> Self: + """Establish connection to the external service.""" + ... + + async def disconnect(self) -> None: + """Release resources and close connections.""" + ... diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py b/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py new file mode 100644 index 0000000..7510fd5 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py @@ -0,0 +1,18 @@ +"""Provider implementations for external services. + +Each provider module exports a `Provider` class alias for the main provider class, +along with its credentials, params, and context types. 
+ +Available providers (require optional dependencies): +- postgres: PostgreSQL via asyncpg +- s3: AWS S3 / MinIO via boto3 +- pinecone: Pinecone vector database +""" + +from nvisy_dal.providers import pinecone, postgres, s3 + +__all__ = [ + "pinecone", + "postgres", + "s3", +] diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/pinecone.py b/packages/nvisy-dal/src/nvisy_dal/providers/pinecone.py new file mode 100644 index 0000000..223359e --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/pinecone.py @@ -0,0 +1,104 @@ +"""Pinecone vector database provider.""" + +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING, ClassVar, Self, cast + +from pydantic import BaseModel + +from nvisy_dal.errors import DalError, ErrorKind + +if TYPE_CHECKING: + from pinecone import Pinecone + from pinecone.db_data.index import Index + +try: + from pinecone import Pinecone, UpsertResponse, Vector +except ImportError as e: + _msg = "pinecone is required for Pinecone support. Install with: uv add 'nvisy-dal[pinecone]'" + raise ImportError(_msg) from e + +# Pinecone metadata value types +type MetadataValue = str | int | float | list[str] | list[int] | list[float] +type Metadata = Mapping[str, MetadataValue] + + +class PineconeCredentials(BaseModel): + """Credentials for Pinecone connection.""" + + api_key: str + + +class PineconeParams(BaseModel): + """Parameters for Pinecone operations.""" + + index_name: str + namespace: str = "" + + +class PineconeVector(BaseModel): + """Representation of a Pinecone vector.""" + + id: str + values: list[float] + metadata: dict[str, MetadataValue] | None = None + + +class PineconeProvider: + """Pinecone provider for vector upsert operations.""" + + __slots__: ClassVar[tuple[str, str, str]] = ("_client", "_index", "_params") + + _client: "Pinecone" + _index: "Index" + _params: PineconeParams + + def __init__(self, client: "Pinecone", index: "Index", params: PineconeParams) -> None: + self._client = client + self._index = index + self._params = params + + @classmethod + async def connect(cls, credentials: PineconeCredentials, params: PineconeParams) -> Self: + """Create Pinecone client and connect to index.""" + try: + client = Pinecone(api_key=credentials.api_key) + index = client.Index(params.index_name) # pyright: ignore[reportUnknownMemberType] + # Verify connection + _ = index.describe_index_stats() # pyright: ignore[reportUnknownMemberType] + except Exception as e: + msg = f"Failed to connect to Pinecone: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + + return cls(client, index, params) + + async def disconnect(self) -> None: + """Close the Pinecone client (no-op).""" + + async def upsert(self, vectors: Sequence[PineconeVector]) -> int: + """Upsert vectors to Pinecone. 
Returns count of upserted vectors.""" + if not vectors: + return 0 + + try: + records = [Vector(id=v.id, values=v.values, metadata=v.metadata) for v in vectors] + + upserted = 0 + batch_size = 100 + for i in range(0, len(records), batch_size): + batch = list(records[i : i + batch_size]) + response = cast( + UpsertResponse, + self._index.upsert( # pyright: ignore[reportUnknownMemberType] + vectors=batch, + namespace=self._params.namespace, + ), + ) + upserted += response.upserted_count or len(batch) + except Exception as e: + msg = f"Failed to upsert to Pinecone: {e}" + raise DalError(msg, source=e) from e + else: + return upserted + + +Provider = PineconeProvider diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py b/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py new file mode 100644 index 0000000..ce7b09b --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py @@ -0,0 +1,173 @@ +"""PostgreSQL provider using asyncpg.""" + +from collections.abc import AsyncIterator, Sequence +from typing import TYPE_CHECKING, ClassVar, Self + +from pydantic import BaseModel + +from nvisy_dal.errors import DalError, ErrorKind + +if TYPE_CHECKING: + from asyncpg import Pool + +try: + import asyncpg +except ImportError as e: + _msg = "asyncpg is required for PostgreSQL support. Install with: uv add 'nvisy-dal[postgres]'" + raise ImportError(_msg) from e + + +class PostgresCredentials(BaseModel): + """Credentials for PostgreSQL connection. + + Uses a connection string (DSN) format: postgres://user:pass@host:port/database + """ + + dsn: str + + +class PostgresParams(BaseModel): + """Parameters for PostgreSQL operations.""" + + table: str + schema_name: str = "public" + batch_size: int = 1000 + + +class PostgresContext(BaseModel): + """Context for read/write operations.""" + + columns: list[str] | None = None + where: dict[str, object] | None = None + order_by: str | None = None + limit: int | None = None + offset: int | None = None + + +class PostgresProvider: + """PostgreSQL provider for relational data operations.""" + + __slots__: ClassVar[tuple[str, str]] = ("_params", "_pool") + + _params: PostgresParams + _pool: "Pool" + + def __init__(self, pool: "Pool", params: PostgresParams) -> None: + self._pool = pool + self._params = params + + @classmethod + async def connect( + cls, + credentials: PostgresCredentials, + params: PostgresParams, + ) -> Self: + """Establish connection pool to PostgreSQL.""" + try: + pool = await asyncpg.create_pool( + dsn=credentials.dsn, + min_size=1, + max_size=10, + ) + except Exception as e: + msg = f"Failed to connect to PostgreSQL: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + + return cls(pool, params) + + async def disconnect(self) -> None: + """Close the connection pool.""" + await self._pool.close() + + async def read(self, ctx: PostgresContext) -> AsyncIterator[dict[str, object]]: + """Read records from the database using parameterized queries.""" + try: + async with self._pool.acquire() as conn: + # Build query with proper parameter binding + columns = ", ".join(f'"{c}"' for c in ctx.columns) if ctx.columns else "*" + table = f'"{self._params.schema_name}"."{self._params.table}"' + + query_parts: list[str] = [f"SELECT {columns} FROM {table}"] # noqa: S608 + params: list[object] = [] + + if ctx.where: + conditions: list[str] = [] + for key, value in ctx.where.items(): + if value is None: + conditions.append(f'"{key}" IS NULL') + else: + params.append(value) + conditions.append(f'"{key}" = ${len(params)}') 
+ if conditions: + query_parts.append("WHERE " + " AND ".join(conditions)) + + if ctx.order_by: + # Order by should be validated/sanitized by caller + query_parts.append(f"ORDER BY {ctx.order_by}") + + if ctx.limit is not None: + params.append(ctx.limit) + query_parts.append(f"LIMIT ${len(params)}") + + if ctx.offset is not None: + params.append(ctx.offset) + query_parts.append(f"OFFSET ${len(params)}") + + query = " ".join(query_parts) + async for record in conn.cursor(query, *params): + yield dict(record) + except Exception as e: + msg = f"Failed to read from PostgreSQL: {e}" + raise DalError(msg, source=e) from e + + async def write(self, items: Sequence[dict[str, object]]) -> None: + """Write records to the database.""" + if not items: + return + + columns = list(items[0].keys()) + placeholders = ", ".join(f"${i + 1}" for i in range(len(columns))) + column_names = ", ".join(f'"{c}"' for c in columns) + table = f'"{self._params.schema_name}"."{self._params.table}"' + query = f"INSERT INTO {table} ({column_names}) VALUES ({placeholders})" # noqa: S608 + + try: + async with self._pool.acquire() as conn: + for i in range(0, len(items), self._params.batch_size): + batch = items[i : i + self._params.batch_size] + await conn.executemany(query, [tuple(item.values()) for item in batch]) + except Exception as e: + msg = f"Failed to write to PostgreSQL: {e}" + raise DalError(msg, source=e) from e + + async def execute(self, query: str, *args: object) -> str: + """Execute a raw SQL query.""" + try: + async with self._pool.acquire() as conn: + return await conn.execute(query, *args) + except Exception as e: + msg = f"Failed to execute query: {e}" + raise DalError(msg, source=e) from e + + async def fetch_one(self, query: str, *args: object) -> dict[str, object] | None: + """Fetch a single record.""" + try: + async with self._pool.acquire() as conn: + record = await conn.fetchrow(query, *args) + return dict(record) if record else None + except Exception as e: + msg = f"Failed to fetch record: {e}" + raise DalError(msg, source=e) from e + + async def fetch_all(self, query: str, *args: object) -> list[dict[str, object]]: + """Fetch all records.""" + try: + async with self._pool.acquire() as conn: + records = await conn.fetch(query, *args) + return [dict(record) for record in records] + except Exception as e: + msg = f"Failed to fetch records: {e}" + raise DalError(msg, source=e) from e + + +Provider = PostgresProvider diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/s3.py b/packages/nvisy-dal/src/nvisy_dal/providers/s3.py new file mode 100644 index 0000000..ed1bd62 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/s3.py @@ -0,0 +1,242 @@ +"""S3 provider using boto3.""" + +from collections.abc import AsyncIterator, Sequence +from typing import TYPE_CHECKING, ClassVar, Self + +from pydantic import BaseModel + +from nvisy_dal.errors import DalError, ErrorKind + +if TYPE_CHECKING: + from mypy_boto3_s3 import S3Client + +try: + import boto3 + from botocore.exceptions import ClientError +except ImportError as e: + _msg = "boto3 is required for S3 support. 
Install with: uv add 'nvisy-dal[s3]'" + raise ImportError(_msg) from e + + +class S3Credentials(BaseModel): + """Credentials for S3 connection.""" + + access_key_id: str + secret_access_key: str + region: str = "us-east-1" + endpoint_url: str | None = None + + +class S3Params(BaseModel): + """Parameters for S3 operations.""" + + bucket: str + prefix: str = "" + + +class S3Context(BaseModel): + """Context for read/write operations.""" + + key: str | None = None + prefix: str | None = None + max_keys: int = 1000 + content_type: str = "application/octet-stream" + + +class S3Object(BaseModel): + """Representation of an S3 object.""" + + key: str + size: int + last_modified: str + etag: str + content: bytes | None = None + + +class S3Provider: + """S3 provider for object storage operations.""" + + __slots__: ClassVar[tuple[str, str]] = ("_client", "_params") + + _client: "S3Client" + _params: S3Params + + def __init__(self, client: "S3Client", params: S3Params) -> None: + self._client = client + self._params = params + + @classmethod + async def connect(cls, credentials: S3Credentials, params: S3Params) -> Self: + """Create S3 client.""" + try: + client: S3Client = boto3.client( # pyright: ignore[reportUnknownMemberType] + "s3", + aws_access_key_id=credentials.access_key_id, + aws_secret_access_key=credentials.secret_access_key, + region_name=credentials.region, + endpoint_url=credentials.endpoint_url, + ) + # Verify connection by checking bucket exists + _ = client.head_bucket(Bucket=params.bucket) + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code", "Unknown") + if error_code == "404": + msg = f"Bucket '{params.bucket}' not found" + raise DalError(msg, kind=ErrorKind.NOT_FOUND, source=e) from e + msg = f"Failed to connect to S3: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + except Exception as e: + msg = f"Failed to connect to S3: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + + return cls(client, params) + + async def disconnect(self) -> None: + """Close the S3 client (no-op for boto3).""" + + async def read(self, ctx: S3Context) -> AsyncIterator[S3Object]: + """List and optionally read objects from S3.""" + prefix = ctx.prefix or self._params.prefix + continuation_token: str | None = None + + try: + while True: + if continuation_token: + response = self._client.list_objects_v2( + Bucket=self._params.bucket, + Prefix=prefix, + MaxKeys=ctx.max_keys, + ContinuationToken=continuation_token, + ) + else: + response = self._client.list_objects_v2( + Bucket=self._params.bucket, + Prefix=prefix, + MaxKeys=ctx.max_keys, + ) + + for obj in response.get("Contents", []): + obj_key = obj.get("Key") + obj_size = obj.get("Size") + obj_modified = obj.get("LastModified") + obj_etag = obj.get("ETag") + + if not obj_key or obj_size is None or not obj_modified or not obj_etag: + continue + + content = None + if ctx.key and obj_key == ctx.key: + get_response = self._client.get_object( + Bucket=self._params.bucket, + Key=obj_key, + ) + content = get_response["Body"].read() + + yield S3Object( + key=obj_key, + size=obj_size, + last_modified=obj_modified.isoformat(), + etag=obj_etag.strip('"'), + content=content, + ) + + if not response.get("IsTruncated"): + break + + continuation_token = response.get("NextContinuationToken") + + except ClientError as e: + msg = f"Failed to read from S3: {e}" + raise DalError(msg, source=e) from e + + async def write(self, ctx: S3Context, items: Sequence[S3Object]) -> None: + """Write objects to 
S3.""" + try: + for item in items: + if item.content is None: + continue + + key = self._resolve_key(item.key) + _ = self._client.put_object( + Bucket=self._params.bucket, + Key=key, + Body=item.content, + ContentType=ctx.content_type, + ) + except ClientError as e: + msg = f"Failed to write to S3: {e}" + raise DalError(msg, source=e) from e + + async def get(self, key: str) -> bytes: + """Get object content by key.""" + try: + full_key = self._resolve_key(key) + response = self._client.get_object( + Bucket=self._params.bucket, + Key=full_key, + ) + return response["Body"].read() + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code", "Unknown") + if error_code == "NoSuchKey": + msg = f"Object '{key}' not found" + raise DalError(msg, kind=ErrorKind.NOT_FOUND, source=e) from e + msg = f"Failed to get object: {e}" + raise DalError(msg, source=e) from e + + async def put( + self, + key: str, + content: bytes, + content_type: str = "application/octet-stream", + ) -> None: + """Put object content by key.""" + try: + full_key = self._resolve_key(key) + _ = self._client.put_object( + Bucket=self._params.bucket, + Key=full_key, + Body=content, + ContentType=content_type, + ) + except ClientError as e: + msg = f"Failed to put object: {e}" + raise DalError(msg, source=e) from e + + async def delete(self, key: str) -> None: + """Delete object by key.""" + try: + full_key = self._resolve_key(key) + _ = self._client.delete_object( + Bucket=self._params.bucket, + Key=full_key, + ) + except ClientError as e: + msg = f"Failed to remove object: {e}" + raise DalError(msg, source=e) from e + + async def exists(self, key: str) -> bool: + """Check if object exists.""" + try: + full_key = self._resolve_key(key) + _ = self._client.head_object( + Bucket=self._params.bucket, + Key=full_key, + ) + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code", "Unknown") + if error_code == "404": + return False + msg = f"Failed to check object existence: {e}" + raise DalError(msg, source=e) from e + else: + return True + + def _resolve_key(self, key: str) -> str: + """Resolve key with prefix if needed.""" + if self._params.prefix and not key.startswith(self._params.prefix): + return f"{self._params.prefix}{key}" + return key + + +Provider = S3Provider diff --git a/packages/nvisy-dal/src/nvisy_dal/py.typed b/packages/nvisy-dal/src/nvisy_dal/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/nvisy-dal/uv.lock b/packages/nvisy-dal/uv.lock new file mode 100644 index 0000000..6c5d179 --- /dev/null +++ b/packages/nvisy-dal/uv.lock @@ -0,0 +1,1460 @@ +version = 1 +revision = 1 +requires-python = ">=3.12" +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version < '3.13'", +] + +[[package]] +name = "aiomysql" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pymysql" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/29/e0/302aeffe8d90853556f47f3106b89c16cc2ec2a4d269bdfd82e3f4ae12cc/aiomysql-0.3.2.tar.gz", hash = "sha256:72d15ef5cfc34c03468eb41e1b90adb9fd9347b0b589114bd23ead569a02ac1a", size = 108311 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/af/aae0153c3e28712adaf462328f6c7a3c196a1c1c27b491de4377dd3e6b52/aiomysql-0.3.2-py3-none-any.whl", hash = "sha256:c82c5ba04137d7afd5c693a258bea8ead2aad77101668044143a991e04632eb2", size = 71834 }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592 }, +] + +[[package]] +name = "asyncpg" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = "sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042 }, + { url = "https://files.pythonhosted.org/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504 }, + { url = "https://files.pythonhosted.org/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241 }, + { url = "https://files.pythonhosted.org/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321 }, + { url = "https://files.pythonhosted.org/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685 }, + { url = "https://files.pythonhosted.org/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858 }, + { url = "https://files.pythonhosted.org/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852 }, + { url = 
"https://files.pythonhosted.org/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175 }, + { url = "https://files.pythonhosted.org/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111 }, + { url = "https://files.pythonhosted.org/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928 }, + { url = "https://files.pythonhosted.org/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067 }, + { url = "https://files.pythonhosted.org/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156 }, + { url = "https://files.pythonhosted.org/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636 }, + { url = "https://files.pythonhosted.org/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079 }, + { url = "https://files.pythonhosted.org/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606 }, + { url = "https://files.pythonhosted.org/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569 }, + { url = "https://files.pythonhosted.org/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867 }, + { url = "https://files.pythonhosted.org/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349 }, + { url = "https://files.pythonhosted.org/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428 }, + { url = 
"https://files.pythonhosted.org/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678 }, + { url = "https://files.pythonhosted.org/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505 }, + { url = "https://files.pythonhosted.org/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744 }, + { url = "https://files.pythonhosted.org/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251 }, + { url = "https://files.pythonhosted.org/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901 }, + { url = "https://files.pythonhosted.org/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280 }, + { url = "https://files.pythonhosted.org/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931 }, + { url = "https://files.pythonhosted.org/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608 }, + { url = "https://files.pythonhosted.org/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738 }, + { url = "https://files.pythonhosted.org/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026 }, + { url = "https://files.pythonhosted.org/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426 }, + { url = "https://files.pythonhosted.org/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495 }, + { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = 
"sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062 }, +] + +[[package]] +name = "asyncpg-stubs" +version = "0.31.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asyncpg" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/e1/a51adefd76533eeff03d442bb4acbc96c2e27e04c85ce4be410b2ea92f33/asyncpg_stubs-0.31.1.tar.gz", hash = "sha256:6d7342417f867365c98b67d5ae40cb57ce6b2a9eb921fff39d9296961fca18be", size = 20591 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/ab/2301aace8c32be52832f3af75aadfd3c8516b8e7764ba8fa82c6008a99aa/asyncpg_stubs-0.31.1-py3-none-any.whl", hash = "sha256:96c0cf3786948f313207b990d26bf3430daf385ca2913ba65d9dd0ede6bf8bf4", size = 27651 }, +] + +[[package]] +name = "azure-core" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/1b/e503e08e755ea94e7d3419c9242315f888fc664211c90d032e40479022bf/azure_core-1.38.0.tar.gz", hash = "sha256:8194d2682245a3e4e3151a667c686464c3786fed7918b394d035bdcd61bb5993", size = 363033 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/d8/b8fcba9464f02b121f39de2db2bf57f0b216fe11d014513d666e8634380d/azure_core-1.38.0-py3-none-any.whl", hash = "sha256:ab0c9b2cd71fecb1842d52c965c95285d3cfb38902f6766e4a471f1cd8905335", size = 217825 }, +] + +[[package]] +name = "azure-storage-blob" +version = "12.28.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "cryptography" }, + { name = "isodate" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/24/072ba8e27b0e2d8fec401e9969b429d4f5fc4c8d4f0f05f4661e11f7234a/azure_storage_blob-12.28.0.tar.gz", hash = "sha256:e7d98ea108258d29aa0efbfd591b2e2075fa1722a2fae8699f0b3c9de11eff41", size = 604225 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/3a/6ef2047a072e54e1142718d433d50e9514c999a58f51abfff7902f3a72f8/azure_storage_blob-12.28.0-py3-none-any.whl", hash = "sha256:00fb1db28bf6a7b7ecaa48e3b1d5c83bfadacc5a678b77826081304bd87d6461", size = 431499 }, +] + +[[package]] +name = "boto3" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d0/69/c0d4cc77add3cdf66f8573555d71dc23ba32dfe77df40e1c91385f7a9bdc/boto3-1.42.34.tar.gz", hash = "sha256:75d7443c81a029283442fad138629be1eefaa3e6d430c28118a0f4cdbd57855d", size = 112876 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/55/25c543864abc270f5fdd7814fa7b69fd23de1c40fb3d7993f4b6391f8d3b/boto3-1.42.34-py3-none-any.whl", hash = "sha256:db3fb539e3f806b911ec4ca991f2f8bff333c5f0b87132a82e28b521fc5ec164", size = 140574 }, +] + +[[package]] +name = "boto3-stubs" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore-stubs" }, + { name = "types-s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/e4/959e63b009194cae2fad6ddff8ef1c0e7e2f9113bca4c7ec20fa579e4d7a/boto3_stubs-1.42.34.tar.gz", hash = "sha256:fafcc3713c331bac11bf55fe913e5a3a01820f0cde640cfc4694df5a94aa9557", size = 100898 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a3/c4/1aba1653afc3cf5ef985235cea05d3e9e6736033f10ebbf102a23fc0152d/boto3_stubs-1.42.34-py3-none-any.whl", hash = "sha256:eb98cf3cc0a74ed75ea4945152cf10da57c8c9628104a13db16cde10176219ab", size = 69782 }, +] + +[package.optional-dependencies] +s3 = [ + { name = "mypy-boto3-s3" }, +] + +[[package]] +name = "botocore" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/f0/5702b704844e8920e01ce865cde0da574827163fbd7c0207d351ff6eea2c/botocore-1.42.34.tar.gz", hash = "sha256:92e44747da7890270d8dcc494ecc61fc315438440c55e00dc37a57d402b1bb66", size = 14907713 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/99/226fb4b2d141d7ac59465e3cdd2ca3a9a2917d85e1a3160884a78b097bbb/botocore-1.42.34-py3-none-any.whl", hash = "sha256:94099b5d09d0c4bfa6414fb3cffd54275ce6e51d7ba016f17a0e79f9274f68f7", size = 14579956 }, +] + +[[package]] +name = "botocore-stubs" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-awscrt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/1e/024e45fb46a21d085b541ce0ad8f1bef97ce17c5e72d1dc0e4d09d29e399/botocore_stubs-1.42.34.tar.gz", hash = "sha256:f3d1c5b45c2cbe16f63719abe639b23a1eeb3fec9c3ea0a72688585b462e8ce3", size = 42408 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/c8/3845c17b89ff19e2c2474801a6737d1766ee8e80cf38d7d97e1fedc28537/botocore_stubs-1.42.34-py3-none-any.whl", hash = "sha256:afc08661122eff6939d88cd250084ac148e392f8a1a389d51a31a4b9dab59358", size = 66760 }, +] + +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900 }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271 }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048 }, + { url = 
"https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529 }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097 }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983 }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519 }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572 }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963 }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361 }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932 }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557 }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762 }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230 }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043 }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = 
"sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446 }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101 }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948 }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422 }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499 }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928 }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302 }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909 }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402 }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780 }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320 }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487 }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049 }, + { url = 
"https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793 }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300 }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244 }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828 }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926 }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328 }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650 }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687 }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773 }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013 }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593 }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354 }, + { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 
208480 }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584 }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443 }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437 }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487 }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726 }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 }, + { url = 
"https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 }, + { url = 
"https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "cryptography" +version = "46.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258 } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004 }, + { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667 }, + { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807 }, + { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615 }, + { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800 }, + { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707 }, + { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541 }, + { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464 }, + { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838 }, + { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596 }, + { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782 }, + { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381 }, + { url = "https://files.pythonhosted.org/packages/96/92/8a6a9525893325fc057a01f654d7efc2c64b9de90413adcf605a85744ff4/cryptography-46.0.3-cp311-abi3-win32.whl", 
hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018", size = 3055988 }, + { url = "https://files.pythonhosted.org/packages/7e/bf/80fbf45253ea585a1e492a6a17efcb93467701fa79e71550a430c5e60df0/cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb", size = 3514451 }, + { url = "https://files.pythonhosted.org/packages/2e/af/9b302da4c87b0beb9db4e756386a7c6c5b8003cd0e742277888d352ae91d/cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c", size = 2928007 }, + { url = "https://files.pythonhosted.org/packages/f5/e2/a510aa736755bffa9d2f75029c229111a1d02f8ecd5de03078f4c18d91a3/cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217", size = 7158012 }, + { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728 }, + { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078 }, + { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460 }, + { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237 }, + { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344 }, + { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564 }, + { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415 }, + { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457 }, + { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074 }, + { url = 
"https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569 }, + { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941 }, + { url = "https://files.pythonhosted.org/packages/fd/30/27654c1dbaf7e4a3531fa1fc77986d04aefa4d6d78259a62c9dc13d7ad36/cryptography-46.0.3-cp314-cp314t-win32.whl", hash = "sha256:8a6e050cb6164d3f830453754094c086ff2d0b2f3a897a1d9820f6139a1f0914", size = 3022339 }, + { url = "https://files.pythonhosted.org/packages/f6/30/640f34ccd4d2a1bc88367b54b926b781b5a018d65f404d409aba76a84b1c/cryptography-46.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:760f83faa07f8b64e9c33fc963d790a2edb24efb479e3520c14a45741cd9b2db", size = 3494315 }, + { url = "https://files.pythonhosted.org/packages/ba/8b/88cc7e3bd0a8e7b861f26981f7b820e1f46aa9d26cc482d0feba0ecb4919/cryptography-46.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:516ea134e703e9fe26bcd1277a4b59ad30586ea90c365a87781d7887a646fe21", size = 2919331 }, + { url = "https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248 }, + { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089 }, + { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029 }, + { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222 }, + { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280 }, + { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958 }, + { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714 }, + { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = 
"sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970 }, + { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236 }, + { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642 }, + { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126 }, + { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573 }, + { url = "https://files.pythonhosted.org/packages/0a/6e/1c8331ddf91ca4730ab3086a0f1be19c65510a33b5a441cb334e7a2d2560/cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df", size = 3036695 }, + { url = "https://files.pythonhosted.org/packages/90/45/b0d691df20633eff80955a0fc7695ff9051ffce8b69741444bd9ed7bd0db/cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f", size = 3501720 }, + { url = "https://files.pythonhosted.org/packages/e8/cb/2da4cc83f5edb9c3257d09e1e7ab7b23f049c7962cae8d842bbef0a9cec9/cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372", size = 2918740 }, +] + +[[package]] +name = "google-api-core" +version = "2.29.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0d/10/05572d33273292bac49c2d1785925f7bc3ff2fe50e3044cf1062c1dde32e/google_api_core-2.29.0.tar.gz", hash = "sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7", size = 177828 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/b6/85c4d21067220b9a78cfb81f516f9725ea6befc1544ec9bd2c1acd97c324/google_api_core-2.29.0-py3-none-any.whl", hash = "sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9", size = 173906 }, +] + +[[package]] +name = "google-auth" +version = "2.47.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1-modules" }, + { name = "rsa" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/3c/ec64b9a275ca22fa1cd3b6e77fefcf837b0732c890aa32d2bd21313d9b33/google_auth-2.47.0.tar.gz", hash = "sha256:833229070a9dfee1a353ae9877dcd2dec069a8281a4e72e72f77d4a70ff945da", size = 323719 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/18/79e9008530b79527e0d5f79e7eef08d3b179b7f851cfd3a2f27822fbdfa9/google_auth-2.47.0-py3-none-any.whl", hash = "sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498", size = 234867 }, +] + +[[package]] +name = "google-cloud-core" +version = 
"2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/03/ef0bc99d0e0faf4fdbe67ac445e18cdaa74824fd93cd069e7bb6548cb52d/google_cloud_core-2.5.0.tar.gz", hash = "sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963", size = 36027 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/20/bfa472e327c8edee00f04beecc80baeddd2ab33ee0e86fd7654da49d45e9/google_cloud_core-2.5.0-py3-none-any.whl", hash = "sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc", size = 29469 }, +] + +[[package]] +name = "google-cloud-storage" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-cloud-core" }, + { name = "google-crc32c" }, + { name = "google-resumable-media" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/90/4398cecc2704cb066bc7dee6111a5c93c59bcd6fb751f0541315655774a8/google_cloud_storage-3.8.0.tar.gz", hash = "sha256:cc67952dce84ebc9d44970e24647a58260630b7b64d72360cedaf422d6727f28", size = 17273792 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/db/326279870d349fb9592263343dca4ad76088c17c88ba97b0f64c1088276c/google_cloud_storage-3.8.0-py3-none-any.whl", hash = "sha256:78cfeae7cac2ca9441d0d0271c2eb4ebfa21aa4c6944dd0ccac0389e81d955a7", size = 312430 }, +] + +[[package]] +name = "google-crc32c" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300 }, + { url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867 }, + { url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364 }, + { url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740 }, + { url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437 }, + { url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = 
"sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297 }, + { url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867 }, + { url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344 }, + { url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694 }, + { url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435 }, + { url = "https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301 }, + { url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868 }, + { url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381 }, + { url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734 }, + { url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878 }, +] + +[[package]] +name = "google-resumable-media" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-crc32c" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/d7/520b62a35b23038ff005e334dba3ffc75fcf583bee26723f1fd8fd4b6919/google_resumable_media-2.8.0.tar.gz", hash = "sha256:f1157ed8b46994d60a1bc432544db62352043113684d4e030ee02e77ebe9a1ae", size = 2163265 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/0b/93afde9cfe012260e9fe1522f35c9b72d6ee222f316586b1f23ecf44d518/google_resumable_media-2.8.0-py3-none-any.whl", hash = "sha256:dd14a116af303845a8d932ddae161a26e86cc229645bc98b39f026f9b1717582", size = 81340 }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.72.0" +source = { registry = "https://pypi.org/simple" } +dependencies 
= [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515 }, +] + +[[package]] +name = "grpcio" +version = "1.76.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718 }, + { url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627 }, + { url = "https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167 }, + { url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267 }, + { url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963 }, + { url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484 }, + { url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777 }, + { url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014 }, + { url = "https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750 }, + { url = 
"https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003 }, + { url = "https://files.pythonhosted.org/packages/fc/ed/71467ab770effc9e8cef5f2e7388beb2be26ed642d567697bb103a790c72/grpcio-1.76.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:26ef06c73eb53267c2b319f43e6634c7556ea37672029241a056629af27c10e2", size = 5807716 }, + { url = "https://files.pythonhosted.org/packages/2c/85/c6ed56f9817fab03fa8a111ca91469941fb514e3e3ce6d793cb8f1e1347b/grpcio-1.76.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:45e0111e73f43f735d70786557dc38141185072d7ff8dc1829d6a77ac1471468", size = 11821522 }, + { url = "https://files.pythonhosted.org/packages/ac/31/2b8a235ab40c39cbc141ef647f8a6eb7b0028f023015a4842933bc0d6831/grpcio-1.76.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83d57312a58dcfe2a3a0f9d1389b299438909a02db60e2f2ea2ae2d8034909d3", size = 6362558 }, + { url = "https://files.pythonhosted.org/packages/bd/64/9784eab483358e08847498ee56faf8ff6ea8e0a4592568d9f68edc97e9e9/grpcio-1.76.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3e2a27c89eb9ac3d81ec8835e12414d73536c6e620355d65102503064a4ed6eb", size = 7049990 }, + { url = "https://files.pythonhosted.org/packages/2b/94/8c12319a6369434e7a184b987e8e9f3b49a114c489b8315f029e24de4837/grpcio-1.76.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:61f69297cba3950a524f61c7c8ee12e55c486cb5f7db47ff9dcee33da6f0d3ae", size = 6575387 }, + { url = "https://files.pythonhosted.org/packages/15/0f/f12c32b03f731f4a6242f771f63039df182c8b8e2cf8075b245b409259d4/grpcio-1.76.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6a15c17af8839b6801d554263c546c69c4d7718ad4321e3166175b37eaacca77", size = 7166668 }, + { url = "https://files.pythonhosted.org/packages/ff/2d/3ec9ce0c2b1d92dd59d1c3264aaec9f0f7c817d6e8ac683b97198a36ed5a/grpcio-1.76.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:25a18e9810fbc7e7f03ec2516addc116a957f8cbb8cbc95ccc80faa072743d03", size = 8124928 }, + { url = "https://files.pythonhosted.org/packages/1a/74/fd3317be5672f4856bcdd1a9e7b5e17554692d3db9a3b273879dc02d657d/grpcio-1.76.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:931091142fd8cc14edccc0845a79248bc155425eee9a98b2db2ea4f00a235a42", size = 7589983 }, + { url = "https://files.pythonhosted.org/packages/45/bb/ca038cf420f405971f19821c8c15bcbc875505f6ffadafe9ffd77871dc4c/grpcio-1.76.0-cp313-cp313-win32.whl", hash = "sha256:5e8571632780e08526f118f74170ad8d50fb0a48c23a746bef2a6ebade3abd6f", size = 3984727 }, + { url = "https://files.pythonhosted.org/packages/41/80/84087dc56437ced7cdd4b13d7875e7439a52a261e3ab4e06488ba6173b0a/grpcio-1.76.0-cp313-cp313-win_amd64.whl", hash = "sha256:f9f7bd5faab55f47231ad8dba7787866b69f5e93bc306e3915606779bbfb4ba8", size = 4702799 }, + { url = "https://files.pythonhosted.org/packages/b4/46/39adac80de49d678e6e073b70204091e76631e03e94928b9ea4ecf0f6e0e/grpcio-1.76.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:ff8a59ea85a1f2191a0ffcc61298c571bc566332f82e5f5be1b83c9d8e668a62", size = 5808417 }, + { url = "https://files.pythonhosted.org/packages/9c/f5/a4531f7fb8b4e2a60b94e39d5d924469b7a6988176b3422487be61fe2998/grpcio-1.76.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06c3d6b076e7b593905d04fdba6a0525711b3466f43b3400266f04ff735de0cd", size = 11828219 }, + { url = 
"https://files.pythonhosted.org/packages/4b/1c/de55d868ed7a8bd6acc6b1d6ddc4aa36d07a9f31d33c912c804adb1b971b/grpcio-1.76.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fd5ef5932f6475c436c4a55e4336ebbe47bd3272be04964a03d316bbf4afbcbc", size = 6367826 }, + { url = "https://files.pythonhosted.org/packages/59/64/99e44c02b5adb0ad13ab3adc89cb33cb54bfa90c74770f2607eea629b86f/grpcio-1.76.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b331680e46239e090f5b3cead313cc772f6caa7d0fc8de349337563125361a4a", size = 7049550 }, + { url = "https://files.pythonhosted.org/packages/43/28/40a5be3f9a86949b83e7d6a2ad6011d993cbe9b6bd27bea881f61c7788b6/grpcio-1.76.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2229ae655ec4e8999599469559e97630185fdd53ae1e8997d147b7c9b2b72cba", size = 6575564 }, + { url = "https://files.pythonhosted.org/packages/4b/a9/1be18e6055b64467440208a8559afac243c66a8b904213af6f392dc2212f/grpcio-1.76.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:490fa6d203992c47c7b9e4a9d39003a0c2bcc1c9aa3c058730884bbbb0ee9f09", size = 7176236 }, + { url = "https://files.pythonhosted.org/packages/0f/55/dba05d3fcc151ce6e81327541d2cc8394f442f6b350fead67401661bf041/grpcio-1.76.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:479496325ce554792dba6548fae3df31a72cef7bad71ca2e12b0e58f9b336bfc", size = 8125795 }, + { url = "https://files.pythonhosted.org/packages/4a/45/122df922d05655f63930cf42c9e3f72ba20aadb26c100ee105cad4ce4257/grpcio-1.76.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1c9b93f79f48b03ada57ea24725d83a30284a012ec27eab2cf7e50a550cbbbcc", size = 7592214 }, + { url = "https://files.pythonhosted.org/packages/4a/6e/0b899b7f6b66e5af39e377055fb4a6675c9ee28431df5708139df2e93233/grpcio-1.76.0-cp314-cp314-win32.whl", hash = "sha256:747fa73efa9b8b1488a95d0ba1039c8e2dca0f741612d80415b1e1c560febf4e", size = 4062961 }, + { url = "https://files.pythonhosted.org/packages/19/41/0b430b01a2eb38ee887f88c1f07644a1df8e289353b78e82b37ef988fb64/grpcio-1.76.0-cp314-cp314-win_amd64.whl", hash = "sha256:922fa70ba549fce362d2e2871ab542082d66e2aaf0c19480ea453905b01f384e", size = 4834462 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779 }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = 
"sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, +] + +[[package]] +name = "isodate" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, +] + +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419 }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615 }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020 }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332 }, + { url = 
"https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947 }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962 }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760 }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529 }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015 }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540 }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105 }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906 }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622 }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029 }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374 }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980 }, + { url = 
"https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990 }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784 }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588 }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041 }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543 }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113 }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911 }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658 }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066 }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639 }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569 }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284 }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801 }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769 }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642 }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612 }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200 }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973 }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619 }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029 }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408 }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005 }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048 }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821 }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606 }, + { url = 
"https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043 }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747 }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341 }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073 }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661 }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069 }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670 }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598 }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261 }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835 }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733 }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672 }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = 
"sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819 }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426 }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146 }, +] + +[[package]] +name = "moto" +version = "5.1.20" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "boto3" }, + { name = "botocore" }, + { name = "cryptography" }, + { name = "jinja2" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "responses" }, + { name = "werkzeug" }, + { name = "xmltodict" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/93/6b696aab5174721696a17716a488086e21f7b2547b4c9517f799a9b25e9e/moto-5.1.20.tar.gz", hash = "sha256:6d12d781e26a550d80e4b7e01d5538178e3adec6efbdec870e06e84750f13ec0", size = 8318716 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/2f/f50892fdb28097917b87d358a5fcefd30976289884ff142893edcb0243ba/moto-5.1.20-py3-none-any.whl", hash = "sha256:58c82c8e6b2ef659ef3a562fa415dce14da84bc7a797943245d9a338496ea0ea", size = 6392751 }, +] + +[[package]] +name = "mypy-boto3-s3" +version = "1.42.21" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a0/32/aa7208348dc8db8bd4ea357e5e6e1e8bcba44419033d03456c3b767a6c98/mypy_boto3_s3-1.42.21.tar.gz", hash = "sha256:cab71c918aac7d98c4d742544c722e37d8e7178acb8bc88a0aead7b1035026d2", size = 76024 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/c0/01babfa8cef5f992a2a0f3d52fc1123fbbc336ab6decfdfc8f702e88a8af/mypy_boto3_s3-1.42.21-py3-none-any.whl", hash = "sha256:f5b7d1ed718ba5b00f67e95a9a38c6a021159d3071ea235e6cf496e584115ded", size = 83169 }, +] + +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438 }, +] + +[[package]] +name = "numpy" +version = "2.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/7f/ec53e32bf10c813604edf07a3682616bd931d026fcde7b6d13195dfb684a/numpy-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", size = 16656888 }, + { url = "https://files.pythonhosted.org/packages/b8/e0/1f9585d7dae8f14864e948fd7fa86c6cb72dee2676ca2748e63b1c5acfe0/numpy-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", size = 12373956 }, + { url = "https://files.pythonhosted.org/packages/8e/43/9762e88909ff2326f5e7536fa8cb3c49fb03a7d92705f23e6e7f553d9cb3/numpy-2.4.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", size = 5202567 }, + { url = "https://files.pythonhosted.org/packages/4b/ee/34b7930eb61e79feb4478800a4b95b46566969d837546aa7c034c742ef98/numpy-2.4.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", size = 6549459 }, + { url = "https://files.pythonhosted.org/packages/79/e3/5f115fae982565771be994867c89bcd8d7208dbfe9469185497d70de5ddf/numpy-2.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", size = 14404859 }, + { url = "https://files.pythonhosted.org/packages/d9/7d/9c8a781c88933725445a859cac5d01b5871588a15969ee6aeb618ba99eee/numpy-2.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", size = 16371419 }, + { url = "https://files.pythonhosted.org/packages/a6/d2/8aa084818554543f17cf4162c42f162acbd3bb42688aefdba6628a859f77/numpy-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", size = 16182131 }, + { url = "https://files.pythonhosted.org/packages/60/db/0425216684297c58a8df35f3284ef56ec4a043e6d283f8a59c53562caf1b/numpy-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", size = 18295342 }, + { url = "https://files.pythonhosted.org/packages/31/4c/14cb9d86240bd8c386c881bafbe43f001284b7cce3bc01623ac9475da163/numpy-2.4.1-cp312-cp312-win32.whl", hash = "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", size = 5959015 }, + { url = "https://files.pythonhosted.org/packages/51/cf/52a703dbeb0c65807540d29699fef5fda073434ff61846a564d5c296420f/numpy-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", size = 12310730 }, + { url = "https://files.pythonhosted.org/packages/69/80/a828b2d0ade5e74a9fe0f4e0a17c30fdc26232ad2bc8c9f8b3197cf7cf18/numpy-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", size = 10312166 }, + { url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495 }, + { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657 }, + { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256 }, + { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = 
"sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212 }, + { url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871 }, + { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305 }, + { url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909 }, + { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380 }, + { url = "https://files.pythonhosted.org/packages/67/78/722b62bd31842ff029412271556a1a27a98f45359dea78b1548a3a9996aa/numpy-2.4.1-cp313-cp313-win32.whl", hash = "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", size = 5957089 }, + { url = "https://files.pythonhosted.org/packages/da/a6/cf32198b0b6e18d4fbfa9a21a992a7fca535b9bb2b0cdd217d4a3445b5ca/numpy-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", size = 12307230 }, + { url = "https://files.pythonhosted.org/packages/44/6c/534d692bfb7d0afe30611320c5fb713659dcb5104d7cc182aff2aea092f5/numpy-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", size = 10313125 }, + { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156 }, + { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663 }, + { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224 }, + { url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352 }, + { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279 }, + { url = 
"https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316 }, + { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884 }, + { url = "https://files.pythonhosted.org/packages/37/a4/b073f3e9d77f9aec8debe8ca7f9f6a09e888ad1ba7488f0c3b36a94c03ac/numpy-2.4.1-cp313-cp313t-win32.whl", hash = "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", size = 6081138 }, + { url = "https://files.pythonhosted.org/packages/16/16/af42337b53844e67752a092481ab869c0523bc95c4e5c98e4dac4e9581ac/numpy-2.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", size = 12447478 }, + { url = "https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981 }, + { url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046 }, + { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858 }, + { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417 }, + { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643 }, + { url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963 }, + { url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811 }, + { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643 }, + { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601 }, + { url = 
"https://files.pythonhosted.org/packages/80/9a/0d44b468cad50315127e884802351723daca7cf1c98d102929468c81d439/numpy-2.4.1-cp314-cp314-win32.whl", hash = "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", size = 6005722 }, + { url = "https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", size = 12438590 }, + { url = "https://files.pythonhosted.org/packages/e9/da/a598d5cb260780cf4d255102deba35c1d072dc028c4547832f45dd3323a8/numpy-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", size = 10596180 }, + { url = "https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774 }, + { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274 }, + { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306 }, + { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653 }, + { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144 }, + { url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425 }, + { url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053 }, + { url = "https://files.pythonhosted.org/packages/23/12/8b5fc6b9c487a09a7957188e0943c9ff08432c65e34567cabc1623b03a51/numpy-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", size = 6152482 }, + { url = "https://files.pythonhosted.org/packages/00/a5/9f8ca5856b8940492fc24fbe13c1bc34d65ddf4079097cf9e53164d094e1/numpy-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", size = 12627117 }, + { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121 }, +] + +[[package]] +name = "nvisy-dal" +version = "0.1.0" +source = { editable = 
"." } +dependencies = [ + { name = "pydantic" }, +] + +[package.optional-dependencies] +all = [ + { name = "aiomysql" }, + { name = "asyncpg" }, + { name = "asyncpg-stubs" }, + { name = "azure-storage-blob" }, + { name = "boto3" }, + { name = "boto3-stubs", extra = ["s3"] }, + { name = "google-cloud-storage" }, + { name = "pinecone" }, + { name = "qdrant-client" }, +] +azure = [ + { name = "azure-storage-blob" }, +] +dev = [ + { name = "aiomysql" }, + { name = "asyncpg" }, + { name = "asyncpg-stubs" }, + { name = "azure-storage-blob" }, + { name = "boto3" }, + { name = "boto3-stubs", extra = ["s3"] }, + { name = "google-cloud-storage" }, + { name = "moto" }, + { name = "pinecone" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "qdrant-client" }, +] +gcs = [ + { name = "google-cloud-storage" }, +] +mysql = [ + { name = "aiomysql" }, +] +pinecone = [ + { name = "pinecone" }, +] +postgres = [ + { name = "asyncpg" }, + { name = "asyncpg-stubs" }, +] +qdrant = [ + { name = "qdrant-client" }, +] +s3 = [ + { name = "boto3" }, + { name = "boto3-stubs", extra = ["s3"] }, +] + +[package.dev-dependencies] +dev = [ + { name = "pyright" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiomysql", marker = "extra == 'mysql'", specifier = ">=0.2" }, + { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30" }, + { name = "asyncpg-stubs", marker = "extra == 'postgres'", specifier = ">=0.30" }, + { name = "azure-storage-blob", marker = "extra == 'azure'", specifier = ">=12.23" }, + { name = "boto3", marker = "extra == 's3'", specifier = ">=1.35" }, + { name = "boto3-stubs", extras = ["s3"], marker = "extra == 's3'" }, + { name = "google-cloud-storage", marker = "extra == 'gcs'", specifier = ">=2.18" }, + { name = "moto", marker = "extra == 'dev'", specifier = ">=5.0" }, + { name = "nvisy-dal", extras = ["all"], marker = "extra == 'dev'" }, + { name = "nvisy-dal", extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone"], marker = "extra == 'all'" }, + { name = "pinecone", marker = "extra == 'pinecone'", specifier = ">=5.0" }, + { name = "pydantic", specifier = ">=2.10" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24" }, + { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.12" }, +] +provides-extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone", "all", "dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.408" }, + { name = "ruff", specifier = ">=0.14.14" }, +] + +[[package]] +name = "orjson" +version = "3.11.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347 }, + { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = 
"sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435 }, + { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074 }, + { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520 }, + { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209 }, + { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837 }, + { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307 }, + { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020 }, + { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099 }, + { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540 }, + { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530 }, + { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863 }, + { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255 }, + { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252 }, + { url = 
"https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777 }, + { url = "https://files.pythonhosted.org/packages/10/43/61a77040ce59f1569edf38f0b9faadc90c8cf7e9bec2e0df51d0132c6bb7/orjson-3.11.5-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:3b01799262081a4c47c035dd77c1301d40f568f77cc7ec1bb7db5d63b0a01629", size = 245271 }, + { url = "https://files.pythonhosted.org/packages/55/f9/0f79be617388227866d50edd2fd320cb8fb94dc1501184bb1620981a0aba/orjson-3.11.5-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:61de247948108484779f57a9f406e4c84d636fa5a59e411e6352484985e8a7c3", size = 129422 }, + { url = "https://files.pythonhosted.org/packages/77/42/f1bf1549b432d4a78bfa95735b79b5dac75b65b5bb815bba86ad406ead0a/orjson-3.11.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:894aea2e63d4f24a7f04a1908307c738d0dce992e9249e744b8f4e8dd9197f39", size = 132060 }, + { url = "https://files.pythonhosted.org/packages/25/49/825aa6b929f1a6ed244c78acd7b22c1481fd7e5fda047dc8bf4c1a807eb6/orjson-3.11.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ddc21521598dbe369d83d4d40338e23d4101dad21dae0e79fa20465dbace019f", size = 130391 }, + { url = "https://files.pythonhosted.org/packages/42/ec/de55391858b49e16e1aa8f0bbbb7e5997b7345d8e984a2dec3746d13065b/orjson-3.11.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cce16ae2f5fb2c53c3eafdd1706cb7b6530a67cc1c17abe8ec747f5cd7c0c51", size = 135964 }, + { url = "https://files.pythonhosted.org/packages/1c/40/820bc63121d2d28818556a2d0a09384a9f0262407cf9fa305e091a8048df/orjson-3.11.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e46c762d9f0e1cfb4ccc8515de7f349abbc95b59cb5a2bd68df5973fdef913f8", size = 139817 }, + { url = "https://files.pythonhosted.org/packages/09/c7/3a445ca9a84a0d59d26365fd8898ff52bdfcdcb825bcc6519830371d2364/orjson-3.11.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7345c759276b798ccd6d77a87136029e71e66a8bbf2d2755cbdde1d82e78706", size = 137336 }, + { url = "https://files.pythonhosted.org/packages/9a/b3/dc0d3771f2e5d1f13368f56b339c6782f955c6a20b50465a91acb79fe961/orjson-3.11.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75bc2e59e6a2ac1dd28901d07115abdebc4563b5b07dd612bf64260a201b1c7f", size = 138993 }, + { url = "https://files.pythonhosted.org/packages/d1/a2/65267e959de6abe23444659b6e19c888f242bf7725ff927e2292776f6b89/orjson-3.11.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:54aae9b654554c3b4edd61896b978568c6daa16af96fa4681c9b5babd469f863", size = 141070 }, + { url = "https://files.pythonhosted.org/packages/63/c9/da44a321b288727a322c6ab17e1754195708786a04f4f9d2220a5076a649/orjson-3.11.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:4bdd8d164a871c4ec773f9de0f6fe8769c2d6727879c37a9666ba4183b7f8228", size = 413505 }, + { url = "https://files.pythonhosted.org/packages/7f/17/68dc14fa7000eefb3d4d6d7326a190c99bb65e319f02747ef3ebf2452f12/orjson-3.11.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a261fef929bcf98a60713bf5e95ad067cea16ae345d9a35034e73c3990e927d2", size = 151342 }, + { url = 
"https://files.pythonhosted.org/packages/c4/c5/ccee774b67225bed630a57478529fc026eda33d94fe4c0eac8fe58d4aa52/orjson-3.11.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c028a394c766693c5c9909dec76b24f37e6a1b91999e8d0c0d5feecbe93c3e05", size = 141823 }, + { url = "https://files.pythonhosted.org/packages/67/80/5d00e4155d0cd7390ae2087130637671da713959bb558db9bac5e6f6b042/orjson-3.11.5-cp313-cp313-win32.whl", hash = "sha256:2cc79aaad1dfabe1bd2d50ee09814a1253164b3da4c00a78c458d82d04b3bdef", size = 135236 }, + { url = "https://files.pythonhosted.org/packages/95/fe/792cc06a84808dbdc20ac6eab6811c53091b42f8e51ecebf14b540e9cfe4/orjson-3.11.5-cp313-cp313-win_amd64.whl", hash = "sha256:ff7877d376add4e16b274e35a3f58b7f37b362abf4aa31863dadacdd20e3a583", size = 133167 }, + { url = "https://files.pythonhosted.org/packages/46/2c/d158bd8b50e3b1cfdcf406a7e463f6ffe3f0d167b99634717acdaf5e299f/orjson-3.11.5-cp313-cp313-win_arm64.whl", hash = "sha256:59ac72ea775c88b163ba8d21b0177628bd015c5dd060647bbab6e22da3aad287", size = 126712 }, + { url = "https://files.pythonhosted.org/packages/c2/60/77d7b839e317ead7bb225d55bb50f7ea75f47afc489c81199befc5435b50/orjson-3.11.5-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e446a8ea0a4c366ceafc7d97067bfd55292969143b57e3c846d87fc701e797a0", size = 245252 }, + { url = "https://files.pythonhosted.org/packages/f1/aa/d4639163b400f8044cef0fb9aa51b0337be0da3a27187a20d1166e742370/orjson-3.11.5-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:53deb5addae9c22bbe3739298f5f2196afa881ea75944e7720681c7080909a81", size = 129419 }, + { url = "https://files.pythonhosted.org/packages/30/94/9eabf94f2e11c671111139edf5ec410d2f21e6feee717804f7e8872d883f/orjson-3.11.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82cd00d49d6063d2b8791da5d4f9d20539c5951f965e45ccf4e96d33505ce68f", size = 132050 }, + { url = "https://files.pythonhosted.org/packages/3d/c8/ca10f5c5322f341ea9a9f1097e140be17a88f88d1cfdd29df522970d9744/orjson-3.11.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3fd15f9fc8c203aeceff4fda211157fad114dde66e92e24097b3647a08f4ee9e", size = 130370 }, + { url = "https://files.pythonhosted.org/packages/25/d4/e96824476d361ee2edd5c6290ceb8d7edf88d81148a6ce172fc00278ca7f/orjson-3.11.5-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9df95000fbe6777bf9820ae82ab7578e8662051bb5f83d71a28992f539d2cda7", size = 136012 }, + { url = "https://files.pythonhosted.org/packages/85/8e/9bc3423308c425c588903f2d103cfcfe2539e07a25d6522900645a6f257f/orjson-3.11.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92a8d676748fca47ade5bc3da7430ed7767afe51b2f8100e3cd65e151c0eaceb", size = 139809 }, + { url = "https://files.pythonhosted.org/packages/e9/3c/b404e94e0b02a232b957c54643ce68d0268dacb67ac33ffdee24008c8b27/orjson-3.11.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa0f513be38b40234c77975e68805506cad5d57b3dfd8fe3baa7f4f4051e15b4", size = 137332 }, + { url = "https://files.pythonhosted.org/packages/51/30/cc2d69d5ce0ad9b84811cdf4a0cd5362ac27205a921da524ff42f26d65e0/orjson-3.11.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1863e75b92891f553b7922ce4ee10ed06db061e104f2b7815de80cdcb135ad", size = 138983 }, + { url = "https://files.pythonhosted.org/packages/0e/87/de3223944a3e297d4707d2fe3b1ffb71437550e165eaf0ca8bbe43ccbcb1/orjson-3.11.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:d4be86b58e9ea262617b8ca6251a2f0d63cc132a6da4b5fcc8e0a4128782c829", size = 141069 }, + { url = "https://files.pythonhosted.org/packages/65/30/81d5087ae74be33bcae3ff2d80f5ccaa4a8fedc6d39bf65a427a95b8977f/orjson-3.11.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:b923c1c13fa02084eb38c9c065afd860a5cff58026813319a06949c3af5732ac", size = 413491 }, + { url = "https://files.pythonhosted.org/packages/d0/6f/f6058c21e2fc1efaf918986dbc2da5cd38044f1a2d4b7b91ad17c4acf786/orjson-3.11.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:1b6bd351202b2cd987f35a13b5e16471cf4d952b42a73c391cc537974c43ef6d", size = 151375 }, + { url = "https://files.pythonhosted.org/packages/54/92/c6921f17d45e110892899a7a563a925b2273d929959ce2ad89e2525b885b/orjson-3.11.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bb150d529637d541e6af06bbe3d02f5498d628b7f98267ff87647584293ab439", size = 141850 }, + { url = "https://files.pythonhosted.org/packages/88/86/cdecb0140a05e1a477b81f24739da93b25070ee01ce7f7242f44a6437594/orjson-3.11.5-cp314-cp314-win32.whl", hash = "sha256:9cc1e55c884921434a84a0c3dd2699eb9f92e7b441d7f53f3941079ec6ce7499", size = 135278 }, + { url = "https://files.pythonhosted.org/packages/e4/97/b638d69b1e947d24f6109216997e38922d54dcdcdb1b11c18d7efd2d3c59/orjson-3.11.5-cp314-cp314-win_amd64.whl", hash = "sha256:a4f3cb2d874e03bc7767c8f88adaa1a9a05cecea3712649c3b58589ec7317310", size = 133170 }, + { url = "https://files.pythonhosted.org/packages/8f/dd/f4fff4a6fe601b4f8f3ba3aa6da8ac33d17d124491a3b804c662a70e1636/orjson-3.11.5-cp314-cp314-win_arm64.whl", hash = "sha256:38b22f476c351f9a1c43e5b07d8b5a02eb24a6ab8e75f700f7d479d4568346a5", size = 126713 }, +] + +[[package]] +name = "packaging" +version = "24.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, +] + +[[package]] +name = "pinecone" +version = "8.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "orjson" }, + { name = "pinecone-plugin-assistant" }, + { name = "pinecone-plugin-interface" }, + { name = "python-dateutil" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/33/13/f4c481a6a93dab92132d6d863b70a0e6c903f62940389435b31cf0c7d7d2/pinecone-8.0.0.tar.gz", hash = "sha256:feca7ff607706c09ffbd127ec93fa3b7110896b30c0d7a57672da73c69698d53", size = 1092653 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/6c/1d870a9211eb8f0bf60214182de001b480f94590eca9d6164a5d6d7de031/pinecone-8.0.0-py3-none-any.whl", hash = "sha256:95f714a496a91d80f3405165aedfea76ca8ac16e51e618df0434241838e353f8", size = 745902 }, +] + +[[package]] +name = "pinecone-plugin-assistant" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/1a/33249870c9e8c774dafc038419b48aa63b380b461e9a1c1cb042db31be49/pinecone_plugin_assistant-3.0.1.tar.gz", hash = 
"sha256:6b00e94ef1bf55ed601d2316ee6f71f96f93bf2155277a826638395e1090dde3", size = 152060 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/88/4b801675b4d58c5f8acd96bfd4847e6d7bc1a93ee4ff916e913dd6bda2de/pinecone_plugin_assistant-3.0.1-py3-none-any.whl", hash = "sha256:cd86ca5c98137221170e90fe81e03bbe71999992096da68c77f4af3503017622", size = 280865 }, +] + +[[package]] +name = "pinecone-plugin-interface" +version = "0.0.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/fb/e8a4063264953ead9e2b24d9b390152c60f042c951c47f4592e9996e57ff/pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846", size = 3370 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/1d/a21fdfcd6d022cb64cef5c2a29ee6691c6c103c4566b41646b080b7536a5/pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8", size = 6249 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, +] + +[[package]] +name = "portalocker" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424 }, +] + +[[package]] +name = "proto-plus" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/89/9cbe2f4bba860e149108b683bc2efec21f14d5f7ed6e25562ad86acbc373/proto_plus-1.27.0.tar.gz", hash = "sha256:873af56dd0d7e91836aee871e5799e1c6f1bda86ac9a983e0bb9f0c266a568c4", size = 56158 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/24/3b7a0818484df9c28172857af32c2397b6d8fcd99d9468bd4684f98ebf0a/proto_plus-1.27.0-py3-none-any.whl", hash = "sha256:1baa7f81cf0f8acb8bc1f6d085008ba4171eaf669629d1b6d1673b21ed1c0a82", size = 50205 }, +] + +[[package]] +name = "protobuf" +version = "6.33.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/b8/cda15d9d46d03d4aa3a67cb6bffe05173440ccf86a9541afaf7ac59a1b6b/protobuf-6.33.4.tar.gz", hash = "sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91", size = 444346 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/be/24ef9f3095bacdf95b458543334d0c4908ccdaee5130420bf064492c325f/protobuf-6.33.4-cp310-abi3-win32.whl", hash = 
"sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d", size = 425612 }, + { url = "https://files.pythonhosted.org/packages/31/ad/e5693e1974a28869e7cd244302911955c1cebc0161eb32dfa2b25b6e96f0/protobuf-6.33.4-cp310-abi3-win_amd64.whl", hash = "sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc", size = 436962 }, + { url = "https://files.pythonhosted.org/packages/66/15/6ee23553b6bfd82670207ead921f4d8ef14c107e5e11443b04caeb5ab5ec/protobuf-6.33.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0", size = 427612 }, + { url = "https://files.pythonhosted.org/packages/2b/48/d301907ce6d0db75f959ca74f44b475a9caa8fcba102d098d3c3dd0f2d3f/protobuf-6.33.4-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e", size = 324484 }, + { url = "https://files.pythonhosted.org/packages/92/1c/e53078d3f7fe710572ab2dcffd993e1e3b438ae71cfc031b71bae44fcb2d/protobuf-6.33.4-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6", size = 339256 }, + { url = "https://files.pythonhosted.org/packages/e8/8e/971c0edd084914f7ee7c23aa70ba89e8903918adca179319ee94403701d5/protobuf-6.33.4-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9", size = 323311 }, + { url = "https://files.pythonhosted.org/packages/75/b1/1dc83c2c661b4c62d56cc081706ee33a4fc2835bd90f965baa2663ef7676/protobuf-6.33.4-py3-none-any.whl", hash = "sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc", size = 170532 }, +] + +[[package]] +name = "pyasn1" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/6e630dff89739fcd427e3f72b3d905ce0acb85a45d4ec3e2678718a3487f/pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b", size = 146586 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/b5/a96872e5184f354da9c84ae119971a0a4c221fe9b27a4d94bd43f2596727/pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf", size = 83371 }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259 }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = 
"sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172 }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580 }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990 }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003 }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200 }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578 }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504 }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816 }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366 }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698 }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603 }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591 }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068 }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908 }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145 }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179 }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403 }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206 }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307 }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258 }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917 }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186 }, + { url = 
"https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164 }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146 }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788 }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133 }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852 }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679 }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766 }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005 }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622 }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725 }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040 }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691 }, + { url = 
"https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897 }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302 }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877 }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680 }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960 }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102 }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039 }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126 }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489 }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288 }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255 }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760 }, + { url = 
"https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092 }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385 }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832 }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585 }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078 }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914 }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560 }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244 }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955 }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "pymysql" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/ae/1fe3fcd9f959efa0ebe200b8de88b5a5ce3e767e38c7ac32fb179f16a388/pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03", size = 48258 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300 }, +] + +[[package]] +name = "pyright" +version = "1.1.408" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nodeenv" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/b2/5db700e52554b8f025faa9c3c624c59f1f6c8841ba81ab97641b54322f16/pyright-1.1.408.tar.gz", hash = "sha256:f28f2321f96852fa50b5829ea492f6adb0e6954568d1caa3f3af3a5f555eb684", size = 4400578 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/82/a2c93e32800940d9573fb28c346772a14778b84ba7524e691b324620ab89/pyright-1.1.408-py3-none-any.whl", hash = "sha256:090b32865f4fdb1e0e6cd82bf5618480d48eecd2eb2e70f960982a3d9a4c17c1", size = 6399144 }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801 }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543 }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040 }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102 }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700 }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700 }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318 }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714 }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800 }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 }, + { url = 
"https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427 }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 }, + { url = 
"https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912 }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = 
"sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, +] + +[[package]] +name = "qdrant-client" +version = "1.16.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "httpx", extra = ["http2"] }, + { name = "numpy" }, + { name = "portalocker" }, + { name = "protobuf" }, + { name = "pydantic" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ca/7d/3cd10e26ae97b35cf856ca1dc67576e42414ae39502c51165bb36bb1dff8/qdrant_client-1.16.2.tar.gz", hash = "sha256:ca4ef5f9be7b5eadeec89a085d96d5c723585a391eb8b2be8192919ab63185f0", size = 331112 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/13/8ce16f808297e16968269de44a14f4fef19b64d9766be1d6ba5ba78b579d/qdrant_client-1.16.2-py3-none-any.whl", hash = "sha256:442c7ef32ae0f005e88b5d3c0783c63d4912b97ae756eb5e052523be682f17d3", size = 377186 }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 }, +] + +[[package]] +name = "responses" +version = "0.25.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/95/89c054ad70bfef6da605338b009b2e283485835351a9935c7bfbfaca7ffc/responses-0.25.8.tar.gz", hash = "sha256:9374d047a575c8f781b94454db5cab590b6029505f488d12899ddb10a4af1cf4", size = 79320 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/4c/cc276ce57e572c102d9542d383b2cfd551276581dc60004cb94fe8774c11/responses-0.25.8-py3-none-any.whl", hash = "sha256:0c710af92def29c8352ceadff0c3fe340ace27cf5af1bbe46fb71275bcd2831c", size = 34769 }, +] + +[[package]] +name = "rsa" +version = "4.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696 }, +] + +[[package]] +name = "ruff" +version = "0.14.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650 }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245 }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273 }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753 }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052 }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637 }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761 }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701 }, + { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455 }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882 }, + { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549 }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416 }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = 
"sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491 }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525 }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626 }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442 }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486 }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448 }, +] + +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "types-awscrt" +version = "0.31.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/97/be/589b7bba42b5681a72bac4d714287afef4e1bb84d07c859610ff631d449e/types_awscrt-0.31.1.tar.gz", hash = "sha256:08b13494f93f45c1a92eb264755fce50ed0d1dc75059abb5e31670feb9a09724", size = 17839 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/fd/ddca80617f230bd833f99b4fb959abebffd8651f520493cae2e96276b1bd/types_awscrt-0.31.1-py3-none-any.whl", hash = "sha256:7e4364ac635f72bd57f52b093883640b1448a6eded0ecbac6e900bf4b1e4777b", size = 42516 }, +] + +[[package]] +name = "types-s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/fe/64/42689150509eb3e6e82b33ee3d89045de1592488842ddf23c56957786d05/types_s3transfer-0.16.0.tar.gz", hash = "sha256:b4636472024c5e2b62278c5b759661efeb52a81851cde5f092f24100b1ecb443", size = 13557 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/27/e88220fe6274eccd3bdf95d9382918716d312f6f6cef6a46332d1ee2feff/types_s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:1c0cd111ecf6e21437cb410f5cddb631bfb2263b77ad973e79b9c6d0cb24e0ef", size = 19247 }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611 }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584 }, +] + +[[package]] +name = "werkzeug" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/70/1469ef1d3542ae7c2c7b72bd5e3a4e6ee69d7978fa8a3af05a38eca5becf/werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67", size = 864754 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025 }, +] + +[[package]] +name = "xmltodict" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/aa/917ceeed4dbb80d2f04dbd0c784b7ee7bba8ae5a54837ef0e5e062cd3cfb/xmltodict-1.0.2.tar.gz", hash = "sha256:54306780b7c2175a3967cad1db92f218207e5bc1aba697d887807c0fb68b7649", size = 25725 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c0/20/69a0e6058bc5ea74892d089d64dfc3a62ba78917ec5e2cfa70f7c92ba3a5/xmltodict-1.0.2-py3-none-any.whl", hash = "sha256:62d0fddb0dcbc9f642745d8bbf4d81fd17d6dfaec5a15b5c1876300aad92af0d", size = 13893 }, +] diff --git a/packages/nvisy-rig/README.md b/packages/nvisy-rig/README.md new file mode 100644 index 0000000..9e9a08f --- /dev/null +++ b/packages/nvisy-rig/README.md @@ -0,0 +1,80 @@ +# nvisy-rig + +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + +AI/LLM orchestration layer. Provides unified interfaces for LLM providers and agent workflows. + +## Installation + +```bash +# Core package +uv add nvisy-rig + +# With specific providers +uv add "nvisy-rig[openai,anthropic]" + +# All providers +uv add "nvisy-rig[all]" +``` + +## Available Providers + +| Provider | Extra | Description | +|----------|-------|-------------| +| OpenAI | `openai` | GPT models, embeddings | +| Anthropic | `anthropic` | Claude models | +| Cohere | `cohere` | Command models, embeddings | + +## Usage + +```python +from nvisy_rig.agents import Agent + +# Create an agent +agent = Agent( + model="gpt-4", + system_prompt="You are a helpful assistant.", +) + +# Run completion +response = await agent.complete("Hello, world!") +print(response) +``` + +## Architecture + +This package provides the Python AI/LLM layer for the nvisy system: + +- **nvisy-dal**: Data access layer (storage, databases, vector stores) +- **nvisy-rig**: AI orchestration layer (LLM providers, agents, RAG) + +## Development + +```bash +# Install dev dependencies +uv sync --extra dev + +# Run tests +uv run pytest + +# Type check +uv run pyright + +# Lint +uv run ruff check . +``` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. 
+ +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/packages/nvisy-rig/py.typed b/packages/nvisy-rig/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/nvisy-rig/pyproject.toml b/packages/nvisy-rig/pyproject.toml new file mode 100644 index 0000000..ed2c6a9 --- /dev/null +++ b/packages/nvisy-rig/pyproject.toml @@ -0,0 +1,51 @@ +[project] +name = "nvisy-rig" +version = "0.1.0" +description = "AI/LLM orchestration layer" +requires-python = ">=3.12" +dependencies = [ + "pydantic>=2.10", + "nvisy-dal", +] + +[project.optional-dependencies] +openai = ["openai>=1.60"] +anthropic = ["anthropic>=0.40"] +cohere = ["cohere>=5.13"] +all = ["nvisy-rig[openai,anthropic,cohere]"] +dev = ["nvisy-rig[all]", "pytest>=8.0", "pytest-asyncio>=0.24"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/nvisy_rig"] + +[tool.uv.sources] +nvisy-dal = { path = "../nvisy-dal", editable = true } + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["ALL"] +ignore = ["D", "COM812", "ISC001"] + +[tool.ruff.lint.isort] +known-first-party = ["nvisy_rig", "nvisy_dal"] + +[tool.basedpyright] +pythonVersion = "3.12" +typeCheckingMode = "strict" + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" + +[dependency-groups] +dev = [ + "pyright>=1.1.408", + "ruff>=0.14.14", +] diff --git a/packages/nvisy-rig/pyrightconfig.json b/packages/nvisy-rig/pyrightconfig.json new file mode 100644 index 0000000..8fd8643 --- /dev/null +++ b/packages/nvisy-rig/pyrightconfig.json @@ -0,0 +1,4 @@ +{ + "venvPath": ".", + "venv": ".venv" +} diff --git a/packages/nvisy-rig/src/nvisy_rig/__init__.py b/packages/nvisy-rig/src/nvisy_rig/__init__.py new file mode 100644 index 0000000..bf3ebc2 --- /dev/null +++ b/packages/nvisy-rig/src/nvisy_rig/__init__.py @@ -0,0 +1 @@ +"""AI/LLM orchestration layer.""" diff --git a/packages/nvisy-rig/src/nvisy_rig/agents/__init__.py b/packages/nvisy-rig/src/nvisy_rig/agents/__init__.py new file mode 100644 index 0000000..511d50c --- /dev/null +++ b/packages/nvisy-rig/src/nvisy_rig/agents/__init__.py @@ -0,0 +1 @@ +"""Agent implementations.""" diff --git a/packages/nvisy-rig/src/nvisy_rig/generated/__init__.py b/packages/nvisy-rig/src/nvisy_rig/generated/__init__.py new file mode 100644 index 0000000..5aa7e6e --- /dev/null +++ b/packages/nvisy-rig/src/nvisy_rig/generated/__init__.py @@ -0,0 +1 @@ +"""Generated types from Rust JSON schemas.""" diff --git a/packages/nvisy-rig/uv.lock b/packages/nvisy-rig/uv.lock new file mode 100644 index 0000000..ddb9851 --- /dev/null +++ b/packages/nvisy-rig/uv.lock @@ -0,0 +1,858 @@ +version = 1 +revision = 1 +requires-python = ">=3.12" + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anthropic" +version = "0.76.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "docstring-parser" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/be/d11abafaa15d6304826438170f7574d750218f49a106c54424a40cef4494/anthropic-0.76.0.tar.gz", hash = "sha256:e0cae6a368986d5cf6df743dfbb1b9519e6a9eee9c6c942ad8121c0b34416ffe", size = 495483 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/70/7b0fd9c1a738f59d3babe2b4212031c34ab7d0fda4ffef15b58a55c5bcea/anthropic-0.76.0-py3-none-any.whl", hash = "sha256:81efa3113901192af2f0fe977d3ec73fdadb1e691586306c4256cd6d5ccc331c", size = 390309 }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592 }, +] + +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 }, + { url = 
"https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 }, + { 
url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 }, +] + +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = 
"sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274 }, +] + +[[package]] +name = "cohere" +version = "5.20.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastavro" }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "requests" }, + { name = "tokenizers" }, + { name = "types-requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/52/08564d1820970010d30421cd6e36f2e4ca552646504d3fe532eef282c88d/cohere-5.20.2.tar.gz", hash = "sha256:0aa9f3735626b70eedf15c231c61f3a58e7f8bbe5f0509fe7b2e6606c5d420f1", size = 180820 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/10/d76f045eefe42fb3f4e271d17ab41b5e73a3b6de69c98e15ab1cb0c8e6f6/cohere-5.20.2-py3-none-any.whl", hash = "sha256:26156d83bf3e3e4475e4caa1d8c4148475c5b0a253aee6066d83c643e9045be6", size = 318986 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896 }, +] + +[[package]] +name = "fastavro" +version = "1.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/8b/fa2d3287fd2267be6261d0177c6809a7fa12c5600ddb33490c8dc29e77b2/fastavro-1.12.1.tar.gz", hash = "sha256:2f285be49e45bc047ab2f6bed040bb349da85db3f3c87880e4b92595ea093b2b", size = 1025661 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7c/f0/10bd1a3d08667fa0739e2b451fe90e06df575ec8b8ba5d3135c70555c9bd/fastavro-1.12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:509818cb24b98a804fc80be9c5fed90f660310ae3d59382fc811bfa187122167", size = 1009057 }, + { url = "https://files.pythonhosted.org/packages/78/ad/0d985bc99e1fa9e74c636658000ba38a5cd7f5ab2708e9c62eaf736ecf1a/fastavro-1.12.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:089e155c0c76e0d418d7e79144ce000524dd345eab3bc1e9c5ae69d500f71b14", size = 3391866 }, + { url = "https://files.pythonhosted.org/packages/0d/9e/b4951dc84ebc34aac69afcbfbb22ea4a91080422ec2bfd2c06076ff1d419/fastavro-1.12.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44cbff7518901c91a82aab476fcab13d102e4999499df219d481b9e15f61af34", size = 3458005 }, + { url = "https://files.pythonhosted.org/packages/af/f8/5a8df450a9f55ca8441f22ea0351d8c77809fc121498b6970daaaf667a21/fastavro-1.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a275e48df0b1701bb764b18a8a21900b24cf882263cb03d35ecdba636bbc830b", size = 3295258 }, + { url = "https://files.pythonhosted.org/packages/99/b2/40f25299111d737e58b85696e91138a66c25b7334f5357e7ac2b0e8966f8/fastavro-1.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2de72d786eb38be6b16d556b27232b1bf1b2797ea09599507938cdb7a9fe3e7c", size = 3430328 }, + { url = "https://files.pythonhosted.org/packages/e0/07/85157a7c57c5f8b95507d7829b5946561e5ee656ff80e9dd9a757f53ddaf/fastavro-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:9090f0dee63fe022ee9cc5147483366cc4171c821644c22da020d6b48f576b4f", size = 444140 }, + { url = "https://files.pythonhosted.org/packages/bb/57/26d5efef9182392d5ac9f253953c856ccb66e4c549fd3176a1e94efb05c9/fastavro-1.12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:78df838351e4dff9edd10a1c41d1324131ffecbadefb9c297d612ef5363c049a", size = 1000599 }, + { url = "https://files.pythonhosted.org/packages/33/cb/8ab55b21d018178eb126007a56bde14fd01c0afc11d20b5f2624fe01e698/fastavro-1.12.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:780476c23175d2ae457c52f45b9ffa9d504593499a36cd3c1929662bf5b7b14b", size = 3335933 }, + { url = "https://files.pythonhosted.org/packages/fe/03/9c94ec9bf873eb1ffb0aa694f4e71940154e6e9728ddfdc46046d7e8ced4/fastavro-1.12.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0714b285160fcd515eb0455540f40dd6dac93bdeacdb03f24e8eac3d8aa51f8d", size = 3402066 }, + { url = "https://files.pythonhosted.org/packages/75/c8/cb472347c5a584ccb8777a649ebb28278fccea39d005fc7df19996f41df8/fastavro-1.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a8bc2dcec5843d499f2489bfe0747999108f78c5b29295d877379f1972a3d41a", size = 3240038 }, + { url = "https://files.pythonhosted.org/packages/e1/77/569ce9474c40304b3a09e109494e020462b83e405545b78069ddba5f614e/fastavro-1.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3b1921ac35f3d89090a5816b626cf46e67dbecf3f054131f84d56b4e70496f45", size = 3369398 }, + { url = "https://files.pythonhosted.org/packages/4a/1f/9589e35e9ea68035385db7bdbf500d36b8891db474063fb1ccc8215ee37c/fastavro-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:5aa777b8ee595b50aa084104cd70670bf25a7bbb9fd8bb5d07524b0785ee1699", size = 444220 }, + { url = 
"https://files.pythonhosted.org/packages/6c/d2/78435fe737df94bd8db2234b2100f5453737cffd29adee2504a2b013de84/fastavro-1.12.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c3d67c47f177e486640404a56f2f50b165fe892cc343ac3a34673b80cc7f1dd6", size = 1086611 }, + { url = "https://files.pythonhosted.org/packages/b6/be/428f99b10157230ddac77ec8cc167005b29e2bd5cbe228345192bb645f30/fastavro-1.12.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5217f773492bac43dae15ff2931432bce2d7a80be7039685a78d3fab7df910bd", size = 3541001 }, + { url = "https://files.pythonhosted.org/packages/16/08/a2eea4f20b85897740efe44887e1ac08f30dfa4bfc3de8962bdcbb21a5a1/fastavro-1.12.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:469fecb25cba07f2e1bfa4c8d008477cd6b5b34a59d48715e1b1a73f6160097d", size = 3432217 }, + { url = "https://files.pythonhosted.org/packages/87/bb/b4c620b9eb6e9838c7f7e4b7be0762834443adf9daeb252a214e9ad3178c/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d71c8aa841ef65cfab709a22bb887955f42934bced3ddb571e98fdbdade4c609", size = 3366742 }, + { url = "https://files.pythonhosted.org/packages/3d/d1/e69534ccdd5368350646fea7d93be39e5f77c614cca825c990bd9ca58f67/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:b81fc04e85dfccf7c028e0580c606e33aa8472370b767ef058aae2c674a90746", size = 3383743 }, + { url = "https://files.pythonhosted.org/packages/58/54/b7b4a0c3fb5fcba38128542da1b26c4e6d69933c923f493548bdfd63ab6a/fastavro-1.12.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9445da127751ba65975d8e4bdabf36bfcfdad70fc35b2d988e3950cce0ec0e7c", size = 1001377 }, + { url = "https://files.pythonhosted.org/packages/1e/4f/0e589089c7df0d8f57d7e5293fdc34efec9a3b758a0d4d0c99a7937e2492/fastavro-1.12.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6", size = 3320401 }, + { url = "https://files.pythonhosted.org/packages/f9/19/260110d56194ae29d7e423a336fccea8bcd103196d00f0b364b732bdb84e/fastavro-1.12.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3616e2f0e1c9265e92954fa099db79c6e7817356d3ff34f4bcc92699ae99697c", size = 3350894 }, + { url = "https://files.pythonhosted.org/packages/d0/96/58b0411e8be9694d5972bee3167d6c1fd1fdfdf7ce253c1a19a327208f4f/fastavro-1.12.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cb0337b42fd3c047fcf0e9b7597bd6ad25868de719f29da81eabb6343f08d399", size = 3229644 }, + { url = "https://files.pythonhosted.org/packages/5b/db/38660660eac82c30471d9101f45b3acfdcbadfe42d8f7cdb129459a45050/fastavro-1.12.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:64961ab15b74b7c168717bbece5660e0f3d457837c3cc9d9145181d011199fa7", size = 3329704 }, + { url = "https://files.pythonhosted.org/packages/9d/a9/1672910f458ecb30b596c9e59e41b7c00309b602a0494341451e92e62747/fastavro-1.12.1-cp314-cp314-win_amd64.whl", hash = "sha256:792356d320f6e757e89f7ac9c22f481e546c886454a6709247f43c0dd7058004", size = 452911 }, + { url = "https://files.pythonhosted.org/packages/dc/8d/2e15d0938ded1891b33eff252e8500605508b799c2e57188a933f0bd744c/fastavro-1.12.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:120aaf82ac19d60a1016afe410935fe94728752d9c2d684e267e5b7f0e70f6d9", size = 3541999 }, + { url = 
"https://files.pythonhosted.org/packages/a7/1c/6dfd082a205be4510543221b734b1191299e6a1810c452b6bc76dfa6968e/fastavro-1.12.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6a3462934b20a74f9ece1daa49c2e4e749bd9a35fa2657b53bf62898fba80f5", size = 3433972 }, + { url = "https://files.pythonhosted.org/packages/24/90/9de694625a1a4b727b1ad0958d220cab25a9b6cf7f16a5c7faa9ea7b2261/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1f81011d54dd47b12437b51dd93a70a9aa17b61307abf26542fc3c13efbc6c51", size = 3368752 }, + { url = "https://files.pythonhosted.org/packages/fa/93/b44f67589e4d439913dab6720f7e3507b0fa8b8e56d06f6fc875ced26afb/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:43ded16b3f4a9f1a42f5970c2aa618acb23ea59c4fcaa06680bdf470b255e5a8", size = 3386636 }, +] + +[[package]] +name = "filelock" +version = "3.20.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701 }, +] + +[[package]] +name = "fsspec" +version = "2026.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/7d/5df2650c57d47c57232af5ef4b4fdbff182070421e405e0d62c6cdbfaa87/fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b", size = 310496 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/c9/97cc5aae1648dcb851958a3ddf73ccd7dbe5650d95203ecb4d7720b4cdbf/fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc", size = 201838 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + +[[package]] +name = "hf-xet" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870 }, + { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584 }, + { url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004 }, + { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636 }, + { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448 }, + { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401 }, + { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866 }, + { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861 }, + { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699 }, + { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885 }, + { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550 }, + { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010 }, + { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264 }, + { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071 }, + { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = 
"sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099 }, + { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178 }, + { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214 }, + { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054 }, + { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812 }, + { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920 }, + { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735 }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[[package]] +name = "huggingface-hub" +version = "1.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" 
}, + { name = "shellingham" }, + { name = "tqdm" }, + { name = "typer-slim" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/25/74af9d16cd59ae15b12467a79a84aa0fe24be4aba68fc4da0c1864d49c17/huggingface_hub-1.3.4.tar.gz", hash = "sha256:c20d5484a611b7b7891d272e8fc9f77d5de025b0480bdacfa858efb3780b455f", size = 627683 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/07/3d0c34c345043c6a398a5882e196b2220dc5861adfa18322448b90908f26/huggingface_hub-1.3.4-py3-none-any.whl", hash = "sha256:a0c526e76eb316e96a91e8a1a7a93cf66b0dd210be1a17bd5fc5ae53cba76bfd", size = 536611 }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, +] + +[[package]] +name = "jiter" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449 }, + { url = "https://files.pythonhosted.org/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855 }, + { url = "https://files.pythonhosted.org/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171 }, + { url = "https://files.pythonhosted.org/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590 }, + { url = "https://files.pythonhosted.org/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462 }, + { url = "https://files.pythonhosted.org/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983 }, + { url = "https://files.pythonhosted.org/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328 }, + { url = "https://files.pythonhosted.org/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740 }, + { url = "https://files.pythonhosted.org/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875 }, + { url = "https://files.pythonhosted.org/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457 }, + { url = "https://files.pythonhosted.org/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546 }, + { url = "https://files.pythonhosted.org/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196 }, + { url = "https://files.pythonhosted.org/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100 }, + { url = "https://files.pythonhosted.org/packages/3d/a6/97209693b177716e22576ee1161674d1d58029eb178e01866a0422b69224/jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e", size = 313658 }, + { url = "https://files.pythonhosted.org/packages/06/4d/125c5c1537c7d8ee73ad3d530a442d6c619714b95027143f1b61c0b4dfe0/jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1", size = 318605 }, + { url = "https://files.pythonhosted.org/packages/99/bf/a840b89847885064c41a5f52de6e312e91fa84a520848ee56c97e4fa0205/jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf", size = 349803 }, + { url = "https://files.pythonhosted.org/packages/8a/88/e63441c28e0db50e305ae23e19c1d8fae012d78ed55365da392c1f34b09c/jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44", size = 365120 }, + { url = 
"https://files.pythonhosted.org/packages/0a/7c/49b02714af4343970eb8aca63396bc1c82fa01197dbb1e9b0d274b550d4e/jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45", size = 479918 }, + { url = "https://files.pythonhosted.org/packages/69/ba/0a809817fdd5a1db80490b9150645f3aae16afad166960bcd562be194f3b/jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87", size = 379008 }, + { url = "https://files.pythonhosted.org/packages/5f/c3/c9fc0232e736c8877d9e6d83d6eeb0ba4e90c6c073835cc2e8f73fdeef51/jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed", size = 361785 }, + { url = "https://files.pythonhosted.org/packages/96/61/61f69b7e442e97ca6cd53086ddc1cf59fb830549bc72c0a293713a60c525/jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9", size = 386108 }, + { url = "https://files.pythonhosted.org/packages/e9/2e/76bb3332f28550c8f1eba3bf6e5efe211efda0ddbbaf24976bc7078d42a5/jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626", size = 519937 }, + { url = "https://files.pythonhosted.org/packages/84/d6/fa96efa87dc8bff2094fb947f51f66368fa56d8d4fc9e77b25d7fbb23375/jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c", size = 510853 }, + { url = "https://files.pythonhosted.org/packages/8a/28/93f67fdb4d5904a708119a6ab58a8f1ec226ff10a94a282e0215402a8462/jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de", size = 204699 }, + { url = "https://files.pythonhosted.org/packages/c4/1f/30b0eb087045a0abe2a5c9c0c0c8da110875a1d3be83afd4a9a4e548be3c/jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a", size = 204258 }, + { url = "https://files.pythonhosted.org/packages/2c/f4/2b4daf99b96bce6fc47971890b14b2a36aef88d7beb9f057fafa032c6141/jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60", size = 185503 }, + { url = "https://files.pythonhosted.org/packages/39/ca/67bb15a7061d6fe20b9b2a2fd783e296a1e0f93468252c093481a2f00efa/jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6", size = 317965 }, + { url = "https://files.pythonhosted.org/packages/18/af/1788031cd22e29c3b14bc6ca80b16a39a0b10e611367ffd480c06a259831/jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4", size = 345831 }, + { url = "https://files.pythonhosted.org/packages/05/17/710bf8472d1dff0d3caf4ced6031060091c1320f84ee7d5dcbed1f352417/jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb", size = 361272 }, + { url = "https://files.pythonhosted.org/packages/fb/f1/1dcc4618b59761fef92d10bcbb0b038b5160be653b003651566a185f1a5c/jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7", size 
= 204604 }, + { url = "https://files.pythonhosted.org/packages/d9/32/63cb1d9f1c5c6632a783c0052cde9ef7ba82688f7065e2f0d5f10a7e3edb/jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3", size = 185628 }, + { url = "https://files.pythonhosted.org/packages/a8/99/45c9f0dbe4a1416b2b9a8a6d1236459540f43d7fb8883cff769a8db0612d/jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525", size = 312478 }, + { url = "https://files.pythonhosted.org/packages/4c/a7/54ae75613ba9e0f55fcb0bc5d1f807823b5167cc944e9333ff322e9f07dd/jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49", size = 318706 }, + { url = "https://files.pythonhosted.org/packages/59/31/2aa241ad2c10774baf6c37f8b8e1f39c07db358f1329f4eb40eba179c2a2/jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1", size = 351894 }, + { url = "https://files.pythonhosted.org/packages/54/4f/0f2759522719133a9042781b18cc94e335b6d290f5e2d3e6899d6af933e3/jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e", size = 365714 }, + { url = "https://files.pythonhosted.org/packages/dc/6f/806b895f476582c62a2f52c453151edd8a0fde5411b0497baaa41018e878/jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e", size = 478989 }, + { url = "https://files.pythonhosted.org/packages/86/6c/012d894dc6e1033acd8db2b8346add33e413ec1c7c002598915278a37f79/jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff", size = 378615 }, + { url = "https://files.pythonhosted.org/packages/87/30/d718d599f6700163e28e2c71c0bbaf6dace692e7df2592fd793ac9276717/jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a", size = 364745 }, + { url = "https://files.pythonhosted.org/packages/8f/85/315b45ce4b6ddc7d7fceca24068543b02bdc8782942f4ee49d652e2cc89f/jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a", size = 386502 }, + { url = "https://files.pythonhosted.org/packages/74/0b/ce0434fb40c5b24b368fe81b17074d2840748b4952256bab451b72290a49/jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67", size = 519845 }, + { url = "https://files.pythonhosted.org/packages/e8/a3/7a7a4488ba052767846b9c916d208b3ed114e3eb670ee984e4c565b9cf0d/jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b", size = 510701 }, + { url = "https://files.pythonhosted.org/packages/c3/16/052ffbf9d0467b70af24e30f91e0579e13ded0c17bb4a8eb2aed3cb60131/jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42", size = 205029 }, + { url = "https://files.pythonhosted.org/packages/e4/18/3cf1f3f0ccc789f76b9a754bdb7a6977e5d1d671ee97a9e14f7eb728d80e/jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = 
"sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf", size = 204960 }, + { url = "https://files.pythonhosted.org/packages/02/68/736821e52ecfdeeb0f024b8ab01b5a229f6b9293bbdb444c27efade50b0f/jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451", size = 185529 }, + { url = "https://files.pythonhosted.org/packages/30/61/12ed8ee7a643cce29ac97c2281f9ce3956eb76b037e88d290f4ed0d41480/jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7", size = 318974 }, + { url = "https://files.pythonhosted.org/packages/2d/c6/f3041ede6d0ed5e0e79ff0de4c8f14f401bbf196f2ef3971cdbe5fd08d1d/jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684", size = 345932 }, + { url = "https://files.pythonhosted.org/packages/d5/5d/4d94835889edd01ad0e2dbfc05f7bdfaed46292e7b504a6ac7839aa00edb/jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c", size = 367243 }, + { url = "https://files.pythonhosted.org/packages/fd/76/0051b0ac2816253a99d27baf3dda198663aff882fa6ea7deeb94046da24e/jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d", size = 479315 }, + { url = "https://files.pythonhosted.org/packages/70/ae/83f793acd68e5cb24e483f44f482a1a15601848b9b6f199dacb970098f77/jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993", size = 380714 }, + { url = "https://files.pythonhosted.org/packages/b1/5e/4808a88338ad2c228b1126b93fcd8ba145e919e886fe910d578230dabe3b/jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f", size = 365168 }, + { url = "https://files.pythonhosted.org/packages/0c/d4/04619a9e8095b42aef436b5aeb4c0282b4ff1b27d1db1508df9f5dc82750/jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783", size = 387893 }, + { url = "https://files.pythonhosted.org/packages/17/ea/d3c7e62e4546fdc39197fa4a4315a563a89b95b6d54c0d25373842a59cbe/jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b", size = 520828 }, + { url = "https://files.pythonhosted.org/packages/cc/0b/c6d3562a03fd767e31cb119d9041ea7958c3c80cb3d753eafb19b3b18349/jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6", size = 511009 }, + { url = "https://files.pythonhosted.org/packages/aa/51/2cb4468b3448a8385ebcd15059d325c9ce67df4e2758d133ab9442b19834/jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183", size = 205110 }, + { url = "https://files.pythonhosted.org/packages/b2/c5/ae5ec83dec9c2d1af805fd5fe8f74ebded9c8670c5210ec7820ce0dbeb1e/jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873", size = 205223 }, + { url = 
"https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564 }, +] + +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438 }, +] + +[[package]] +name = "nvisy-dal" +version = "0.1.0" +source = { editable = "../nvisy-dal" } +dependencies = [ + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiomysql", marker = "extra == 'mysql'", specifier = ">=0.2" }, + { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30" }, + { name = "azure-storage-blob", marker = "extra == 'azure'", specifier = ">=12.23" }, + { name = "boto3", marker = "extra == 's3'", specifier = ">=1.35" }, + { name = "google-cloud-storage", marker = "extra == 'gcs'", specifier = ">=2.18" }, + { name = "moto", marker = "extra == 'dev'", specifier = ">=5.0" }, + { name = "nvisy-dal", extras = ["all"], marker = "extra == 'dev'" }, + { name = "nvisy-dal", extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone"], marker = "extra == 'all'" }, + { name = "pinecone-client", marker = "extra == 'pinecone'", specifier = ">=5.0" }, + { name = "pydantic", specifier = ">=2.10" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24" }, + { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.12" }, + { name = "types-boto3", marker = "extra == 's3'" }, +] +provides-extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone", "all", "dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.408" }, + { name = "ruff", specifier = ">=0.14.14" }, +] + +[[package]] +name = "nvisy-rig" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "nvisy-dal" }, + { name = "pydantic" }, +] + +[package.optional-dependencies] +all = [ + { name = "anthropic" }, + { name = "cohere" }, + { name = "openai" }, +] +anthropic = [ + { name = "anthropic" }, +] +cohere = [ + { name = "cohere" }, +] +dev = [ + { name = "anthropic" }, + { name = "cohere" }, + { name = "openai" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, +] +openai = [ + { name = "openai" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pyright" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.40" }, + { name = "cohere", marker = "extra == 'cohere'", specifier = ">=5.13" }, + { name = "nvisy-dal", editable = "../nvisy-dal" }, + { name = "nvisy-rig", extras = ["all"], marker = "extra == 'dev'" }, + { name = "nvisy-rig", extras = ["openai", "anthropic", "cohere"], marker = "extra == 'all'" }, + { name = "openai", marker = "extra == 'openai'", specifier = ">=1.60" }, + { name = "pydantic", specifier = ">=2.10" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24" }, +] +provides-extras = ["openai", "anthropic", "cohere", "all", "dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.408" }, + { name = "ruff", specifier = ">=0.14.14" }, +] + +[[package]] +name = "openai" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 627383 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879 }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580 }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990 }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003 }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200 }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578 }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504 }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816 }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366 }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698 }, + { url = 
"https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603 }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591 }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068 }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908 }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145 }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179 }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403 }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206 }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307 }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258 }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917 }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186 }, + { url = 
"https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164 }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146 }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788 }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133 }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852 }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679 }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766 }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005 }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622 }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725 }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040 }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691 }, + { url = 
"https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897 }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302 }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877 }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680 }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960 }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102 }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039 }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126 }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489 }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288 }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255 }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760 }, + { url = 
"https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092 }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385 }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832 }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585 }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078 }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914 }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560 }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244 }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955 }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "pyright" +version = "1.1.408" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nodeenv" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/b2/5db700e52554b8f025faa9c3c624c59f1f6c8841ba81ab97641b54322f16/pyright-1.1.408.tar.gz", hash = "sha256:f28f2321f96852fa50b5829ea492f6adb0e6954568d1caa3f3af3a5f555eb684", size = 4400578 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/82/a2c93e32800940d9573fb28c346772a14778b84ba7524e691b324620ab89/pyright-1.1.408-py3-none-any.whl", hash = "sha256:090b32865f4fdb1e0e6cd82bf5618480d48eecd2eb2e70f960982a3d9a4c17c1", size = 6399144 }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801 }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 }, + { url = 
"https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427 }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912 }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = 
"sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 }, +] + +[[package]] +name = "ruff" +version = "0.14.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650 }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245 }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273 }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753 }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052 }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637 }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761 }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701 }, + { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455 }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882 }, + { url = 
"https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549 }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416 }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491 }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525 }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626 }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442 }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486 }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448 }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "tokenizers" +version = "0.22.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275 }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472 }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736 }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835 }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673 }, + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818 }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195 }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982 }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245 }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069 }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263 }, + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429 }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363 }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786 }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133 }, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, +] + +[[package]] +name = "typer-slim" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444 }, +] + +[[package]] +name = "types-requests" +version = "2.32.4.20260107" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/f3/a0663907082280664d745929205a89d41dffb29e89a50f753af7d57d0a96/types_requests-2.32.4.20260107.tar.gz", hash = "sha256:018a11ac158f801bfa84857ddec1650750e393df8a004a8a9ae2a9bec6fcb24f", size = 23165 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/12/709ea261f2bf91ef0a26a9eed20f2623227a8ed85610c1e54c5805692ecb/types_requests-2.32.4.20260107-py3-none-any.whl", hash = "sha256:b703fe72f8ce5b31ef031264fe9395cac8f46a04661a79f7ed31a80fb308730d", size = 20676 }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = 
"sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611 }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584 }, +]