diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..657b839 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,402 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + +[[package]] +name = "ariadne" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8454c8a44ce2cb9cc7e7fae67fc6128465b343b92c6631e94beca3c8d1524ea5" +dependencies = [ + "unicode-width", + "yansi", +] + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "cc" +version = "1.2.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chumsky" +version = "1.0.0-alpha.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0e82d74e6c83060ec269fe9e0d408d6de4a1645d525f9a0bbbb841ba4efd91ac" +dependencies = [ + "hashbrown", + "regex-automata", + "serde", + "stacker", + "unicode-ident", + "unicode-segmentation", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "logos" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax 0.8.8", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "maml" +version = "0.0.0" +dependencies = [ + "ariadne", + "chumsky", + "logos", + "serde", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies 
= [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys", +] + +[[package]] +name = "syn" +version = "2.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" diff --git a/Cargo.toml b/Cargo.toml index 0cc4829..76a58b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,11 @@ license-file = "LICENSE" readme = "README.md" [dependencies] +ariadne = "0.6.0" +chumsky = "1.0.0-alpha.7" +logos = "0.15.1" +serde = { version = "1.0", optional = true, features = ["derive"]} + +[features] +default = [] +serde = ["dep:serde"] diff --git a/maml.js b/maml.js new file mode 
/// A parsed MAML value: the crate's in-memory representation (AST) of a document.
///
/// Every MAML document parses to exactly one `MamlValue`; arrays and objects nest
/// further values recursively.
///
/// With the `serde` feature enabled the enum derives `Serialize`/`Deserialize` as an
/// *untagged* enum, so values are written as their plain data form rather than being
/// wrapped in a variant name.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(untagged))]
pub enum MamlValue {
    /// The `null` literal.
    Null,
    /// The `true` / `false` literals.
    Bool(bool),
    /// An integer literal (no fraction or exponent part).
    Int(i64),
    /// A number written with a fraction and/or an exponent.
    Float(f64),
    /// A quoted string or a triple-quoted raw string.
    String(String),
    /// An ordered sequence of values.
    Array(Vec<MamlValue>),
    /// A key/value mapping. Backed by a `HashMap`, so iteration order is unspecified
    /// and does not reflect the order of the members in the source text.
    Object(HashMap<String, MamlValue>),
}
b/src/parser.rs new file mode 100644 index 0000000..dc2fddd --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,226 @@ +use std::collections::HashMap; + +use ariadne::{Color, Label, Report, ReportKind, Source}; +use chumsky::prelude::*; +use logos::Logos; + +use crate::{MamlValue, tokenizer::Token}; + +/// Raw parser entrypoint +pub fn parser<'src>() -> impl Parser<'src, &'src [Token], MamlValue, extra::Err>> +{ + recursive(|value| { + // Separator: comma or newline + let separator = choice((just(Token::Comma).ignored(), just(Token::Newline).ignored())); + + // The number types + let number = choice(( + select! { Token::Float(f) => MamlValue::Float(f) }, + select! { Token::Int(i) => MamlValue::Int(i) }, + )) + .labelled("number"); + + // Strings, raw or typical + let string_val = choice(( + select! { Token::RawString(s) => MamlValue::String(s) }, + select! { Token::String(s) => MamlValue::String(s) }, + )); + + // Handling object keys + let key = choice(( + select! { Token::String(s) => s }, + select! 
{ Token::Key(s) => s }, + )); + + let array = value + .clone() + .separated_by(separator.clone().repeated().at_least(1)) + .allow_trailing() + .collect() + .padded_by(just(Token::Newline).repeated()) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .map(MamlValue::Array) + .labelled("array") + .recover_with(via_parser(nested_delimiters( + Token::LBracket, + Token::RBracket, + [ + (Token::LBracket, Token::RBracket), + (Token::LBrace, Token::RBrace), + ], + |_| MamlValue::Array(vec![]), + ))); + + // Object parsing + let member = key.then_ignore(just(Token::Colon)).then(value.clone()); + + let object = member + .separated_by(separator.repeated().at_least(1)) + .allow_trailing() + .collect() + .padded_by(just(Token::Newline).repeated()) + .delimited_by(just(Token::LBrace), just(Token::RBrace)) + .map(MamlValue::Object) + .labelled("object") + .recover_with(via_parser(nested_delimiters( + Token::LBrace, + Token::RBrace, + [ + (Token::LBracket, Token::RBracket), + (Token::LBrace, Token::RBrace), + ], + |_| MamlValue::Object(HashMap::new()), + ))); + + // Entry point/top-level choice + choice(( + just(Token::Null).to(MamlValue::Null), + just(Token::True).to(MamlValue::Bool(true)), + just(Token::False).to(MamlValue::Bool(false)), + number, + string_val, + array, + object, + )) + }) +} + +/// Parse from string +pub fn from_str(input: &str) -> Result { + // Tokenize + let lexer = Token::lexer(input); + let mut tokens = vec![]; + + for (token_result, span) in lexer.spanned() { + match token_result { + Ok(token) => tokens.push(token), + Err(_) => { + return Err(format!("Lexer error at {:?}", span)); + } + } + } + + // Parse + let (val, errs) = parser() + .padded_by(just(Token::Newline).repeated()) + .parse(&tokens) + .into_output_errors(); + + if !errs.is_empty() { + let mut buffer = Vec::new(); + for e in errs { + Report::build(ReportKind::Error, ("", e.span().into_range())) + .with_message(format!("{:?}", e)) + .with_label( + Label::new(("", 
e.span().into_range())) + .with_message(format!("{:?}", e.reason())) + .with_color(Color::Red), + ) + .finish() + .write(("", Source::from(input)), &mut buffer) + .unwrap(); + } + return Err(String::from_utf8_lossy(&buffer).to_string()); + } + + val.ok_or_else(|| "Unexpected parsing failure".to_string()) +} + +/// Parse with detailed error reporting to stderr +pub fn parse_with_report(filename: &str, input: &str) -> Option { + // Tokenize + let lexer = Token::lexer(input); + let mut tokens = vec![]; + + for (token_result, span) in lexer.spanned() { + match token_result { + Ok(token) => tokens.push(token), + Err(_) => { + eprintln!("Lexer error at {:?}", span); + return None; + } + } + } + + // Parse + let (val, errs) = parser() + .padded_by(just(Token::Newline).repeated()) + .parse(&tokens) + .into_output_errors(); + + if errs.is_empty() { + return val; + } + + for e in errs { + let span = e.span().into_range(); + Report::build(ReportKind::Error, (filename, span.clone())) + .with_message(format!("{}", e)) + .with_label( + Label::new((filename, span.clone())) + .with_message(e.reason().to_string()) + .with_color(Color::Red), + ) + .finish() + .eprint((filename, Source::from(input))) + .unwrap(); + } + + None +} + +#[cfg(test)] +mod tests { + use crate::parser::from_str; + + use super::*; + + #[test] + fn test_object() { + let val = from_str( + r#" + { + project: "MAML" + tags: [ + "minimal" + "readable" + ] + + # A simple nested object + spec: { + version: 1 + author: "Anton Medvedev" + } + + # Array of objects with nested objects + examples: [ + { + json: { + name: "JSON" + born: 2001 + } + } + { + maml: { + name: "MAML" + born: 2025 + } + } + ] + + notes: """ + This is a multiline strings. + Keeps formatting as-is. 
+ """ + } + "#, + ) + .unwrap(); + if let MamlValue::Object(obj) = val { + assert_eq!(obj.len(), 5); + assert!(obj.contains_key("notes")); + } else { + panic!("Expected object"); + } + } +} diff --git a/src/tokenizer.rs b/src/tokenizer.rs new file mode 100644 index 0000000..e746a9e --- /dev/null +++ b/src/tokenizer.rs @@ -0,0 +1,101 @@ +use logos::Logos; + +use crate::utils::parse_string; + +/// Token definitions for MAML +#[derive(Logos, Debug, Clone, PartialEq)] +#[logos(skip r"[ \t]+")] +pub enum Token { + #[token("null")] + Null, + + #[token("true")] + True, + + #[token("false")] + False, + + // Float MUST come before Int to ensure proper decimal matching, so priority three + #[regex(r"-?(?:0|[1-9][0-9]*)\.[0-9]+(?:[eE][+-]?[0-9]+)?", |lex| lex.slice().parse::().ok(), priority = 3)] + #[regex(r"-?(?:0|[1-9][0-9]*)[eE][+-]?[0-9]+", |lex| lex.slice().parse::().ok(), priority = 3)] + Float(f64), + + #[regex(r"-?(?:0|[1-9][0-9]*)", |lex| lex.slice().parse::().ok(), priority = 2)] + Int(i64), + + #[regex(r#""(?:[^"\\]|\\["\\/bfnrt]|\\u\{[0-9a-fA-F]{1,6}\})*""#, |lex| { + let s = lex.slice(); + parse_string(&s[1..s.len()-1]) + })] + String(String), + + // Surrounded by triple quotes + #[regex(r#""""([^"]|"[^"]|""[^"])*""""#, |lex| { + let s = lex.slice(); + let content = &s[3..s.len()-3]; // The content within the triple quotes (which take up three chars each) + + // Make sure triple quotes are checked + if content.contains(r#"""""#) { + return None; + } + + Some(content.strip_prefix('\n') + .or_else(|| content.strip_prefix("\r\n")) + .unwrap_or(content) + .to_string()) + })] + RawString(String), + + // An object key + #[regex(r"[a-zA-Z_-][a-zA-Z0-9_-]*", |lex| lex.slice().to_string(), priority = 1)] + #[regex(r"[0-9]+", |lex| lex.slice().to_string(), priority = 1)] + Key(String), + + #[token("[")] + LBracket, + + #[token("]")] + RBracket, + + #[token("{")] + LBrace, + + #[token("}")] + RBrace, + + #[token(":")] + Colon, + + #[token(",")] + Comma, + + 
/// Decodes the escape sequences of a quoted string body.
///
/// `s` is the text *between* the surrounding double quotes. Characters other than a
/// backslash are copied through unchanged. The recognised escapes are `\\`, `\/`,
/// `\"`, `\b`, `\f`, `\n`, `\r`, `\t` and `\u{X}` where `X` is one to six hexadecimal
/// digits naming a Unicode scalar value.
///
/// Returns `None` if the input ends right after a backslash, uses an unknown escape,
/// or contains a malformed `\u{...}` sequence (missing braces, no digits, more than
/// six digits, a non-hex digit, or a code point that is not a valid `char`).
pub(crate) fn parse_string(s: &str) -> Option<String> {
    // The decoded text is never longer than the escaped form.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        let decoded = match chars.next()? {
            '\\' => '\\',
            '/' => '/',
            '"' => '"',
            'b' => '\x08',
            'f' => '\x0C',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'u' => parse_unicode_escape(&mut chars)?,
            _ => return None,
        };
        result.push(decoded);
    }

    Some(result)
}

/// Decodes the `{XXXXXX}` part of a `\u{XXXXXX}` escape, consuming it from `chars`.
fn parse_unicode_escape(chars: &mut std::str::Chars<'_>) -> Option<char> {
    if chars.next()? != '{' {
        return None;
    }

    let mut hex = String::with_capacity(6);
    loop {
        match chars.next()? {
            '}' => break,
            c if c.is_ascii_hexdigit() && hex.len() < 6 => hex.push(c),
            _ => return None,
        }
    }

    // An empty digit list fails to parse; surrogates and out-of-range values are
    // rejected by `char::from_u32`.
    let code = u32::from_str_radix(&hex, 16).ok()?;
    char::from_u32(code)
}