diff --git a/src/lib.rs b/src/lib.rs
index 6417134..0bcbb42 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -68,7 +68,7 @@ pub fn parse(input: &str, options: ParserOptions) -> Result, ParseError
 /// }
 /// ```
 pub fn parse_query_selector(input: &str) -> Option<queryselector::Selector<'_>> {
-    let selector = queryselector::Parser::new(input.as_bytes()).selector()?;
+    let selector = queryselector::Parser::new(input.as_bytes()).parse_selectors_group()?;
     Some(selector)
 }
 
diff --git a/src/queryselector/parser.rs b/src/queryselector/parser.rs
index 26a8aa2..429f2ae 100644
--- a/src/queryselector/parser.rs
+++ b/src/queryselector/parser.rs
@@ -40,40 +40,6 @@ impl<'a> Parser<'a> {
         self.stream.slice(start, self.stream.idx)
     }
 
-    fn parse_combinator(&mut self, left: Selector<'a>) -> Option<Selector<'a>> {
-        let has_whitespaces = self.skip_whitespaces();
-
-        let tok = if let Some(tok) = self.stream.current_cpy() {
-            tok
-        } else {
-            return Some(left);
-        };
-
-        let combinator = match tok {
-            b',' => {
-                self.stream.advance();
-                let right = self.selector()?;
-                Selector::Or(Box::new(left), Box::new(right))
-            }
-            b'>' => {
-                self.stream.advance();
-                let right = self.selector()?;
-                Selector::Parent(Box::new(left), Box::new(right))
-            }
-            _ if has_whitespaces => {
-                let right = self.selector()?;
-                Selector::Descendant(Box::new(left), Box::new(right))
-            }
-            _ if !has_whitespaces => {
-                let right = self.selector()?;
-                Selector::And(Box::new(left), Box::new(right))
-            }
-            _ => unreachable!(),
-        };
-
-        Some(combinator)
-    }
-
     fn parse_attribute(&mut self) -> Option<Selector<'a>> {
         let attribute = self.read_identifier();
         let ty = match self.stream.current_cpy() {
@@ -115,37 +81,107 @@ impl<'a> Parser<'a> {
         Some(ty)
     }
 
-    /// Parses a full selector
-    pub fn selector(&mut self) -> Option<Selector<'a>> {
+    /// Parses a selectors group production as defined in the [grammar provided by the W3C].
+    ///
+    /// A group is one or more selectors separated by `,`; they are folded left-to-right into
+    /// `Selector::Or`. Returns `None` if any selector of the group fails to parse, which
+    /// includes an empty selector after a trailing `,`.
+    ///
+    /// [grammar provided by the W3C]: https://www.w3.org/TR/selectors-3/#w3cselgrammar
+    pub fn parse_selectors_group(&mut self) -> Option<Selector<'a>> {
+        let mut left = self.parse_selector()?;
+
+        // `parse_selector` leaves the separating `,` in the stream, so consume it here.
+        while let Some(b',') = self.stream.current_cpy() {
+            self.stream.advance();
+            let right = self.parse_selector()?;
+            left = Selector::Or(Box::new(left), Box::new(right));
+        }
+
+        Some(left)
+    }
+
+    /// Parses a selector production as defined in the [grammar provided by the W3C].
+    ///
+    /// Sequences joined by `>` become `Selector::Parent` and sequences separated only by
+    /// whitespace become `Selector::Descendant`; both are folded left-to-right. Parsing stops
+    /// at the end of the input or in front of a `,`, which is left in the stream. Returns
+    /// `None` if a simple selector sequence is missing or an unexpected byte follows one.
+    ///
+    /// [grammar provided by the W3C]: https://www.w3.org/TR/selectors-3/#w3cselgrammar
+    fn parse_selector(&mut self) -> Option<Selector<'a>> {
+        let mut left = self.parse_simple_selector_sequence()?;
+        let mut has_whitespaces = self.skip_whitespaces();
+
+        while let Some(tok) = self.stream.current_cpy() {
+            match tok {
+                // The group separator is handled by `parse_selectors_group`.
+                b',' => break,
+                b'>' => {
+                    self.stream.advance();
+                    let right = self.parse_simple_selector_sequence()?;
+                    left = Selector::Parent(Box::new(left), Box::new(right));
+                }
+                _ if has_whitespaces => {
+                    let right = self.parse_simple_selector_sequence()?;
+                    left = Selector::Descendant(Box::new(left), Box::new(right));
+                }
+                // A byte that cannot continue the selector right after a simple selector
+                // sequence is malformed input, not a broken invariant, so do not panic.
+                _ => return None,
+            }
+
+            has_whitespaces = self.skip_whitespaces();
+        }
+
+        Some(left)
+    }
+
+    /// Parses a simple selector sequence production as defined in the [grammar provided by the W3C].
+    ///
+    /// Adjacent simple selectors (`#id`, `.class`, `*`, `[attribute]` or a tag name) are folded
+    /// left-to-right into `Selector::And`. Leading whitespace is skipped. Returns `None` if no
+    /// simple selector is found at the current position or if `parse_attribute` fails.
+    ///
+    /// [grammar provided by the W3C]: https://www.w3.org/TR/selectors-3/#w3cselgrammar
+    fn parse_simple_selector_sequence(&mut self) -> Option<Selector<'a>> {
+        let mut result = None;
         self.skip_whitespaces();
 
-        let tok = self.stream.current_cpy()?;
-        let left = match tok {
-            b'#' => {
+        while let Some(right) = match self.stream.current_cpy() {
+            Some(b'#') => {
                 self.stream.advance();
                 let id = self.read_identifier();
-                Selector::Id(id)
+                Some(Selector::Id(id))
             }
-            b'.' => {
+            Some(b'.') => {
                 self.stream.advance();
                 let class = self.read_identifier();
-                Selector::Class(class)
+                Some(Selector::Class(class))
             }
-            b'*' => {
+            Some(b'*') => {
                 self.stream.advance();
-                Selector::All
+                Some(Selector::All)
             }
-            b'[' => {
+            Some(b'[') => {
                 self.stream.advance();
-                self.parse_attribute()?
+                // Propagate a failed attribute selector instead of silently ending the sequence.
+                Some(self.parse_attribute()?)
             }
-            _ if util::is_ident(tok) => {
+            Some(tok) if util::is_ident(tok) => {
                 let tag = self.read_identifier();
-                Selector::Tag(tag)
+                Some(Selector::Tag(tag))
             }
-            _ => return None,
-        };
+            // Any other byte, or the end of the input, ends the sequence.
+            _ => None,
+        } {
+            if let Some(left) = result {
+                result = Some(Selector::And(Box::new(left), Box::new(right)));
+            } else {
+                result = Some(right);
+            }
+        }
 
-        self.parse_combinator(left)
+        result
     }
 }
diff --git a/src/tests.rs b/src/tests.rs
index 046861a..82f6b29 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -511,6 +511,8 @@ fn unquoted() {
 }
 
 mod query_selector {
+    use crate::queryselector::Selector;
+
     use super::*;
     #[test]
     fn query_selector_simple() {
@@ -611,6 +613,148 @@ mod query_selector {
 
         assert_eq!(value, Some("hello".to_string()));
     }
+
+    #[test]
+    fn parse_query_selector_whitespace_before() {
+        let selector = crate::parse_query_selector(" .hello").unwrap();
+        assert!(matches!(selector, Selector::Class(b"hello")));
+    }
+
+    #[test]
+    fn parse_query_selector_whitespace_after() {
+        let selector = crate::parse_query_selector(".hello ").unwrap();
+        assert!(matches!(selector, Selector::Class(b"hello")));
+    }
+
+    #[test]
+    fn parse_query_selector_whitespace_before_and_after() {
+        let selector = crate::parse_query_selector(" .hello ").unwrap();
+        assert!(matches!(selector, Selector::Class(b"hello")));
+    }
+
+    #[test]
+    fn parse_query_selector_simple_class() {
+        let selector = crate::parse_query_selector(".hello").unwrap();
+        assert!(matches!(selector, Selector::Class(b"hello")));
+    }
+
+    #[test]
+    fn parse_query_selector_simple_or() {
+        let selector = crate::parse_query_selector(".hello, world").unwrap();
+        assert!(
+            matches!(selector, Selector::Or(left, right) if matches!(&*left, Selector::Class(b"hello")) && matches!(&*right, Selector::Tag(b"world")))
+        );
+    }
+
+    #[test]
+    fn parse_query_selector_simple_and() {
+        let selector = crate::parse_query_selector("hello.world").unwrap();
+        assert!(
+            matches!(selector, Selector::And(left, right) if matches!(&*left, Selector::Tag(b"hello")) && matches!(&*right, Selector::Class(b"world")))
+        );
+    }
+
+    #[test]
+    fn parse_query_selector_simple_descendant() {
+        let selector = crate::parse_query_selector("hello .world").unwrap();
+        assert!(
+            matches!(selector, Selector::Descendant(left, right) if matches!(&*left, Selector::Tag(b"hello")) && matches!(&*right, Selector::Class(b"world")))
+        );
+    }
+
+    #[test]
+    fn parse_query_selector_simple_parent() {
+        let selector = crate::parse_query_selector("hello > .world").unwrap();
+        assert!(
+            matches!(selector, Selector::Parent(left, right) if matches!(
+                &*left, Selector::Tag(b"hello")
+            ) && matches!(&*right, Selector::Class(b"world")))
+        );
+    }
+
+    #[test]
+    fn parse_query_selector_list_descendant() {
+        let selector = crate::parse_query_selector("hello .world, world.hello").unwrap();
+        assert!(matches!(
+            selector,
+            Selector::Or(left, right) if matches!(
+                &*left, Selector::Descendant(left, right) if matches!(
+                    &**left, Selector::Tag(b"hello")
+                ) && matches!(&**right, Selector::Class(b"world"))
+            ) && matches!(
+                &*right, Selector::And(left, right) if matches!(
+                    &**left, Selector::Tag(b"world")
+                ) && matches!(&**right, Selector::Class(b"hello"))
+            )
+        ));
+    }
+
+    #[test]
+    fn parse_query_selector_list_parent() {
+        let selector = crate::parse_query_selector("hello > .world, world.hello").unwrap();
+        assert!(matches!(
+            selector,
+            Selector::Or(left, right) if matches!(
+                &*left, Selector::Parent(left, right) if matches!(
+                    &**left, Selector::Tag(b"hello")
+                ) && matches!(&**right, Selector::Class(b"world"))
+            ) && matches!(
+                &*right, Selector::And(left, right) if matches!(
+                    &**left, Selector::Tag(b"world")
+                ) && matches!(&**right, Selector::Class(b"hello"))
+            )
+        ));
+    }
+
+    #[test]
+    fn parse_query_selector_list_descendant_and_parent() {
+        let selector = crate::parse_query_selector("hello .world, world > .hello.world").unwrap();
+        assert!(matches!(
+            selector,
+            Selector::Or(left, right) if matches!(
+                &*left, Selector::Descendant(left, right) if matches!(
+                    &**left, Selector::Tag(b"hello")
+                ) && matches!(&**right, Selector::Class(b"world"))
+            ) && matches!(
+                &*right, Selector::Parent(left, right) if matches!(
+                    &**left, Selector::Tag(b"world")
+                ) && matches!(
+                    &**right, Selector::And(left, right) if matches!(
+                        &**left, Selector::Class(b"hello")
+                    ) && matches!(&**right, Selector::Class(b"world"))
+                )
+            )
+        ));
+    }
+
+    /// Tests that the parser properly handles both the descendant and child (parent) combinators as in the [Selectors Level 3 Recommendation].
+    ///
+    /// [Selectors Level 3 Recommendation]: https://www.w3.org/TR/selectors-3/#child-combinators
+    #[test]
+    fn parse_query_selector_child_combinators() {
+        let selector = crate::parse_query_selector("div ol>li p").unwrap();
+        assert!(matches!(
+            selector,
+            Selector::Descendant(left, right) if matches!(
+                &*left, Selector::Parent(left, right) if matches!(
+                    &**left, Selector::Descendant(left, right) if matches!(
+                        &**left, Selector::Tag(b"div")
+                    ) && matches!(&**right, Selector::Tag(b"ol"))
+                ) && matches!(&**right, Selector::Tag(b"li"))
+            ) && matches!(&*right, Selector::Tag(b"p"))
+        ));
+    }
+
+    /// Tests that malformed selectors yield `None` rather than a panic or a partial result.
+    #[test]
+    fn parse_query_selector_malformed() {
+        // A trailing comma leaves an empty selector behind.
+        assert!(crate::parse_query_selector("hello,").is_none());
+        // A child combinator needs a right-hand side.
+        assert!(crate::parse_query_selector("hello >").is_none());
+        // Assumes `)` is not an identifier byte according to `util::is_ident`.
+        assert!(crate::parse_query_selector("hello)").is_none());
+    }
 }
 
 #[test]