From 31b4398390e02767fd387c43ed53548413f53dcc Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 3 Oct 2023 16:01:43 -0400 Subject: [PATCH] syntax: box each AstKind variant This does reduce memory, but not as much as it is reduced if we don't box the Ast. --- regex-syntax/src/ast/mod.rs | 149 ++++++++++++++++++----------- regex-syntax/src/ast/parse.rs | 152 +++++++++++++++--------------- regex-syntax/src/ast/print.rs | 14 +-- regex-syntax/src/ast/visitor.rs | 2 +- regex-syntax/src/hir/translate.rs | 38 ++++---- 5 files changed, 192 insertions(+), 163 deletions(-) diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 6a6b58237..c346abcb6 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -443,77 +443,92 @@ pub struct Ast(pub Box); #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum AstKind { /// An empty regex that matches everything. - Empty(Span), + Empty(Box), /// A set of flags, e.g., `(?is)`. - Flags(SetFlags), + Flags(Box), /// A single character literal, which includes escape sequences. - Literal(Literal), + Literal(Box), /// The "any character" class. - Dot(Span), + Dot(Box), /// A single zero-width assertion. - Assertion(Assertion), - /// A single character class. This includes all forms of character classes - /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`. - Class(Class), + Assertion(Box), + /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`. + ClassUnicode(Box), + /// A single perl character class, e.g., `\d` or `\W`. + ClassPerl(Box), + /// A single bracketed character class set, which may contain zero or more + /// character ranges and/or zero or more nested classes. e.g., + /// `[a-zA-Z\pL]`. + ClassBracketed(Box), /// A repetition operator applied to an arbitrary regular expression. - Repetition(Repetition), + Repetition(Box), /// A grouped regular expression. - Group(Group), + Group(Box), /// An alternation of regular expressions. - Alternation(Alternation), + Alternation(Box), /// A concatenation of regular expressions. - Concat(Concat), + Concat(Box), } impl Ast { /// Create an "empty" AST item. pub fn empty(span: Span) -> Ast { - Ast(Box::new(AstKind::Empty(span))) + Ast(Box::new(AstKind::Empty(Box::new(span)))) } /// Create a "flags" AST item. pub fn flags(e: SetFlags) -> Ast { - Ast(Box::new(AstKind::Flags(e))) + Ast(Box::new(AstKind::Flags(Box::new(e)))) } /// Create a "literal" AST item. pub fn literal(e: Literal) -> Ast { - Ast(Box::new(AstKind::Literal(e))) + Ast(Box::new(AstKind::Literal(Box::new(e)))) } /// Create a "dot" AST item. pub fn dot(span: Span) -> Ast { - Ast(Box::new(AstKind::Dot(span))) + Ast(Box::new(AstKind::Dot(Box::new(span)))) } /// Create a "assertion" AST item. pub fn assertion(e: Assertion) -> Ast { - Ast(Box::new(AstKind::Assertion(e))) + Ast(Box::new(AstKind::Assertion(Box::new(e)))) + } + + /// Create a "Unicode class" AST item. + pub fn class_unicode(e: ClassUnicode) -> Ast { + Ast(Box::new(AstKind::ClassUnicode(Box::new(e)))) + } + + /// Create a "Perl class" AST item. + pub fn class_perl(e: ClassPerl) -> Ast { + Ast(Box::new(AstKind::ClassPerl(Box::new(e)))) } - /// Create a "class" AST item. - pub fn class(e: Class) -> Ast { - Ast(Box::new(AstKind::Class(e))) + /// Create a "bracketed class" AST item. + pub fn class_bracketed(e: ClassBracketed) -> Ast { + Ast(Box::new(AstKind::ClassBracketed(Box::new(e)))) } /// Create a "repetition" AST item. pub fn repetition(e: Repetition) -> Ast { - Ast(Box::new(AstKind::Repetition(e))) + Ast(Box::new(AstKind::Repetition(Box::new(e)))) } /// Create a "group" AST item. pub fn group(e: Group) -> Ast { - Ast(Box::new(AstKind::Group(e))) + Ast(Box::new(AstKind::Group(Box::new(e)))) } /// Create a "alternation" AST item. pub fn alternation(e: Alternation) -> Ast { - Ast(Box::new(AstKind::Alternation(e))) + Ast(Box::new(AstKind::Alternation(Box::new(e)))) } /// Create a "concat" AST item. pub fn concat(e: Concat) -> Ast { - Ast(Box::new(AstKind::Concat(e))) + Ast(Box::new(AstKind::Concat(Box::new(e)))) } /// Return the span of this abstract syntax tree. @@ -524,7 +539,9 @@ impl Ast { AstKind::Literal(ref x) => &x.span, AstKind::Dot(ref span) => span, AstKind::Assertion(ref x) => &x.span, - AstKind::Class(ref x) => x.span(), + AstKind::ClassUnicode(ref x) => &x.span, + AstKind::ClassPerl(ref x) => &x.span, + AstKind::ClassBracketed(ref x) => &x.span, AstKind::Repetition(ref x) => &x.span, AstKind::Group(ref x) => &x.span, AstKind::Alternation(ref x) => &x.span, @@ -548,8 +565,10 @@ impl Ast { | AstKind::Flags(_) | AstKind::Literal(_) | AstKind::Dot(_) - | AstKind::Assertion(_) => false, - AstKind::Class(_) + | AstKind::Assertion(_) + | AstKind::ClassUnicode(_) + | AstKind::ClassPerl(_) => false, + AstKind::ClassBracketed(_) | AstKind::Repetition(_) | AstKind::Group(_) | AstKind::Alternation(_) @@ -735,31 +754,6 @@ impl HexLiteralKind { } } -/// A single character class expression. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum Class { - /// A Unicode character class, e.g., `\pL` or `\p{Greek}`. - Unicode(ClassUnicode), - /// A perl character class, e.g., `\d` or `\W`. - Perl(ClassPerl), - /// A bracketed character class set, which may contain zero or more - /// character ranges and/or zero or more nested classes. e.g., - /// `[a-zA-Z\pL]`. - Bracketed(ClassBracketed), -} - -impl Class { - /// Return the span of this character class. - pub fn span(&self) -> &Span { - match *self { - Class::Perl(ref x) => &x.span, - Class::Unicode(ref x) => &x.span, - Class::Bracketed(ref x) => &x.span, - } - } -} - /// A Perl character class. #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] @@ -1610,8 +1604,10 @@ impl Drop for Ast { | AstKind::Literal(_) | AstKind::Dot(_) | AstKind::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | AstKind::Class(_) => return, + | AstKind::ClassUnicode(_) + | AstKind::ClassPerl(_) + // Bracketed classes are recursive, they get their own Drop impl. + | AstKind::ClassBracketed(_) => return, AstKind::Repetition(ref x) if !x.ast.has_subexprs() => return, AstKind::Group(ref x) if !x.ast.has_subexprs() => return, AstKind::Alternation(ref x) if x.asts.is_empty() => return, @@ -1629,8 +1625,11 @@ impl Drop for Ast { | AstKind::Literal(_) | AstKind::Dot(_) | AstKind::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | AstKind::Class(_) => {} + | AstKind::ClassUnicode(_) + | AstKind::ClassPerl(_) + // Bracketed classes are recursive, so they get their own Drop + // impl. + | AstKind::ClassBracketed(_) => {} AstKind::Repetition(ref mut x) => { stack.push(mem::replace(&mut x.ast, empty_ast())); } @@ -1754,4 +1753,42 @@ mod tests { .join() .unwrap(); } + + // This tests that our `Ast` has a reasonable size. This isn't a hard rule + // and it can be increased if given a good enough reason. But this test + // exists because the size of `Ast` was at one point over 200 bytes on a + // 64-bit target. Wow. + #[test] + fn ast_size() { + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + std::dbg!(core::mem::size_of::()); + + let max = core::mem::size_of::(); + let size = core::mem::size_of::(); + assert!( + size <= max, + "Ast size of {} bytes is bigger than suggested max {}", + size, + max + ); + + let max = 2 * core::mem::size_of::(); + let size = core::mem::size_of::(); + assert!( + size <= max, + "AstKind size of {} bytes is bigger than suggested max {}", + size, + max + ); + } } diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index b3f04bfdc..a87be0e02 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -56,8 +56,8 @@ impl Primitive { Primitive::Literal(lit) => Ast::literal(lit), Primitive::Assertion(assert) => Ast::assertion(assert), Primitive::Dot(span) => Ast::dot(span), - Primitive::Perl(cls) => Ast::class(ast::Class::Perl(cls)), - Primitive::Unicode(cls) => Ast::class(ast::Class::Unicode(cls)), + Primitive::Perl(cls) => Ast::class_perl(cls), + Primitive::Unicode(cls) => Ast::class_unicode(cls), } } @@ -850,7 +850,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { fn pop_class( &self, nested_union: ast::ClassSetUnion, - ) -> Result> { + ) -> Result> { assert_eq!(self.char(), ']'); let item = ast::ClassSet::Item(nested_union.into_item()); @@ -882,7 +882,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { set.span.end = self.pos(); set.kind = prevset; if stack.is_empty() { - Ok(Either::Right(ast::Class::Bracketed(set))) + Ok(Either::Right(set)) } else { union.push(ast::ClassSetItem::Bracketed(Box::new(set))); Ok(Either::Left(union)) @@ -976,7 +976,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { '|' => concat = self.push_alternate(concat)?, '[' => { let class = self.parse_set_class()?; - concat.asts.push(Ast::class(class)); + concat.asts.push(Ast::class_bracketed(class)); } '?' => { concat = self.parse_uncounted_repetition( @@ -1743,7 +1743,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// is successful, then the parser is advanced to the position immediately /// following the closing `]`. #[inline(never)] - fn parse_set_class(&self) -> Result { + fn parse_set_class(&self) -> Result { assert_eq!(self.char(), '['); let mut union = @@ -2189,12 +2189,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | AstKind::Literal(_) | AstKind::Dot(_) | AstKind::Assertion(_) - | AstKind::Class(ast::Class::Unicode(_)) - | AstKind::Class(ast::Class::Perl(_)) => { + | AstKind::ClassUnicode(_) + | AstKind::ClassPerl(_) => { // These are all base cases, so we don't increment depth. return Ok(()); } - AstKind::Class(ast::Class::Bracketed(ref x)) => &x.span, + AstKind::ClassBracketed(ref x) => &x.span, AstKind::Repetition(ref x) => &x.span, AstKind::Group(ref x) => &x.span, AstKind::Alternation(ref x) => &x.span, @@ -2210,12 +2210,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | AstKind::Literal(_) | AstKind::Dot(_) | AstKind::Assertion(_) - | AstKind::Class(ast::Class::Unicode(_)) - | AstKind::Class(ast::Class::Perl(_)) => { + | AstKind::ClassUnicode(_) + | AstKind::ClassPerl(_) => { // These are all base cases, so we don't decrement depth. Ok(()) } - AstKind::Class(ast::Class::Bracketed(_)) + AstKind::ClassBracketed(_) | AstKind::Repetition(_) | AstKind::Group(_) | AstKind::Alternation(_) @@ -2606,7 +2606,7 @@ mod tests { ); assert_eq!( parser_nest_limit("[a]", 1).parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: ast::ClassSet::Item(ast::ClassSetItem::Literal( @@ -2616,7 +2616,7 @@ mod tests { c: 'a', } )), - }))) + })) ); assert_eq!( parser_nest_limit("[ab]", 1).parse().unwrap_err(), @@ -4965,15 +4965,15 @@ bar assert_eq!( parser("[[:alnum:]]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..11), negated: false, kind: itemset(item_ascii(alnum(span(1..10), false))), - }))) + })) ); assert_eq!( parser("[[[:alnum:]]]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..13), negated: false, kind: itemset(item_bracket(ast::ClassBracketed { @@ -4981,11 +4981,11 @@ bar negated: false, kind: itemset(item_ascii(alnum(span(2..11), false))), })), - }))) + })) ); assert_eq!( parser("[[:alnum:]&&[:lower:]]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: intersection( @@ -4993,11 +4993,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[[:alnum:]--[:lower:]]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: difference( @@ -5005,11 +5005,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[[:alnum:]~~[:lower:]]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: symdifference( @@ -5017,20 +5017,20 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[a]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), 'a')), - }))) + })) ); assert_eq!( parser(r"[a\]]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -5044,11 +5044,11 @@ bar }), ] ), - }))) + })) ); assert_eq!( parser(r"[a\-z]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -5063,44 +5063,44 @@ bar lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[ab]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),] ), - }))) + })) ); assert_eq!( parser("[a-]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),] ), - }))) + })) ); assert_eq!( parser("[-a]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),] ), - }))) + })) ); assert_eq!( parser(r"[\pL]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(item_unicode(ast::ClassUnicode { @@ -5108,11 +5108,11 @@ bar negated: false, kind: ast::ClassUnicodeKind::OneLetter('L'), })), - }))) + })) ); assert_eq!( parser(r"[\w]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(item_perl(ast::ClassPerl { @@ -5120,11 +5120,11 @@ bar kind: ast::ClassPerlKind::Word, negated: false, })), - }))) + })) ); assert_eq!( parser(r"[a\wz]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -5139,20 +5139,20 @@ bar lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[a-z]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(range(span(1..4), 'a', 'z')), - }))) + })) ); assert_eq!( parser("[a-cx-z]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..8), negated: false, kind: union( @@ -5162,11 +5162,11 @@ bar range(span(4..7), 'x', 'z'), ] ), - }))) + })) ); assert_eq!( parser(r"[\w&&a-cx-z]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -5184,11 +5184,11 @@ bar ] ), ), - }))) + })) ); assert_eq!( parser(r"[a-cx-z&&\w]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -5206,11 +5206,11 @@ bar negated: false, })), ), - }))) + })) ); assert_eq!( parser(r"[a--b--c]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: difference( @@ -5222,11 +5222,11 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[a~~b~~c]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: symdifference( @@ -5238,11 +5238,11 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[\^&&^]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( @@ -5254,11 +5254,11 @@ bar })), itemset(lit(span(5..6), '^')), ), - }))) + })) ); assert_eq!( parser(r"[\&&&&]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( @@ -5270,11 +5270,11 @@ bar })), itemset(lit(span(5..6), '&')), ), - }))) + })) ); assert_eq!( parser(r"[&&&&]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: intersection( @@ -5286,13 +5286,13 @@ bar ), itemset(empty(span(5..5))), ), - }))) + })) ); let pat = "[☃-⛄]"; assert_eq!( parser(pat).parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span_range(pat, 0..9), negated: false, kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange { @@ -5308,20 +5308,20 @@ bar c: '⛄', }, })), - }))) + })) ); assert_eq!( parser(r"[]]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), ']')), - }))) + })) ); assert_eq!( parser(r"[]\[]").parse(), - Ok(Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -5335,14 +5335,14 @@ bar }), ] ), - }))) + })) ); assert_eq!( parser(r"[\[]]").parse(), Ok(concat( 0..5, vec![ - Ast::class(ast::Class::Bracketed(ast::ClassBracketed { + Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(ast::ClassSetItem::Literal( @@ -5352,7 +5352,7 @@ bar c: '[', } )), - })), + }), Ast::literal(ast::Literal { span: span(4..5), kind: ast::LiteralKind::Verbatim, @@ -5917,11 +5917,11 @@ bar Ok(Ast::concat(ast::Concat { span: span(0..4), asts: vec![ - Ast::class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..3), negated: false, kind: ast::ClassUnicodeKind::OneLetter('N'), - })), + }), Ast::literal(ast::Literal { span: span(3..4), kind: ast::LiteralKind::Verbatim, @@ -5935,11 +5935,11 @@ bar Ok(Ast::concat(ast::Concat { span: span(0..10), asts: vec![ - Ast::class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..9), negated: false, kind: ast::ClassUnicodeKind::Named(s("Greek")), - })), + }), Ast::literal(ast::Literal { span: span(9..10), kind: ast::LiteralKind::Verbatim, @@ -6017,22 +6017,22 @@ bar assert_eq!( parser(r"\d").parse(), - Ok(Ast::class(ast::Class::Perl(ast::ClassPerl { + Ok(Ast::class_perl(ast::ClassPerl { span: span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - }))) + })) ); assert_eq!( parser(r"\dz").parse(), Ok(Ast::concat(ast::Concat { span: span(0..3), asts: vec![ - Ast::class(ast::Class::Perl(ast::ClassPerl { + Ast::class_perl(ast::ClassPerl { span: span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - })), + }), Ast::literal(ast::Literal { span: span(2..3), kind: ast::LiteralKind::Verbatim, diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index daf6776f2..10ee56c2c 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -80,27 +80,21 @@ impl Visitor for Writer { fn visit_pre(&mut self, ast: &Ast) -> fmt::Result { match *ast.0 { AstKind::Group(ref x) => self.fmt_group_pre(x), - AstKind::Class(ast::Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_pre(x) - } + AstKind::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x), _ => Ok(()), } } fn visit_post(&mut self, ast: &Ast) -> fmt::Result { - use crate::ast::Class; - match *ast.0 { AstKind::Empty(_) => Ok(()), AstKind::Flags(ref x) => self.fmt_set_flags(x), AstKind::Literal(ref x) => self.fmt_literal(x), AstKind::Dot(_) => self.wtr.write_str("."), AstKind::Assertion(ref x) => self.fmt_assertion(x), - AstKind::Class(Class::Perl(ref x)) => self.fmt_class_perl(x), - AstKind::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x), - AstKind::Class(Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_post(x) - } + AstKind::ClassPerl(ref x) => self.fmt_class_perl(x), + AstKind::ClassUnicode(ref x) => self.fmt_class_unicode(x), + AstKind::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x), AstKind::Repetition(ref x) => self.fmt_repetition(x), AstKind::Group(ref x) => self.fmt_group_post(x), AstKind::Alternation(_) => Ok(()), diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs index 05fdac89c..2bd4b1956 100644 --- a/regex-syntax/src/ast/visitor.rs +++ b/regex-syntax/src/ast/visitor.rs @@ -264,7 +264,7 @@ impl<'a> HeapVisitor<'a> { visitor: &mut V, ) -> Result>, V::Err> { Ok(match *ast.0 { - AstKind::Class(ast::Class::Bracketed(ref x)) => { + AstKind::ClassBracketed(ref x) => { self.visit_class(x, visitor)?; None } diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 743218df4..ab3aa93d7 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -337,7 +337,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { fn visit_pre(&mut self, ast: &Ast) -> Result<()> { match *ast.0 { - AstKind::Class(ast::Class::Bracketed(_)) => { + AstKind::ClassBracketed(_) => { if self.flags().unicode() { let cls = hir::ClassUnicode::empty(); self.push(HirFrame::ClassUnicode(cls)); @@ -386,29 +386,27 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { // consistency sake. self.push(HirFrame::Expr(Hir::empty())); } - AstKind::Literal(ref x) => { - match self.ast_literal_to_scalar(x)? { - Either::Right(byte) => self.push_byte(byte), - Either::Left(ch) => { - if !self.flags().unicode() && ch.len_utf8() > 1 { - return Err(self - .error(x.span, ErrorKind::UnicodeNotAllowed)); - } - match self.case_fold_char(x.span, ch)? { - None => self.push_char(ch), - Some(expr) => self.push(HirFrame::Expr(expr)), - } + AstKind::Literal(ref x) => match self.ast_literal_to_scalar(x)? { + Either::Right(byte) => self.push_byte(byte), + Either::Left(ch) => { + if !self.flags().unicode() && ch.len_utf8() > 1 { + return Err( + self.error(x.span, ErrorKind::UnicodeNotAllowed) + ); + } + match self.case_fold_char(x.span, ch)? { + None => self.push_char(ch), + Some(expr) => self.push(HirFrame::Expr(expr)), } } - // self.push(HirFrame::Expr(self.hir_literal(x)?)); - } - AstKind::Dot(span) => { - self.push(HirFrame::Expr(self.hir_dot(span)?)); + }, + AstKind::Dot(ref span) => { + self.push(HirFrame::Expr(self.hir_dot(**span)?)); } AstKind::Assertion(ref x) => { self.push(HirFrame::Expr(self.hir_assertion(x)?)); } - AstKind::Class(ast::Class::Perl(ref x)) => { + AstKind::ClassPerl(ref x) => { if self.flags().unicode() { let cls = self.hir_perl_unicode_class(x)?; let hcls = hir::Class::Unicode(cls); @@ -419,11 +417,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::Expr(Hir::class(hcls))); } } - AstKind::Class(ast::Class::Unicode(ref x)) => { + AstKind::ClassUnicode(ref x) => { let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); self.push(HirFrame::Expr(Hir::class(cls))); } - AstKind::Class(ast::Class::Bracketed(ref ast)) => { + AstKind::ClassBracketed(ref ast) => { if self.flags().unicode() { let mut cls = self.pop().unwrap().unwrap_class_unicode(); self.unicode_fold_and_negate(