Skip to content

Commit

Permalink
Auto merge of rust-lang#80459 - mark-i-m:or-pat-reg, r=petrochenkov
Browse files Browse the repository at this point in the history
Implement edition-based macro :pat feature

This PR does two things:
1. Fixes the perf regression from rust-lang#80100 (comment)
2. Implements `:pat2018` and `:pat2021` matchers, as described by `@joshtriplett` in rust-lang#54883 (comment), behind the feature gate `edition_macro_pats`.

r? `@petrochenkov`

cc `@Mark-Simulacrum`
  • Loading branch information
bors committed Dec 31, 2020
2 parents b33e234 + 40bf3c0 commit 44e3daf
Show file tree
Hide file tree
Showing 12 changed files with 151 additions and 76 deletions.
35 changes: 30 additions & 5 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use rustc_span::hygiene::ExpnKind;
use rustc_span::source_map::SourceMap;
use rustc_span::symbol::{kw, sym};
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{self, FileName, RealFileName, Span, DUMMY_SP};
use rustc_span::{self, edition::Edition, FileName, RealFileName, Span, DUMMY_SP};
use std::borrow::Cow;
use std::{fmt, mem};

Expand Down Expand Up @@ -690,7 +690,16 @@ pub enum NonterminalKind {
Item,
Block,
Stmt,
Pat,
Pat2018 {
/// Keep track of whether the user used `:pat2018` or `:pat` and we inferred it from the
/// edition of the span. This is used for diagnostics.
inferred: bool,
},
Pat2021 {
/// Keep track of whether the user used `:pat2021` or `:pat` and we inferred it from the
/// edition of the span. This is used for diagnostics.
inferred: bool,
},
Expr,
Ty,
Ident,
Expand All @@ -703,12 +712,25 @@ pub enum NonterminalKind {
}

impl NonterminalKind {
pub fn from_symbol(symbol: Symbol) -> Option<NonterminalKind> {
/// The `edition` closure is used to get the edition for the given symbol. Doing
/// `span.edition()` is expensive, so we do it lazily.
pub fn from_symbol(
symbol: Symbol,
edition: impl FnOnce() -> Edition,
) -> Option<NonterminalKind> {
Some(match symbol {
sym::item => NonterminalKind::Item,
sym::block => NonterminalKind::Block,
sym::stmt => NonterminalKind::Stmt,
sym::pat => NonterminalKind::Pat,
sym::pat => match edition() {
Edition::Edition2015 | Edition::Edition2018 => {
NonterminalKind::Pat2018 { inferred: true }
}
// FIXME(mark-i-m): uncomment when 2021 machinery is available.
//Edition::Edition2021 => NonterminalKind::Pat2021{inferred:true},
},
sym::pat2018 => NonterminalKind::Pat2018 { inferred: false },
sym::pat2021 => NonterminalKind::Pat2021 { inferred: false },
sym::expr => NonterminalKind::Expr,
sym::ty => NonterminalKind::Ty,
sym::ident => NonterminalKind::Ident,
Expand All @@ -726,7 +748,10 @@ impl NonterminalKind {
NonterminalKind::Item => sym::item,
NonterminalKind::Block => sym::block,
NonterminalKind::Stmt => sym::stmt,
NonterminalKind::Pat => sym::pat,
NonterminalKind::Pat2018 { inferred: false } => sym::pat2018,
NonterminalKind::Pat2021 { inferred: false } => sym::pat2021,
NonterminalKind::Pat2018 { inferred: true }
| NonterminalKind::Pat2021 { inferred: true } => sym::pat,
NonterminalKind::Expr => sym::expr,
NonterminalKind::Ty => sym::ty,
NonterminalKind::Ident => sym::ident,
Expand Down
24 changes: 5 additions & 19 deletions compiler/rustc_expand/src/mbe/macro_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ use TokenTreeOrTokenTreeSlice::*;
use crate::mbe::{self, TokenTree};

use rustc_ast::token::{self, DocComment, Nonterminal, Token};
use rustc_parse::parser::{OrPatNonterminalMode, Parser};
use rustc_parse::parser::Parser;
use rustc_session::parse::ParseSess;
use rustc_span::{edition::Edition, symbol::MacroRulesNormalizedIdent};
use rustc_span::symbol::MacroRulesNormalizedIdent;

use smallvec::{smallvec, SmallVec};

Expand Down Expand Up @@ -419,18 +419,6 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
}
}

/// In edition 2015/18, `:pat` can only match `pat<no_top_alt>` because otherwise, we have
/// breakage. As of edition 2021, `:pat` matches `top_pat`.
///
/// See <https://github.com/rust-lang/rust/issues/54883> for more info.
fn or_pat_mode(edition: Edition) -> OrPatNonterminalMode {
match edition {
Edition::Edition2015 | Edition::Edition2018 => OrPatNonterminalMode::NoTopAlt,
// FIXME(mark-i-m): uncomment this when edition 2021 machinery is added.
// Edition::Edition2021 => OrPatNonterminalMode::TopPat,
}
}

/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
/// produce more items in `next_items`, `eof_items`, and `bb_items`.
///
Expand Down Expand Up @@ -578,14 +566,13 @@ fn inner_parse_loop<'root, 'tt>(

// We need to match a metavar with a valid ident... call out to the black-box
// parser by adding an item to `bb_items`.
TokenTree::MetaVarDecl(span, _, Some(kind)) => {
TokenTree::MetaVarDecl(_, _, Some(kind)) => {
// Built-in nonterminals never start with these tokens, so we can eliminate
// them from consideration.
//
// We use the span of the metavariable declaration to determine any
// edition-specific matching behavior for non-terminals.
if Parser::nonterminal_may_begin_with(kind, token, or_pat_mode(span.edition()))
{
if Parser::nonterminal_may_begin_with(kind, token) {
bb_items.push(item);
}
}
Expand Down Expand Up @@ -749,8 +736,7 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na
let match_cur = item.match_cur;
// We use the span of the metavariable declaration to determine any
// edition-specific matching behavior for non-terminals.
let nt = match parser.to_mut().parse_nonterminal(kind, or_pat_mode(span.edition()))
{
let nt = match parser.to_mut().parse_nonterminal(kind) {
Err(mut err) => {
err.span_label(
span,
Expand Down
16 changes: 11 additions & 5 deletions compiler/rustc_expand/src/mbe/macro_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,10 +476,15 @@ pub fn compile_declarative_macro(
.map(|m| {
if let MatchedNonterminal(ref nt) = *m {
if let NtTT(ref tt) = **nt {
let tt =
mbe::quoted::parse(tt.clone().into(), true, &sess.parse_sess, def.id)
.pop()
.unwrap();
let tt = mbe::quoted::parse(
tt.clone().into(),
true,
&sess.parse_sess,
def.id,
features,
)
.pop()
.unwrap();
valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def.attrs, &tt);
return tt;
}
Expand All @@ -501,6 +506,7 @@ pub fn compile_declarative_macro(
false,
&sess.parse_sess,
def.id,
features,
)
.pop()
.unwrap();
Expand Down Expand Up @@ -1090,7 +1096,7 @@ fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow {
_ => IsInFollow::No(TOKENS),
}
}
NonterminalKind::Pat => {
NonterminalKind::Pat2018 { .. } | NonterminalKind::Pat2021 { .. } => {
const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"];
match tok {
TokenTree::Token(token) => match token.kind {
Expand Down
68 changes: 46 additions & 22 deletions compiler/rustc_expand/src/mbe/quoted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ use rustc_ast::token::{self, Token};
use rustc_ast::tokenstream;
use rustc_ast::{NodeId, DUMMY_NODE_ID};
use rustc_ast_pretty::pprust;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, Ident};
use rustc_feature::Features;
use rustc_session::parse::{feature_err, ParseSess};
use rustc_span::symbol::{kw, sym, Ident};

use rustc_span::Span;

Expand All @@ -29,10 +30,8 @@ const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \
/// `ident` are "matchers". They are not present in the body of a macro rule -- just in the
/// pattern, so we pass a parameter to indicate whether to expect them or not.
/// - `sess`: the parsing session. Any errors will be emitted to this session.
/// - `features`, `attrs`: language feature flags and attributes so that we know whether to use
/// unstable features or not.
/// - `edition`: which edition are we in.
/// - `macro_node_id`: the NodeId of the macro we are parsing.
/// - `node_id`: the NodeId of the macro we are parsing.
/// - `features`: language features so we can do feature gating.
///
/// # Returns
///
Expand All @@ -42,6 +41,7 @@ pub(super) fn parse(
expect_matchers: bool,
sess: &ParseSess,
node_id: NodeId,
features: &Features,
) -> Vec<TokenTree> {
// Will contain the final collection of `self::TokenTree`
let mut result = Vec::new();
Expand All @@ -52,7 +52,7 @@ pub(super) fn parse(
while let Some(tree) = trees.next() {
// Given the parsed tree, if there is a metavar and we are expecting matchers, actually
// parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`).
let tree = parse_tree(tree, &mut trees, expect_matchers, sess, node_id);
let tree = parse_tree(tree, &mut trees, expect_matchers, sess, node_id, features);
match tree {
TokenTree::MetaVar(start_sp, ident) if expect_matchers => {
let span = match trees.next() {
Expand All @@ -61,18 +61,39 @@ pub(super) fn parse(
Some(tokenstream::TokenTree::Token(token)) => match token.ident() {
Some((frag, _)) => {
let span = token.span.with_lo(start_sp.lo());
let kind = token::NonterminalKind::from_symbol(frag.name)
.unwrap_or_else(|| {
let msg = format!(
"invalid fragment specifier `{}`",
frag.name
);
sess.span_diagnostic
.struct_span_err(span, &msg)
.help(VALID_FRAGMENT_NAMES_MSG)

match frag.name {
sym::pat2018 | sym::pat2021 => {
if !features.edition_macro_pats {
feature_err(
sess,
sym::edition_macro_pats,
frag.span,
"`pat2018` and `pat2021` are unstable.",
)
.emit();
token::NonterminalKind::Ident
});
}
}
_ => {}
}

let kind =
token::NonterminalKind::from_symbol(frag.name, || {
span.edition()
})
.unwrap_or_else(
|| {
let msg = format!(
"invalid fragment specifier `{}`",
frag.name
);
sess.span_diagnostic
.struct_span_err(span, &msg)
.help(VALID_FRAGMENT_NAMES_MSG)
.emit();
token::NonterminalKind::Ident
},
);
result.push(TokenTree::MetaVarDecl(span, ident, Some(kind)));
continue;
}
Expand Down Expand Up @@ -110,14 +131,14 @@ pub(super) fn parse(
/// converting `tree`
/// - `expect_matchers`: same as for `parse` (see above).
/// - `sess`: the parsing session. Any errors will be emitted to this session.
/// - `features`, `attrs`: language feature flags and attributes so that we know whether to use
/// unstable features or not.
/// - `features`: language features so we can do feature gating.
fn parse_tree(
tree: tokenstream::TokenTree,
outer_trees: &mut impl Iterator<Item = tokenstream::TokenTree>,
expect_matchers: bool,
sess: &ParseSess,
node_id: NodeId,
features: &Features,
) -> TokenTree {
// Depending on what `tree` is, we could be parsing different parts of a macro
match tree {
Expand Down Expand Up @@ -145,7 +166,7 @@ fn parse_tree(
sess.span_diagnostic.span_err(span.entire(), &msg);
}
// Parse the contents of the sequence itself
let sequence = parse(tts, expect_matchers, sess, node_id);
let sequence = parse(tts, expect_matchers, sess, node_id, features);
// Get the Kleene operator and optional separator
let (separator, kleene) =
parse_sep_and_kleene_op(&mut trees, span.entire(), sess);
Expand Down Expand Up @@ -196,7 +217,10 @@ fn parse_tree(
// descend into the delimited set and further parse it.
tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited(
span,
Lrc::new(Delimited { delim, tts: parse(tts, expect_matchers, sess, node_id) }),
Lrc::new(Delimited {
delim,
tts: parse(tts, expect_matchers, sess, node_id, features),
}),
),
}
}
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_feature/src/active.rs
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,9 @@ declare_features! (
/// Allows arbitrary expressions in key-value attributes at parse time.
(active, extended_key_value_attributes, "1.50.0", Some(78835), None),

/// `:pat2018` and `:pat2021` macro matchers.
(active, edition_macro_pats, "1.51.0", Some(54883), None),

// -------------------------------------------------------------------------
// feature-group-end: actual feature gates
// -------------------------------------------------------------------------
Expand Down
1 change: 0 additions & 1 deletion compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ mod ty;
use crate::lexer::UnmatchedBrace;
pub use diagnostics::AttemptLocalParseRecovery;
use diagnostics::Error;
pub use pat::OrPatNonterminalMode;
pub use path::PathStyle;

use rustc_ast::ptr::P;
Expand Down
27 changes: 10 additions & 17 deletions compiler/rustc_parse/src/parser/nonterminal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,15 @@ use rustc_ast_pretty::pprust;
use rustc_errors::PResult;
use rustc_span::symbol::{kw, Ident};

use crate::parser::pat::{GateOr, OrPatNonterminalMode, RecoverComma};
use crate::parser::pat::{GateOr, RecoverComma};
use crate::parser::{FollowedByType, Parser, PathStyle};

impl<'a> Parser<'a> {
/// Checks whether a non-terminal may begin with a particular token.
///
/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that
/// token. Be conservative (return true) if not sure.
pub fn nonterminal_may_begin_with(
kind: NonterminalKind,
token: &Token,
or_pat_mode: OrPatNonterminalMode,
) -> bool {
pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool {
/// Checks whether the non-terminal may contain a single (non-keyword) identifier.
fn may_be_ident(nt: &token::Nonterminal) -> bool {
match *nt {
Expand Down Expand Up @@ -62,7 +58,7 @@ impl<'a> Parser<'a> {
},
_ => false,
},
NonterminalKind::Pat => match token.kind {
NonterminalKind::Pat2018 { .. } | NonterminalKind::Pat2021 { .. } => match token.kind {
token::Ident(..) | // box, ref, mut, and other identifiers (can stricten)
token::OpenDelim(token::Paren) | // tuple pattern
token::OpenDelim(token::Bracket) | // slice pattern
Expand All @@ -76,7 +72,7 @@ impl<'a> Parser<'a> {
token::Lt | // path (UFCS constant)
token::BinOp(token::Shl) => true, // path (double UFCS)
// leading vert `|` or-pattern
token::BinOp(token::Or) => matches!(or_pat_mode, OrPatNonterminalMode::TopPat),
token::BinOp(token::Or) => matches!(kind, NonterminalKind::Pat2021 {..}),
token::Interpolated(ref nt) => may_be_ident(nt),
_ => false,
},
Expand All @@ -94,11 +90,7 @@ impl<'a> Parser<'a> {
}

/// Parse a non-terminal (e.g. MBE `:pat` or `:ident`).
pub fn parse_nonterminal(
&mut self,
kind: NonterminalKind,
or_pat_mode: OrPatNonterminalMode,
) -> PResult<'a, Nonterminal> {
pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, Nonterminal> {
// Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`)
// needs to have them force-captured here.
// A `macro_rules!` invocation may pass a captured item/expr to a proc-macro,
Expand Down Expand Up @@ -141,12 +133,13 @@ impl<'a> Parser<'a> {
}
}
}
NonterminalKind::Pat => {
let (mut pat, tokens) = self.collect_tokens(|this| match or_pat_mode {
OrPatNonterminalMode::TopPat => {
NonterminalKind::Pat2018 { .. } | NonterminalKind::Pat2021 { .. } => {
let (mut pat, tokens) = self.collect_tokens(|this| match kind {
NonterminalKind::Pat2018 { .. } => this.parse_pat(None),
NonterminalKind::Pat2021 { .. } => {
this.parse_top_pat(GateOr::Yes, RecoverComma::No)
}
OrPatNonterminalMode::NoTopAlt => this.parse_pat(None),
_ => unreachable!(),
})?;
// We have eaten an NtPat, which could already have tokens
if pat.tokens.is_none() {
Expand Down
7 changes: 0 additions & 7 deletions compiler/rustc_parse/src/parser/pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,6 @@ pub(super) enum RecoverComma {
No,
}

/// Used when parsing a non-terminal (see `parse_nonterminal`) to determine if `:pat` should match
/// `top_pat` or `pat<no_top_alt>`. See issue <https://github.com/rust-lang/rust/pull/78935>.
pub enum OrPatNonterminalMode {
TopPat,
NoTopAlt,
}

impl<'a> Parser<'a> {
/// Parses a pattern.
///
Expand Down
Loading

0 comments on commit 44e3daf

Please sign in to comment.