From ac478f2f610bd93c25c82491526ea153ad103ac0 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Fri, 6 Mar 2020 15:11:24 -0500 Subject: [PATCH] Optimize strip_prefix and strip_suffix with str patterns Constructing a Searcher in strip_prefix and strip_suffix is unnecessarily slow when the pattern is a fixed-length string. Add strip_prefix and strip_suffix methods to the Pattern trait, and add optimized implementations of these methods in the str implementation. The old implementation is retained as the default for these methods. --- src/liballoc/string.rs | 15 +++++++ src/libcore/str/mod.rs | 37 ++++------------- src/libcore/str/pattern.rs | 85 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 30 deletions(-) diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs index 7c89d38caa4e6..1e5fe125c550d 100644 --- a/src/liballoc/string.rs +++ b/src/liballoc/string.rs @@ -1849,6 +1849,21 @@ impl<'a, 'b> Pattern<'a> for &'b String { fn is_prefix_of(self, haystack: &'a str) -> bool { self[..].is_prefix_of(haystack) } + + #[inline] + fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { + self[..].strip_prefix_of(haystack) + } + + #[inline] + fn is_suffix_of(self, haystack: &'a str) -> bool { + self[..].is_suffix_of(haystack) + } + + #[inline] + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> { + self[..].strip_suffix_of(haystack) + } } #[stable(feature = "rust1", since = "1.0.0")] diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 013ca182c13cd..681dfda4ad517 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -9,7 +9,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use self::pattern::Pattern; -use self::pattern::{DoubleEndedSearcher, ReverseSearcher, SearchStep, Searcher}; +use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher}; use crate::char; use crate::fmt::{self, Write}; @@ -3986,26 +3986,15 @@ impl str { /// ``` /// #![feature(str_strip)] /// - /// assert_eq!("foobar".strip_prefix("foo"), Some("bar")); - /// assert_eq!("foobar".strip_prefix("bar"), None); + /// assert_eq!("foo:bar".strip_prefix("foo:"), Some("bar")); + /// assert_eq!("foo:bar".strip_prefix("bar"), None); /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo")); /// ``` #[must_use = "this returns the remaining substring as a new slice, \ without modifying the original"] #[unstable(feature = "str_strip", reason = "newly added", issue = "67302")] pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> { - let mut matcher = prefix.into_searcher(self); - if let SearchStep::Match(start, len) = matcher.next() { - debug_assert_eq!( - start, 0, - "The first search step from Searcher \ - must include the first character" - ); - // SAFETY: `Searcher` is known to return valid indices. - unsafe { Some(self.get_unchecked(len..)) } - } else { - None - } + prefix.strip_prefix_of(self) } /// Returns a string slice with the suffix removed. @@ -4020,8 +4009,8 @@ impl str { /// /// ``` /// #![feature(str_strip)] - /// assert_eq!("barfoo".strip_suffix("foo"), Some("bar")); - /// assert_eq!("barfoo".strip_suffix("bar"), None); + /// assert_eq!("bar:foo".strip_suffix(":foo"), Some("bar")); + /// assert_eq!("bar:foo".strip_suffix("bar"), None); /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo")); /// ``` #[must_use = "this returns the remaining substring as a new slice, \ @@ -4032,19 +4021,7 @@ impl str { P: Pattern<'a>,

>::Searcher: ReverseSearcher<'a>, { - let mut matcher = suffix.into_searcher(self); - if let SearchStep::Match(start, end) = matcher.next_back() { - debug_assert_eq!( - end, - self.len(), - "The first search step from ReverseSearcher \ - must include the last character" - ); - // SAFETY: `Searcher` is known to return valid indices. - unsafe { Some(self.get_unchecked(..start)) } - } else { - None - } + suffix.strip_suffix_of(self) } /// Returns a string slice with all suffixes that match a pattern diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index ffa418cba6c99..30fd55f7b7f64 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -47,6 +47,22 @@ pub trait Pattern<'a>: Sized { matches!(self.into_searcher(haystack).next(), SearchStep::Match(0, _)) } + /// Removes the pattern from the front of haystack, if it matches. + #[inline] + fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { + if let SearchStep::Match(start, len) = self.into_searcher(haystack).next() { + debug_assert_eq!( + start, 0, + "The first search step from Searcher \ + must include the first character" + ); + // SAFETY: `Searcher` is known to return valid indices. + unsafe { Some(haystack.get_unchecked(len..)) } + } else { + None + } + } + /// Checks whether the pattern matches at the back of the haystack #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool @@ -55,6 +71,26 @@ pub trait Pattern<'a>: Sized { { matches!(self.into_searcher(haystack).next_back(), SearchStep::Match(_, j) if haystack.len() == j) } + + /// Removes the pattern from the back of haystack, if it matches. + #[inline] + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> + where + Self::Searcher: ReverseSearcher<'a>, + { + if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() { + debug_assert_eq!( + end, + haystack.len(), + "The first search step from ReverseSearcher \ + must include the last character" + ); + // SAFETY: `Searcher` is known to return valid indices. + unsafe { Some(haystack.get_unchecked(..start)) } + } else { + None + } + } } // Searcher @@ -448,6 +484,11 @@ impl<'a> Pattern<'a> for char { self.encode_utf8(&mut [0u8; 4]).is_prefix_of(haystack) } + #[inline] + fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { + self.encode_utf8(&mut [0u8; 4]).strip_prefix_of(haystack) + } + #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool where @@ -455,6 +496,14 @@ impl<'a> Pattern<'a> for char { { self.encode_utf8(&mut [0u8; 4]).is_suffix_of(haystack) } + + #[inline] + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> + where + Self::Searcher: ReverseSearcher<'a>, + { + self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack) + } } ///////////////////////////////////////////////////////////////////////////// @@ -569,6 +618,11 @@ macro_rules! pattern_methods { ($pmap)(self).is_prefix_of(haystack) } + #[inline] + fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { + ($pmap)(self).strip_prefix_of(haystack) + } + #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool where @@ -576,6 +630,14 @@ macro_rules! pattern_methods { { ($pmap)(self).is_suffix_of(haystack) } + + #[inline] + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> + where + $t: ReverseSearcher<'a>, + { + ($pmap)(self).strip_suffix_of(haystack) + } }; } @@ -715,11 +777,34 @@ impl<'a, 'b> Pattern<'a> for &'b str { haystack.as_bytes().starts_with(self.as_bytes()) } + /// Removes the pattern from the front of haystack, if it matches. + #[inline] + fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { + if self.is_prefix_of(haystack) { + // SAFETY: prefix was just verified to exist. + unsafe { Some(haystack.get_unchecked(self.as_bytes().len()..)) } + } else { + None + } + } + /// Checks whether the pattern matches at the back of the haystack #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool { haystack.as_bytes().ends_with(self.as_bytes()) } + + /// Removes the pattern from the back of haystack, if it matches. + #[inline] + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> { + if self.is_suffix_of(haystack) { + let i = haystack.len() - self.as_bytes().len(); + // SAFETY: suffix was just verified to exist. + unsafe { Some(haystack.get_unchecked(..i)) } + } else { + None + } + } } /////////////////////////////////////////////////////////////////////////////