// This module defines an internal builder that encapsulates all interaction
// with meta::Regex construction, and then 4 public API builders that wrap
// around it. The docs are essentially repeated on each of the 4 public
// builders, with tweaks to the examples as needed.
//
// The reason why there are so many builders is partially because of a misstep
// in the initial API design: the builder constructor takes in the pattern
// strings instead of using the `build` method to accept the pattern strings.
// This means `new` has a different signature for each builder. It probably
// would have been nicer to use one builder with `fn new()`, and then add
// `build(pat)` and `build_many(pats)` constructors.
//
// The other reason is because I think the `bytes` module should probably
// have its own builder type. That way, it is completely isolated from the
// top-level API.
//
// If I could do it again, I'd probably have a `regex::Builder` and a
// `regex::bytes::Builder`. Each would have `build` and `build_set` (or
// `build_many`) methods for constructing a single pattern `Regex` and a
// multi-pattern `RegexSet`, respectively.
use alloc::{
string::{String, ToString},
sync::Arc,
vec,
vec::Vec,
};
use regex_automata::{
meta, nfa::thompson::WhichCaptures, util::syntax, MatchKind,
};
use crate::error::Error;
/// A builder for constructing a `Regex`, `bytes::Regex`, `RegexSet` or a
/// `bytes::RegexSet`.
///
/// This is essentially the implementation of the four different builder types
/// in the public API: `RegexBuilder`, `bytes::RegexBuilder`, `RegexSetBuilder`
/// and `bytes::RegexSetBuilder`.
#[derive(Clone, Debug)]
struct Builder {
pats: Vec<String>,
metac: meta::Config,
syntaxc: syntax::Config,
}
impl Default for Builder {
fn default() -> Builder {
let metac = meta::Config::new()
.nfa_size_limit(Some(10 * (1 << 20)))
.hybrid_cache_capacity(2 * (1 << 20));
Builder { pats: vec![], metac, syntaxc: syntax::Config::default() }
}
}
impl Builder {
fn new<I, S>(patterns: I) -> Builder
where
S: AsRef<str>,
I: IntoIterator<Item = S>,
{
let mut b = Builder::default();
b.pats.extend(patterns.into_iter().map(|p| p.as_ref().to_string()));
b
}
fn build_one_string(&self) -> Result<crate::Regex, Error> {
assert_eq!(1, self.pats.len());
let metac = self
.metac
.clone()
.match_kind(MatchKind::LeftmostFirst)
.utf8_empty(true);
let syntaxc = self.syntaxc.clone().utf8(true);
let pattern = Arc::from(self.pats[0].as_str());
meta::Builder::new()
.configure(metac)
.syntax(syntaxc)
.build(&pattern)
.map(|meta| crate::Regex { meta, pattern })
.map_err(Error::from_meta_build_error)
}
fn build_one_bytes(&self) -> Result<crate::bytes::Regex, Error> {
assert_eq!(1, self.pats.len());
let metac = self
.metac
.clone()
.match_kind(MatchKind::LeftmostFirst)
.utf8_empty(false);
let syntaxc = self.syntaxc.clone().utf8(false);
let pattern = Arc::from(self.pats[0].as_str());
meta::Builder::new()
.configure(metac)
.syntax(syntaxc)
.build(&pattern)
.map(|meta| crate::bytes::Regex { meta, pattern })
.map_err(Error::from_meta_build_error)
}
fn build_many_string(&self) -> Result<crate::RegexSet, Error> {
let metac = self
.metac
.clone()
.match_kind(MatchKind::All)
.utf8_empty(true)
.which_captures(WhichCaptures::None);
let syntaxc = self.syntaxc.clone().utf8(true);
let patterns = Arc::from(self.pats.as_slice());
meta::Builder::new()
.configure(metac)
.syntax(syntaxc)
.build_many(&patterns)
.map(|meta| crate::RegexSet { meta, patterns })
.map_err(Error::from_meta_build_error)
}
fn build_many_bytes(&self) -> Result<crate::bytes::RegexSet, Error> {
let metac = self
.metac
.clone()
.match_kind(MatchKind::All)
.utf8_empty(false)
.which_captures(WhichCaptures::None);
let syntaxc = self.syntaxc.clone().utf8(false);
let patterns = Arc::from(self.pats.as_slice());
meta::Builder::new()
.configure(metac)
.syntax(syntaxc)
.build_many(&patterns)
.map(|meta| crate::bytes::RegexSet { meta, patterns })
.map_err(Error::from_meta_build_error)
}
fn case_insensitive(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.case_insensitive(yes);
self
}
fn multi_line(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.multi_line(yes);
self
}
fn dot_matches_new_line(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.dot_matches_new_line(yes);
self
}
fn crlf(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.crlf(yes);
self
}
fn line_terminator(&mut self, byte: u8) -> &mut Builder {
self.metac = self.metac.clone().line_terminator(byte);
self.syntaxc = self.syntaxc.line_terminator(byte);
self
}
fn swap_greed(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.swap_greed(yes);
self
}
fn ignore_whitespace(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.ignore_whitespace(yes);
self
}
fn unicode(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.unicode(yes);
self
}
fn octal(&mut self, yes: bool) -> &mut Builder {
self.syntaxc = self.syntaxc.octal(yes);
self
}
fn size_limit(&mut self, limit: usize) -> &mut Builder {
self.metac = self.metac.clone().nfa_size_limit(Some(limit));
self
}
fn dfa_size_limit(&mut self, limit: usize) -> &mut Builder {
self.metac = self.metac.clone().hybrid_cache_capacity(limit);
self
}
fn nest_limit(&mut self, limit: u32) -> &mut Builder {
self.syntaxc = self.syntaxc.nest_limit(limit);
self
}
}
pub(crate) mod string {
use crate::{error::Error, Regex, RegexSet};
use super::Builder;
/// A configurable builder for a [`Regex`].
///
/// This builder can be used to programmatically set flags such as `i`
/// (case insensitive) and `x` (for verbose mode). This builder can also be
/// used to configure things like the line terminator and a size limit on
/// the compiled regular expression.
#[derive(Clone, Debug)]
pub struct RegexBuilder {
builder: Builder,
}
impl RegexBuilder {
/// Create a new builder with a default configuration for the given
/// pattern.
///
/// If the pattern is invalid or exceeds the configured size limits,
/// then an error will be returned when [`RegexBuilder::build`] is
/// called.
pub fn new(pattern: &str) -> RegexBuilder {
RegexBuilder { builder: Builder::new([pattern]) }
}
/// Compiles the pattern given to `RegexBuilder::new` with the
/// configuration set on this builder.
///
/// If the pattern isn't a valid regex or if a configured size limit
/// was exceeded, then an error is returned.
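///
/// # Example
///
/// A brief example of compiling a pattern with one flag set:
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"hello")
///     .case_insensitive(true)
///     .build()
///     .unwrap();
/// assert!(re.is_match("HELLO, world"));
/// ```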
pub fn build(&self) -> Result<Regex, Error> {
self.builder.build_one_string()
}
/// This configures Unicode mode for the entire pattern.
///
/// Enabling Unicode mode does a number of things:
///
/// * Most fundamentally, it causes the fundamental atom of matching
/// to be a single codepoint. When Unicode mode is disabled, it's a
/// single byte. For example, when Unicode mode is enabled, `.` will
/// match `💩` once, whereas it will match 4 times when Unicode mode
/// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.)
/// * Case insensitive matching uses Unicode simple case folding rules.
/// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
/// available.
/// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
/// `\d`.
/// * The word boundary assertions, `\b` and `\B`, use the Unicode
/// definition of a word character.
///
/// Note that if Unicode mode is disabled, then the regex will fail to
/// compile if it could match invalid UTF-8. For example, when Unicode
/// mode is disabled, `.` matches any byte (except for `\n`), so it
/// can match invalid UTF-8 and thus building a regex
/// from it will fail. Another example is `\w` and `\W`. Since `\w` can
/// only match ASCII bytes when Unicode mode is disabled, it's allowed.
/// But `\W` can match more than ASCII bytes, including invalid UTF-8,
/// and so it is not allowed. This restriction can be lifted only by
/// using a [`bytes::Regex`](crate::bytes::Regex).
///
/// For more details on the Unicode support in this crate, see the
/// [Unicode section](crate#unicode) in this crate's top-level
/// documentation.
///
/// The default for this is `true`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"\w")
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally greek letters would be included in \w, but since
/// // Unicode mode is disabled, it only matches ASCII letters.
/// assert!(!re.is_match("δ"));
///
/// let re = RegexBuilder::new(r"s")
/// .case_insensitive(true)
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally 'Å¿' is included when searching for 's' case
/// // insensitively due to Unicode's simple case folding rules. But
/// // when Unicode mode is disabled, only ASCII case insensitive rules
/// // are used.
/// assert!(!re.is_match("Å¿"));
/// ```
pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.unicode(yes);
self
}
/// This configures whether to enable case insensitive matching for the
/// entire pattern.
///
/// This setting can also be configured using the inline flag `i`
/// in the pattern. For example, `(?i:foo)` matches `foo` case
/// insensitively while `(?-i:foo)` matches `foo` case sensitively.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"foo(?-i:bar)quux")
/// .case_insensitive(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match("FoObarQuUx"));
/// // Even though case insensitive matching is enabled in the builder,
/// // it can be locally disabled within the pattern. In this case,
/// // `bar` is matched case sensitively.
/// assert!(!re.is_match("fooBARquux"));
/// ```
pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.case_insensitive(yes);
self
}
/// This configures multi-line mode for the entire pattern.
///
/// Enabling multi-line mode changes the behavior of the `^` and `$`
/// anchor assertions. Instead of only matching at the beginning and
/// end of a haystack, respectively, multi-line mode causes them to
/// match at the beginning and end of a line *in addition* to the
/// beginning and end of a haystack. More precisely, `^` will match at
/// the position immediately following a `\n` and `$` will match at the
/// position immediately preceding a `\n`.
///
/// The behavior of this option can be impacted by other settings too:
///
/// * The [`RegexBuilder::line_terminator`] option changes `\n` above
/// to any ASCII byte.
/// * The [`RegexBuilder::crlf`] option changes the line terminator to
/// be either `\r` or `\n`, but never at the position between a `\r`
/// and `\n`.
///
/// This setting can also be configured using the inline flag `m` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^foo$")
/// .multi_line(true)
/// .build()
/// .unwrap();
/// assert_eq!(Some(1..4), re.find("\nfoo\n").map(|m| m.range()));
/// ```
pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.multi_line(yes);
self
}
/// This configures dot-matches-new-line mode for the entire pattern.
///
/// Perhaps surprisingly, the default behavior for `.` is not to match
/// any character, but rather, to match any character except for the
/// line terminator (which is `\n` by default). When this mode is
/// enabled, the behavior changes such that `.` truly matches any
/// character.
///
/// This setting can also be configured using the inline flag `s` in
/// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent
/// regexes.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"foo.bar")
/// .dot_matches_new_line(true)
/// .build()
/// .unwrap();
/// let hay = "foo\nbar";
/// assert_eq!(Some("foo\nbar"), re.find(hay).map(|m| m.as_str()));
/// ```
pub fn dot_matches_new_line(
&mut self,
yes: bool,
) -> &mut RegexBuilder {
self.builder.dot_matches_new_line(yes);
self
}
/// This configures CRLF mode for the entire pattern.
///
/// When CRLF mode is enabled, both `\r` ("carriage return" or CR for
/// short) and `\n` ("line feed" or LF for short) are treated as line
/// terminators. This results in the following:
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
/// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
///
/// This setting can also be configured using the inline flag `R` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^foo$")
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// let hay = "\r\nfoo\r\n";
/// // If CRLF mode weren't enabled here, then '$' wouldn't match
/// // immediately after 'foo', and thus no match would be found.
/// assert_eq!(Some("foo"), re.find(hay).map(|m| m.as_str()));
/// ```
///
/// This example demonstrates that `^` will never match at a position
/// between `\r` and `\n`. (`$` will similarly not match between a `\r`
/// and a `\n`.)
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^")
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// let hay = "\r\n\r\n";
/// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect();
/// assert_eq!(ranges, vec![0..0, 2..2, 4..4]);
/// ```
pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.crlf(yes);
self
}
/// Configures the line terminator to be used by the regex.
///
/// The line terminator is relevant in two ways for a particular regex:
///
/// * When dot-matches-new-line mode is *not* enabled (the default),
/// then `.` will match any character except for the configured line
/// terminator.
/// * When multi-line mode is enabled (not the default), then `^` and
/// `$` will match immediately after and before, respectively, a line
/// terminator.
///
/// In both cases, if CRLF mode is enabled in a particular context,
/// then it takes precedence over any configured line terminator.
///
/// This option cannot be configured from within the pattern.
///
/// The default line terminator is `\n`.
///
/// # Example
///
/// This shows how to treat the NUL byte as a line terminator. This can
/// be a useful heuristic when searching binary data.
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^foo$")
/// .multi_line(true)
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// let hay = "\x00foo\x00";
/// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range()));
/// ```
///
/// This example shows that the behavior of `.` is impacted by this
/// setting as well:
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r".")
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// assert!(re.is_match("\n"));
/// assert!(!re.is_match("\x00"));
/// ```
///
/// This shows that building a regex will fail if the byte given
/// is not ASCII and the pattern could result in matching invalid
/// UTF-8. This is because any singular non-ASCII byte is not valid
/// UTF-8, and it is not permitted for a [`Regex`] to match invalid
/// UTF-8. (It is permissible to use a non-ASCII byte when building a
/// [`bytes::Regex`](crate::bytes::Regex).)
///
/// ```
/// use regex::RegexBuilder;
///
/// assert!(RegexBuilder::new(r".").line_terminator(0x80).build().is_err());
/// // Note that using a non-ASCII byte isn't enough on its own to
/// // cause regex compilation to fail. You actually have to make use
/// // of it in the regex in a way that leads to matching invalid
/// // UTF-8. If you don't, then regex compilation will succeed!
/// assert!(RegexBuilder::new(r"a").line_terminator(0x80).build().is_ok());
/// ```
pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder {
self.builder.line_terminator(byte);
self
}
/// This configures swap-greed mode for the entire pattern.
///
/// When swap-greed mode is enabled, patterns like `a+` will become
/// non-greedy and patterns like `a+?` will become greedy. In other
/// words, the meanings of `a+` and `a+?` are switched.
///
/// This setting can also be configured using the inline flag `U` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"a+")
/// .swap_greed(true)
/// .build()
/// .unwrap();
/// assert_eq!(Some("a"), re.find("aaa").map(|m| m.as_str()));
/// ```
pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.swap_greed(yes);
self
}
/// This configures verbose mode for the entire pattern.
///
/// When enabled, whitespace will be treated as insignificant in the
/// pattern and `#` can be used to start a comment until the next new
/// line.
///
/// Normally, in most places in a pattern, whitespace is treated
/// literally. For example ` +` will match one or more ASCII whitespace
/// characters.
///
/// When verbose mode is enabled, `\#` can be used to match a literal
/// `#` and `\ ` can be used to match a literal ASCII whitespace
/// character.
///
/// Verbose mode is useful for permitting regexes to be formatted and
/// broken up more nicely. This may make them more easily readable.
///
/// This setting can also be configured using the inline flag `x` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// let pat = r"
/// \b
/// (?<first>\p{Uppercase}\w*) # always start with uppercase letter
/// [\s--\n]+ # whitespace should separate names
/// (?: # middle name can be an initial!
/// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
/// [\s--\n]+
/// )?
/// (?<last>\p{Uppercase}\w*)
/// \b
/// ";
/// let re = RegexBuilder::new(pat)
/// .ignore_whitespace(true)
/// .build()
/// .unwrap();
///
/// let caps = re.captures("Harry Potter").unwrap();
/// assert_eq!("Harry", &caps["first"]);
/// assert_eq!("Potter", &caps["last"]);
///
/// let caps = re.captures("Harry J. Potter").unwrap();
/// assert_eq!("Harry", &caps["first"]);
/// // Since a middle name/initial isn't required for an overall match,
/// // we can't assume that 'initial' or 'middle' will be populated!
/// assert_eq!(Some("J"), caps.name("initial").map(|m| m.as_str()));
/// assert_eq!(None, caps.name("middle").map(|m| m.as_str()));
/// assert_eq!("Potter", &caps["last"]);
///
/// let caps = re.captures("Harry James Potter").unwrap();
/// assert_eq!("Harry", &caps["first"]);
/// // Since a middle name/initial isn't required for an overall match,
/// // we can't assume that 'initial' or 'middle' will be populated!
/// assert_eq!(None, caps.name("initial").map(|m| m.as_str()));
/// assert_eq!(Some("James"), caps.name("middle").map(|m| m.as_str()));
/// assert_eq!("Potter", &caps["last"]);
/// ```
pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.ignore_whitespace(yes);
self
}
/// This configures octal mode for the entire pattern.
///
/// Octal syntax is a little-known way of uttering Unicode codepoints
/// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
/// equivalent patterns, where the last example shows octal syntax.
///
/// While supporting octal syntax isn't in and of itself a problem,
/// it does make good error messages harder. That is, in PCRE based
/// regex engines, syntax like `\1` invokes a backreference, which is
/// explicitly unsupported by this library. However, many users expect
/// backreferences to be supported. Therefore, when octal support
/// is disabled, the error message will explicitly mention that
/// backreferences aren't supported.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// // Normally this pattern would not compile, with an error message
/// // about backreferences not being supported. But with octal mode
/// // enabled, octal escape sequences work.
/// let re = RegexBuilder::new(r"\141")
/// .octal(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match("a"));
/// ```
pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.octal(yes);
self
}
/// Sets the approximate size limit, in bytes, of the compiled regex.
///
/// This roughly corresponds to the amount of heap memory, in
/// bytes, occupied by a single regex. If the regex would otherwise
/// approximately exceed this limit, then compiling that regex will
/// fail.
///
/// The main utility of a method like this is to avoid compiling
/// regexes that use an unexpected amount of resources, such as
/// time and memory. Even if the memory usage of a large regex is
/// acceptable, its search time may not be. Namely, worst case time
/// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and
/// `n ~ len(haystack)`. That is, search time depends, in part, on the
/// size of the compiled regex. This means that putting a limit on the
/// size of the regex limits how much a regex can impact search time.
///
/// For more information about regex size limits, see the section on
/// [untrusted inputs](crate#untrusted-input) in the top-level crate
/// documentation.
///
/// The default for this is some reasonable number that permits most
/// patterns to compile successfully.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// // It may surprise you how big some seemingly small patterns can
/// // be! Since \w is Unicode aware, this generates a regex that can
/// // match approximately 140,000 distinct codepoints.
/// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err());
/// ```
pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
self.builder.size_limit(bytes);
self
}
/// Set the approximate capacity, in bytes, of the cache of transitions
/// used by the lazy DFA.
///
/// While the lazy DFA isn't always used, it tends to be the most
/// commonly used regex engine in default configurations. It tends to
/// adopt the performance profile of a fully built DFA, but without the
/// downside of taking worst case exponential time to build.
///
/// The downside is that it needs to keep a cache of transitions and
/// states that are built while running a search, and this cache
/// can fill up. When it fills up, the cache will reset itself. Any
/// previously generated states and transitions will then need to be
/// re-generated. If this happens too many times, then this library
/// will bail out of using the lazy DFA and switch to a different regex
/// engine.
///
/// If your regex provokes this particular downside of the lazy DFA,
/// then it may be beneficial to increase its cache capacity. This will
/// potentially reduce the frequency of cache resetting (ideally to
/// `0`). While it won't fix all potential performance problems with
/// the lazy DFA, increasing the cache capacity does fix some.
///
/// There is no easy way to determine, a priori, whether increasing
/// this cache capacity will help. In general, the larger your regex,
/// the more cache it's likely to use. But that isn't an ironclad rule.
/// For example, a regex like `[01]*1[01]{N}` would normally produce a
/// fully built DFA that is exponential in size with respect to `N`.
/// The lazy DFA will prevent exponential space blow-up, but its cache
/// is likely to fill up, even when it's large and even for smallish
/// values of `N`.
///
/// If you aren't sure whether this helps or not, it is sensible to
/// set this to some arbitrarily large number in testing, such as
/// `usize::MAX`. Namely, this represents the amount of capacity that
/// *may* be used. It's probably not a good idea to use `usize::MAX` in
/// production though, since it implies there are no controls on heap
/// memory used by this library during a search. In effect, set it to
/// whatever you're willing to allocate for a single regex search.
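///
/// # Example
///
/// A brief sketch of raising the cache capacity; the `10 * (1 << 20)`
/// (10 MiB) figure here is arbitrary and only for illustration:
///
/// ```
/// use regex::RegexBuilder;
///
/// let re = RegexBuilder::new(r"\w{20}")
///     .dfa_size_limit(10 * (1 << 20))
///     .build()
///     .unwrap();
/// assert!(re.is_match(&"a".repeat(30)));
/// ```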
pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
self.builder.dfa_size_limit(bytes);
self
}
/// Set the nesting limit for this parser.
///
/// The nesting limit controls how deep the abstract syntax tree is
/// allowed to be. If the AST exceeds the given limit (e.g., with too
/// many nested groups), then an error is returned by the parser.
///
/// The purpose of this limit is to act as a heuristic to prevent stack
/// overflow for consumers that do structural induction on an AST using
/// explicit recursion. While this crate never does this (instead using
/// constant stack space and moving the call stack to the heap), other
/// crates may.
///
/// This limit is not checked until the entire AST is parsed.
/// Therefore, if callers want to put a limit on the amount of heap
/// space used, then they should impose a limit on the length, in
/// bytes, of the concrete pattern string. In particular, this is
/// viable since this parser implementation will limit itself to heap
/// space proportional to the length of the pattern string. See also
/// the [untrusted inputs](crate#untrusted-input) section in the
/// top-level crate documentation for more information about this.
///
/// Note that a nest limit of `0` will return a nest limit error for
/// most patterns but not all. For example, a nest limit of `0` permits
/// `a` but not `ab`, since `ab` requires an explicit concatenation,
/// which results in a nest depth of `1`. In general, a nest limit is
/// not something that manifests in an obvious way in the concrete
/// syntax; therefore, it should not be used in a granular way.
///
/// # Example
///
/// ```
/// use regex::RegexBuilder;
///
/// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok());
/// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err());
/// ```
pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
self.builder.nest_limit(limit);
self
}
}
/// A configurable builder for a [`RegexSet`].
///
/// This builder can be used to programmatically set flags such as
/// `i` (case insensitive) and `x` (for verbose mode). This builder
/// can also be used to configure things like the line terminator
/// and a size limit on the compiled regular expression.
#[derive(Clone, Debug)]
pub struct RegexSetBuilder {
builder: Builder,
}
impl RegexSetBuilder {
/// Create a new builder with a default configuration for the given
/// patterns.
///
/// If the patterns are invalid or exceed the configured size limits,
/// then an error will be returned when [`RegexSetBuilder::build`] is
/// called.
pub fn new<I, S>(patterns: I) -> RegexSetBuilder
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
RegexSetBuilder { builder: Builder::new(patterns) }
}
/// Compiles the patterns given to `RegexSetBuilder::new` with the
/// configuration set on this builder.
///
/// If the patterns aren't valid regexes or if a configured size limit
/// was exceeded, then an error is returned.
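///
/// # Example
///
/// For instance, building a set of two patterns with multi-line mode
/// enabled for both:
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let set = RegexSetBuilder::new([r"^foo$", r"^bar$"])
///     .multi_line(true)
///     .build()
///     .unwrap();
/// assert!(set.is_match("xyz\nbar\nxyz"));
/// ```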
pub fn build(&self) -> Result<RegexSet, Error> {
self.builder.build_many_string()
}
/// This configures Unicode mode for all of the patterns.
///
/// Enabling Unicode mode does a number of things:
///
/// * Most fundamentally, it causes the fundamental atom of matching
/// to be a single codepoint. When Unicode mode is disabled, it's a
/// single byte. For example, when Unicode mode is enabled, `.` will
/// match `💩` once, whereas it will match 4 times when Unicode mode
/// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.)
/// * Case insensitive matching uses Unicode simple case folding rules.
/// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
/// available.
/// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
/// `\d`.
/// * The word boundary assertions, `\b` and `\B`, use the Unicode
/// definition of a word character.
///
/// Note that if Unicode mode is disabled, then the regex will fail to
/// compile if it could match invalid UTF-8. For example, when Unicode
/// mode is disabled, `.` matches any byte (except for `\n`), so it
/// can match invalid UTF-8 and thus building a regex
/// from it will fail. Another example is `\w` and `\W`. Since `\w` can
/// only match ASCII bytes when Unicode mode is disabled, it's allowed.
/// But `\W` can match more than ASCII bytes, including invalid UTF-8,
/// and so it is not allowed. This restriction can be lifted only by
/// using a [`bytes::RegexSet`](crate::bytes::RegexSet).
///
/// For more details on the Unicode support in this crate, see the
/// [Unicode section](crate#unicode) in this crate's top-level
/// documentation.
///
/// The default for this is `true`.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"\w"])
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally greek letters would be included in \w, but since
/// // Unicode mode is disabled, it only matches ASCII letters.
/// assert!(!re.is_match("δ"));
///
/// let re = RegexSetBuilder::new([r"s"])
/// .case_insensitive(true)
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally 'Å¿' is included when searching for 's' case
/// // insensitively due to Unicode's simple case folding rules. But
/// // when Unicode mode is disabled, only ASCII case insensitive rules
/// // are used.
/// assert!(!re.is_match("Å¿"));
/// ```
pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.unicode(yes);
self
}
/// This configures whether to enable case insensitive matching for all
/// of the patterns.
///
/// This setting can also be configured using the inline flag `i`
/// in the pattern. For example, `(?i:foo)` matches `foo` case
/// insensitively while `(?-i:foo)` matches `foo` case sensitively.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"])
/// .case_insensitive(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match("FoObarQuUx"));
/// // Even though case insensitive matching is enabled in the builder,
/// // it can be locally disabled within the pattern. In this case,
/// // `bar` is matched case sensitively.
/// assert!(!re.is_match("fooBARquux"));
/// ```
pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.case_insensitive(yes);
self
}
/// This configures multi-line mode for all of the patterns.
///
/// Enabling multi-line mode changes the behavior of the `^` and `$`
/// anchor assertions. Instead of only matching at the beginning and
/// end of a haystack, respectively, multi-line mode causes them to
/// match at the beginning and end of a line *in addition* to the
/// beginning and end of a haystack. More precisely, `^` will match at
/// the position immediately following a `\n` and `$` will match at the
/// position immediately preceding a `\n`.
///
/// The behavior of this option can be impacted by other settings too:
///
/// * The [`RegexSetBuilder::line_terminator`] option changes `\n`
/// above to any ASCII byte.
/// * The [`RegexSetBuilder::crlf`] option changes the line terminator
/// to be either `\r` or `\n`, but never at the position between a `\r`
/// and `\n`.
///
/// This setting can also be configured using the inline flag `m` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^foo$"])
/// .multi_line(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match("\nfoo\n"));
/// ```
pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.multi_line(yes);
self
}
/// This configures dot-matches-new-line mode for all of the patterns.
///
/// Perhaps surprisingly, the default behavior for `.` is not to match
/// any character, but rather, to match any character except for the
/// line terminator (which is `\n` by default). When this mode is
/// enabled, the behavior changes such that `.` truly matches any
/// character.
///
/// This setting can also be configured using the inline flag `s` in
/// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent
/// regexes.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"foo.bar"])
/// .dot_matches_new_line(true)
/// .build()
/// .unwrap();
/// let hay = "foo\nbar";
/// assert!(re.is_match(hay));
/// ```
pub fn dot_matches_new_line(
&mut self,
yes: bool,
) -> &mut RegexSetBuilder {
self.builder.dot_matches_new_line(yes);
self
}
/// This configures CRLF mode for all of the patterns.
///
/// When CRLF mode is enabled, both `\r` ("carriage return" or CR for
/// short) and `\n` ("line feed" or LF for short) are treated as line
/// terminators. This results in the following:
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
/// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
///
/// This setting can also be configured using the inline flag `R` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^foo$"])
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// let hay = "\r\nfoo\r\n";
/// // If CRLF mode weren't enabled here, then '$' wouldn't match
/// // immediately after 'foo', and thus no match would be found.
/// assert!(re.is_match(hay));
/// ```
///
/// This example demonstrates that `^` will never match at a position
/// between `\r` and `\n`. (`$` will similarly not match between a `\r`
/// and a `\n`.)
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^\n"])
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// assert!(!re.is_match("\r\n"));
/// ```
pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.crlf(yes);
self
}
/// Configures the line terminator to be used by the regex.
///
/// The line terminator is relevant in two ways for a particular regex:
///
/// * When dot-matches-new-line mode is *not* enabled (the default),
/// then `.` will match any character except for the configured line
/// terminator.
/// * When multi-line mode is enabled (not the default), then `^` and
/// `$` will match immediately after and before, respectively, a line
/// terminator.
///
/// In both cases, if CRLF mode is enabled in a particular context,
/// then it takes precedence over any configured line terminator.
///
/// This option cannot be configured from within the pattern.
///
/// The default line terminator is `\n`.
///
/// # Example
///
/// This shows how to treat the NUL byte as a line terminator. This can
/// be a useful heuristic when searching binary data.
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^foo$"])
/// .multi_line(true)
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// let hay = "\x00foo\x00";
/// assert!(re.is_match(hay));
/// ```
///
/// This example shows that the behavior of `.` is impacted by this
/// setting as well:
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"."])
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// assert!(re.is_match("\n"));
/// assert!(!re.is_match("\x00"));
/// ```
///
/// This shows that building a regex will fail if the byte given
/// is not ASCII and the pattern could result in matching invalid
/// UTF-8. This is because any singular non-ASCII byte is not valid
/// UTF-8, and it is not permitted for a [`RegexSet`] to match invalid
/// UTF-8. (It is permissible to use a non-ASCII byte when building a
/// [`bytes::RegexSet`](crate::bytes::RegexSet).)
///
/// ```
/// use regex::RegexSetBuilder;
///
/// assert!(
/// RegexSetBuilder::new([r"."])
/// .line_terminator(0x80)
/// .build()
/// .is_err()
/// );
/// // Note that using a non-ASCII byte isn't enough on its own to
/// // cause regex compilation to fail. You actually have to make use
/// // of it in the regex in a way that leads to matching invalid
/// // UTF-8. If you don't, then regex compilation will succeed!
/// assert!(
/// RegexSetBuilder::new([r"a"])
/// .line_terminator(0x80)
/// .build()
/// .is_ok()
/// );
/// ```
pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder {
self.builder.line_terminator(byte);
self
}
/// This configures swap-greed mode for all of the patterns.
///
/// When swap-greed mode is enabled, patterns like `a+` will become
/// non-greedy and patterns like `a+?` will become greedy. In other
/// words, the meanings of `a+` and `a+?` are switched.
///
/// This setting can also be configured using the inline flag `U` in
/// the pattern.
///
/// Note that this is generally not useful for a `RegexSet` since a
/// `RegexSet` can only report whether a pattern matches or not. Since
/// greediness never impacts whether a match is found or not (only the
/// offsets of the match), it follows that whether parts of a pattern
/// are greedy or not doesn't matter for a `RegexSet`.
///
/// The default for this is `false`.
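///
/// # Example
///
/// A small demonstration of the point above: toggling swap-greed mode
/// does not change whether a `RegexSet` reports a match.
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let greedy = RegexSetBuilder::new([r"a+"]).build().unwrap();
/// let swapped = RegexSetBuilder::new([r"a+"]).swap_greed(true).build().unwrap();
/// // A RegexSet only reports whether each pattern matches somewhere in
/// // the haystack, so greediness has no observable effect here.
/// assert_eq!(greedy.is_match("aaa"), swapped.is_match("aaa"));
/// ```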
pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.swap_greed(yes);
self
}
/// This configures verbose mode for all of the patterns.
///
/// When enabled, whitespace will be treated as insignificant in the
/// pattern and `#` can be used to start a comment until the next new
/// line.
///
/// Normally, in most places in a pattern, whitespace is treated
/// literally. For example ` +` will match one or more ASCII whitespace
/// characters.
///
/// When verbose mode is enabled, `\#` can be used to match a literal
/// `#` and `\ ` can be used to match a literal ASCII whitespace
/// character.
///
/// Verbose mode is useful for permitting regexes to be formatted and
/// broken up more nicely. This may make them more easily readable.
///
/// This setting can also be configured using the inline flag `x` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let pat = r"
/// \b
/// (?<first>\p{Uppercase}\w*) # always start with uppercase letter
/// [\s--\n]+ # whitespace should separate names
/// (?: # middle name can be an initial!
/// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
/// [\s--\n]+
/// )?
/// (?<last>\p{Uppercase}\w*)
/// \b
/// ";
/// let re = RegexSetBuilder::new([pat])
/// .ignore_whitespace(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match("Harry Potter"));
/// assert!(re.is_match("Harry J. Potter"));
/// assert!(re.is_match("Harry James Potter"));
/// assert!(!re.is_match("harry J. Potter"));
/// ```
pub fn ignore_whitespace(
&mut self,
yes: bool,
) -> &mut RegexSetBuilder {
self.builder.ignore_whitespace(yes);
self
}
/// This configures octal mode for all of the patterns.
///
/// Octal syntax is a little-known way of uttering Unicode codepoints
/// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
/// equivalent patterns, where the last example shows octal syntax.
///
/// While supporting octal syntax isn't in and of itself a problem,
/// it does make good error messages harder. That is, in PCRE based
/// regex engines, syntax like `\1` invokes a backreference, which is
/// explicitly unsupported by this library. However, many users expect
/// backreferences to be supported. Therefore, when octal support
/// is disabled, the error message will explicitly mention that
/// backreferences aren't supported.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// // Normally this pattern would not compile, with an error message
/// // about backreferences not being supported. But with octal mode
/// // enabled, octal escape sequences work.
/// let re = RegexSetBuilder::new([r"\141"])
/// .octal(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match("a"));
/// ```
pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.octal(yes);
self
}
/// Sets the approximate size limit, in bytes, of the compiled regex.
///
/// This roughly corresponds to the amount of heap memory, in
/// bytes, occupied by a single regex. If the regex would otherwise
/// approximately exceed this limit, then compiling that regex will
/// fail.
///
/// The main utility of a method like this is to avoid compiling
/// regexes that use an unexpected amount of resources, such as
/// time and memory. Even if the memory usage of a large regex is
/// acceptable, its search time may not be. Namely, worst case time
/// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and
/// `n ~ len(haystack)`. That is, search time depends, in part, on the
/// size of the compiled regex. This means that putting a limit on the
/// size of the regex limits how much a regex can impact search time.
///
/// For more information about regex size limits, see the section on
/// [untrusted inputs](crate#untrusted-input) in the top-level crate
/// documentation.
///
/// The default for this is some reasonable number that permits most
/// patterns to compile successfully.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// // It may surprise you how big some seemingly small patterns can
/// // be! Since \w is Unicode aware, this generates a regex that can
/// // match approximately 140,000 distinct codepoints.
/// assert!(
/// RegexSetBuilder::new([r"\w"])
/// .size_limit(45_000)
/// .build()
/// .is_err()
/// );
/// ```
pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder {
self.builder.size_limit(bytes);
self
}
/// Set the approximate capacity, in bytes, of the cache of transitions
/// used by the lazy DFA.
///
/// While the lazy DFA isn't always used, it tends to be the most
/// commonly used regex engine in default configurations. It tends to
/// adopt the performance profile of a fully built DFA, but without the
/// downside of taking worst case exponential time to build.
///
/// The downside is that it needs to keep a cache of transitions and
/// states that are built while running a search, and this cache
/// can fill up. When it fills up, the cache will reset itself. Any
/// previously generated states and transitions will then need to be
/// re-generated. If this happens too many times, then this library
/// will bail out of using the lazy DFA and switch to a different regex
/// engine.
///
/// If your regex provokes this particular downside of the lazy DFA,
/// then it may be beneficial to increase its cache capacity. This will
/// potentially reduce the frequency of cache resetting (ideally to
/// `0`). While it won't fix all potential performance problems with
/// the lazy DFA, increasing the cache capacity does fix some.
///
/// There is no easy way to determine, a priori, whether increasing
/// this cache capacity will help. In general, the larger your regex,
/// the more cache it's likely to use. But that isn't an ironclad rule.
/// For example, a regex like `[01]*1[01]{N}` would normally produce a
/// fully built DFA that is exponential in size with respect to `N`.
/// The lazy DFA will prevent exponential space blow-up, but its cache
/// is likely to fill up, even when it's large and even for smallish
/// values of `N`.
///
/// If you aren't sure whether this helps or not, it is sensible to
/// set this to some arbitrarily large number in testing, such as
/// `usize::MAX`. Namely, this represents the amount of capacity that
/// *may* be used. It's probably not a good idea to use `usize::MAX` in
/// production though, since it implies there are no controls on heap
/// memory used by this library during a search. In effect, set it to
/// whatever you're willing to allocate for a single regex search.
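///
/// # Example
///
/// A brief sketch of raising the cache capacity for a set; the value
/// used here is arbitrary and only for illustration:
///
/// ```
/// use regex::RegexSetBuilder;
///
/// let set = RegexSetBuilder::new([r"\w{20}", r"\d{10}"])
///     .dfa_size_limit(10 * (1 << 20))
///     .build()
///     .unwrap();
/// assert!(set.is_match("1234567890"));
/// ```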
pub fn dfa_size_limit(
&mut self,
bytes: usize,
) -> &mut RegexSetBuilder {
self.builder.dfa_size_limit(bytes);
self
}
/// Set the nesting limit for this parser.
///
/// The nesting limit controls how deep the abstract syntax tree is
/// allowed to be. If the AST exceeds the given limit (e.g., with too
/// many nested groups), then an error is returned by the parser.
///
/// The purpose of this limit is to act as a heuristic to prevent stack
/// overflow for consumers that do structural induction on an AST using
/// explicit recursion. While this crate never does this (instead using
/// constant stack space and moving the call stack to the heap), other
/// crates may.
///
/// This limit is not checked until the entire AST is parsed.
/// Therefore, if callers want to put a limit on the amount of heap
/// space used, then they should impose a limit on the length, in
/// bytes, of the concrete pattern string. In particular, this is
/// viable since this parser implementation will limit itself to heap
/// space proportional to the length of the pattern string. See also
/// the [untrusted inputs](crate#untrusted-input) section in the
/// top-level crate documentation for more information about this.
///
/// Note that a nest limit of `0` will return a nest limit error for
/// most patterns but not all. For example, a nest limit of `0` permits
/// `a` but not `ab`, since `ab` requires an explicit concatenation,
/// which results in a nest depth of `1`. In general, a nest limit is
/// not something that manifests in an obvious way in the concrete
/// syntax; therefore, it should not be used in a granular way.
///
/// # Example
///
/// ```
/// use regex::RegexSetBuilder;
///
/// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok());
/// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err());
/// ```
pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder {
self.builder.nest_limit(limit);
self
}
}
}
pub(crate) mod bytes {
use crate::{
bytes::{Regex, RegexSet},
error::Error,
};
use super::Builder;
/// A configurable builder for a [`Regex`].
///
/// This builder can be used to programmatically set flags such as `i`
/// (case insensitive) and `x` (for verbose mode). This builder can also be
/// used to configure things like the line terminator and a size limit on
/// the compiled regular expression.
#[derive(Clone, Debug)]
pub struct RegexBuilder {
builder: Builder,
}
impl RegexBuilder {
/// Create a new builder with a default configuration for the given
/// pattern.
///
/// If the pattern is invalid or exceeds the configured size limits,
/// then an error will be returned when [`RegexBuilder::build`] is
/// called.
pub fn new(pattern: &str) -> RegexBuilder {
RegexBuilder { builder: Builder::new([pattern]) }
}
/// Compiles the pattern given to `RegexBuilder::new` with the
/// configuration set on this builder.
///
/// If the pattern isn't a valid regex or if a configured size limit
/// was exceeded, then an error is returned.
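///
/// # Example
///
/// For instance, compiling a pattern that is then used to search raw
/// bytes (which need not be valid UTF-8):
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"foo")
///     .case_insensitive(true)
///     .build()
///     .unwrap();
/// assert!(re.is_match(b"\xFF FOO \xFF"));
/// ```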
pub fn build(&self) -> Result<Regex, Error> {
self.builder.build_one_bytes()
}
/// This configures Unicode mode for the entire pattern.
///
/// Enabling Unicode mode does a number of things:
///
/// * Most fundamentally, it causes the fundamental atom of matching
/// to be a single codepoint. When Unicode mode is disabled, it's a
/// single byte. For example, when Unicode mode is enabled, `.` will
/// match `💩` once, whereas it will match 4 times when Unicode mode
/// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.)
/// * Case insensitive matching uses Unicode simple case folding rules.
/// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
/// available.
/// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
/// `\d`.
/// * The word boundary assertions, `\b` and `\B`, use the Unicode
/// definition of a word character.
///
/// Note that unlike the top-level `Regex` for searching `&str`, it
/// is permitted to disable Unicode mode even if the resulting pattern
/// could match invalid UTF-8. For example, `(?-u:.)` is not a valid
/// pattern for a top-level `Regex`, but is valid for a `bytes::Regex`.
///
/// For more details on the Unicode support in this crate, see the
/// [Unicode section](crate#unicode) in this crate's top-level
/// documentation.
///
/// The default for this is `true`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"\w")
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally greek letters would be included in \w, but since
/// // Unicode mode is disabled, it only matches ASCII letters.
/// assert!(!re.is_match("δ".as_bytes()));
///
/// let re = RegexBuilder::new(r"s")
/// .case_insensitive(true)
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally 'Å¿' is included when searching for 's' case
/// // insensitively due to Unicode's simple case folding rules. But
/// // when Unicode mode is disabled, only ASCII case insensitive rules
/// // are used.
/// assert!(!re.is_match("Å¿".as_bytes()));
/// ```
///
/// Since this builder is for constructing a [`bytes::Regex`](Regex),
/// one can disable Unicode mode even if it would match invalid UTF-8:
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r".")
/// .unicode(false)
/// .build()
/// .unwrap();
/// // With Unicode mode disabled, `.` matches any byte (except `\n`),
/// // including bytes that are invalid UTF-8.
/// assert!(re.is_match(b"\xFF"));
/// ```
pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.unicode(yes);
self
}
/// This configures whether to enable case insensitive matching for the
/// entire pattern.
///
/// This setting can also be configured using the inline flag `i`
/// in the pattern. For example, `(?i:foo)` matches `foo` case
/// insensitively while `(?-i:foo)` matches `foo` case sensitively.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"foo(?-i:bar)quux")
/// .case_insensitive(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"FoObarQuUx"));
/// // Even though case insensitive matching is enabled in the builder,
/// // it can be locally disabled within the pattern. In this case,
/// // `bar` is matched case sensitively.
/// assert!(!re.is_match(b"fooBARquux"));
/// ```
pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.case_insensitive(yes);
self
}
/// This configures multi-line mode for the entire pattern.
///
/// Enabling multi-line mode changes the behavior of the `^` and `$`
/// anchor assertions. Instead of only matching at the beginning and
/// end of a haystack, respectively, multi-line mode causes them to
/// match at the beginning and end of a line *in addition* to the
/// beginning and end of a haystack. More precisely, `^` will match at
/// the position immediately following a `\n` and `$` will match at the
/// position immediately preceding a `\n`.
///
/// The behavior of this option can be impacted by other settings too:
///
/// * The [`RegexBuilder::line_terminator`] option changes `\n` above
/// to any ASCII byte.
/// * The [`RegexBuilder::crlf`] option changes the line terminator to
/// be either `\r` or `\n`, but never at the position between a `\r`
/// and `\n`.
///
/// This setting can also be configured using the inline flag `m` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^foo$")
/// .multi_line(true)
/// .build()
/// .unwrap();
/// assert_eq!(Some(1..4), re.find(b"\nfoo\n").map(|m| m.range()));
/// ```
pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.multi_line(yes);
self
}
/// This configures dot-matches-new-line mode for the entire pattern.
///
/// Perhaps surprisingly, the default behavior for `.` is not to match
/// any character, but rather, to match any character except for the
/// line terminator (which is `\n` by default). When this mode is
/// enabled, the behavior changes such that `.` truly matches any
/// character.
///
/// This setting can also be configured using the inline flag `s` in
/// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent
/// regexes.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"foo.bar")
/// .dot_matches_new_line(true)
/// .build()
/// .unwrap();
/// let hay = b"foo\nbar";
/// assert_eq!(Some(&b"foo\nbar"[..]), re.find(hay).map(|m| m.as_bytes()));
/// ```
pub fn dot_matches_new_line(
&mut self,
yes: bool,
) -> &mut RegexBuilder {
self.builder.dot_matches_new_line(yes);
self
}
/// This configures CRLF mode for the entire pattern.
///
/// When CRLF mode is enabled, both `\r` ("carriage return" or CR for
/// short) and `\n` ("line feed" or LF for short) are treated as line
/// terminators. This results in the following:
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
/// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
///
/// This setting can also be configured using the inline flag `R` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^foo$")
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// let hay = b"\r\nfoo\r\n";
/// // If CRLF mode weren't enabled here, then '$' wouldn't match
/// // immediately after 'foo', and thus no match would be found.
/// assert_eq!(Some(&b"foo"[..]), re.find(hay).map(|m| m.as_bytes()));
/// ```
///
/// This example demonstrates that `^` will never match at a position
/// between `\r` and `\n`. (`$` will similarly not match between a `\r`
/// and a `\n`.)
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^")
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// let hay = b"\r\n\r\n";
/// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect();
/// assert_eq!(ranges, vec![0..0, 2..2, 4..4]);
/// ```
pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.crlf(yes);
self
}
/// Configures the line terminator to be used by the regex.
///
/// The line terminator is relevant in two ways for a particular regex:
///
/// * When dot-matches-new-line mode is *not* enabled (the default),
/// then `.` will match any character except for the configured line
/// terminator.
/// * When multi-line mode is enabled (not the default), then `^` and
/// `$` will match immediately after and before, respectively, a line
/// terminator.
///
/// In both cases, if CRLF mode is enabled in a particular context,
/// then it takes precedence over any configured line terminator.
///
/// This option cannot be configured from within the pattern.
///
/// The default line terminator is `\n`.
///
/// # Example
///
/// This shows how to treat the NUL byte as a line terminator. This can
/// be a useful heuristic when searching binary data.
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"^foo$")
/// .multi_line(true)
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// let hay = b"\x00foo\x00";
/// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range()));
/// ```
///
/// This example shows that the behavior of `.` is impacted by this
/// setting as well:
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r".")
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"\n"));
/// assert!(!re.is_match(b"\x00"));
/// ```
///
/// This shows that building a regex will work even when the byte
/// given is not ASCII. This is unlike the top-level `Regex` API where
/// matching invalid UTF-8 is not allowed.
///
/// Note though that you must disable Unicode mode. This is required
/// because Unicode mode requires matching one codepoint at a time,
/// and there is no way to match a non-ASCII byte as if it were a
/// codepoint.
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// assert!(
/// RegexBuilder::new(r".")
/// .unicode(false)
/// .line_terminator(0x80)
/// .build()
/// .is_ok(),
/// );
/// ```
pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder {
self.builder.line_terminator(byte);
self
}
/// This configures swap-greed mode for the entire pattern.
///
/// When swap-greed mode is enabled, patterns like `a+` will become
/// non-greedy and patterns like `a+?` will become greedy. In other
/// words, the meanings of `a+` and `a+?` are switched.
///
/// This setting can also be configured using the inline flag `U` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"a+")
/// .swap_greed(true)
/// .build()
/// .unwrap();
/// assert_eq!(Some(&b"a"[..]), re.find(b"aaa").map(|m| m.as_bytes()));
/// ```
pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.swap_greed(yes);
self
}
/// This configures verbose mode for the entire pattern.
///
/// When enabled, whitespace will be treated as insignificant in the
/// pattern and `#` can be used to start a comment until the next new
/// line.
///
/// Normally, in most places in a pattern, whitespace is treated
/// literally. For example ` +` will match one or more ASCII whitespace
/// characters.
///
/// When verbose mode is enabled, `\#` can be used to match a literal
/// `#` and `\ ` can be used to match a literal ASCII whitespace
/// character.
///
/// Verbose mode is useful for permitting regexes to be formatted and
/// broken up more nicely. This may make them more easily readable.
///
/// This setting can also be configured using the inline flag `x` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let pat = r"
/// \b
/// (?<first>\p{Uppercase}\w*) # always start with uppercase letter
/// [\s--\n]+ # whitespace should separate names
/// (?: # middle name can be an initial!
/// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
/// [\s--\n]+
/// )?
/// (?<last>\p{Uppercase}\w*)
/// \b
/// ";
/// let re = RegexBuilder::new(pat)
/// .ignore_whitespace(true)
/// .build()
/// .unwrap();
///
/// let caps = re.captures(b"Harry Potter").unwrap();
/// assert_eq!(&b"Harry"[..], &caps["first"]);
/// assert_eq!(&b"Potter"[..], &caps["last"]);
///
/// let caps = re.captures(b"Harry J. Potter").unwrap();
/// assert_eq!(&b"Harry"[..], &caps["first"]);
/// // Since a middle name/initial isn't required for an overall match,
/// // we can't assume that 'initial' or 'middle' will be populated!
/// assert_eq!(
/// Some(&b"J"[..]),
/// caps.name("initial").map(|m| m.as_bytes()),
/// );
/// assert_eq!(None, caps.name("middle").map(|m| m.as_bytes()));
/// assert_eq!(&b"Potter"[..], &caps["last"]);
///
/// let caps = re.captures(b"Harry James Potter").unwrap();
/// assert_eq!(&b"Harry"[..], &caps["first"]);
/// // Since a middle name/initial isn't required for an overall match,
/// // we can't assume that 'initial' or 'middle' will be populated!
/// assert_eq!(None, caps.name("initial").map(|m| m.as_bytes()));
/// assert_eq!(
/// Some(&b"James"[..]),
/// caps.name("middle").map(|m| m.as_bytes()),
/// );
/// assert_eq!(&b"Potter"[..], &caps["last"]);
/// ```
pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.ignore_whitespace(yes);
self
}
/// This configures octal mode for the entire pattern.
///
/// Octal syntax is a little-known way of uttering Unicode codepoints
/// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
/// equivalent patterns, where the last example shows octal syntax.
///
/// While supporting octal syntax isn't in and of itself a problem,
/// it does make good error messages harder. That is, in PCRE based
/// regex engines, syntax like `\1` invokes a backreference, which is
/// explicitly unsupported by this library. However, many users expect
/// backreferences to be supported. Therefore, when octal support
/// is disabled, the error message will explicitly mention that
/// backreferences aren't supported.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// // Normally this pattern would not compile, with an error message
/// // about backreferences not being supported. But with octal mode
/// // enabled, octal escape sequences work.
/// let re = RegexBuilder::new(r"\141")
/// .octal(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"a"));
/// ```
pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder {
self.builder.octal(yes);
self
}
/// Sets the approximate size limit, in bytes, of the compiled regex.
///
/// This roughly corresponds to the amount of heap memory, in
/// bytes, occupied by a single regex. If the regex would otherwise
/// approximately exceed this limit, then compiling that regex will
/// fail.
///
/// The main utility of a method like this is to avoid compiling
/// regexes that use an unexpected amount of resources, such as
/// time and memory. Even if the memory usage of a large regex is
/// acceptable, its search time may not be. Namely, worst case time
/// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and
/// `n ~ len(haystack)`. That is, search time depends, in part, on the
/// size of the compiled regex. This means that putting a limit on the
/// size of the regex limits how much a regex can impact search time.
///
/// For more information about regex size limits, see the section on
/// [untrusted inputs](crate#untrusted-input) in the top-level crate
/// documentation.
///
/// The default for this is some reasonable number that permits most
/// patterns to compile successfully.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// // It may surprise you how big some seemingly small patterns can
/// // be! Since \w is Unicode aware, this generates a regex that can
/// // match approximately 140,000 distinct codepoints.
/// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err());
/// ```
pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
self.builder.size_limit(bytes);
self
}
/// Set the approximate capacity, in bytes, of the cache of transitions
/// used by the lazy DFA.
///
/// While the lazy DFA isn't always used, it tends to be the most
/// commonly used regex engine in default configurations. It tends to
/// adopt the performance profile of a fully built DFA, but without the
/// downside of taking worst case exponential time to build.
///
/// The downside is that it needs to keep a cache of transitions and
/// states that are built while running a search, and this cache
/// can fill up. When it fills up, the cache will reset itself. Any
/// previously generated states and transitions will then need to be
/// re-generated. If this happens too many times, then this library
/// will bail out of using the lazy DFA and switch to a different regex
/// engine.
///
/// If your regex provokes this particular downside of the lazy DFA,
/// then it may be beneficial to increase its cache capacity. This will
/// potentially reduce the frequency of cache resetting (ideally to
/// `0`). While it won't fix all potential performance problems with
/// the lazy DFA, increasing the cache capacity does fix some.
///
/// There is no easy way to determine, a priori, whether increasing
/// this cache capacity will help. In general, the larger your regex,
/// the more cache it's likely to use. But that isn't an ironclad rule.
/// For example, a regex like `[01]*1[01]{N}` would normally produce a
/// fully built DFA that is exponential in size with respect to `N`.
/// The lazy DFA will prevent exponential space blow-up, but its cache
/// is likely to fill up, even when it's large and even for smallish
/// values of `N`.
///
/// If you aren't sure whether this helps or not, it is sensible to
/// set this to some arbitrarily large number in testing, such as
/// `usize::MAX`. Namely, this represents the amount of capacity that
/// *may* be used. It's probably not a good idea to use `usize::MAX` in
/// production though, since it implies there are no controls on heap
/// memory used by this library during a search. In effect, set it to
/// whatever you're willing to allocate for a single regex search.
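///
/// # Example
///
/// A minimal sketch of raising the cache capacity; the value and
/// pattern used here are arbitrary:
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// let re = RegexBuilder::new(r"\w+")
/// // Allow the lazy DFA to use up to 10 MiB for its cache.
/// .dfa_size_limit(10 * (1 << 20))
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"hello"));
/// ```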
pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder {
self.builder.dfa_size_limit(bytes);
self
}
/// Set the nesting limit for this parser.
///
/// The nesting limit controls how deep the abstract syntax tree is
/// allowed to be. If the AST exceeds the given limit (e.g., with too
/// many nested groups), then an error is returned by the parser.
///
/// The purpose of this limit is to act as a heuristic to prevent stack
/// overflow for consumers that do structural induction on an AST using
/// explicit recursion. While this crate never does this (instead using
/// constant stack space and moving the call stack to the heap), other
/// crates may.
///
/// This limit is not checked until the entire AST is parsed.
/// Therefore, if callers want to put a limit on the amount of heap
/// space used, then they should impose a limit on the length, in
/// bytes, of the concrete pattern string. In particular, this is
/// viable since this parser implementation will limit itself to heap
/// space proportional to the length of the pattern string. See also
/// the [untrusted inputs](crate#untrusted-input) section in the
/// top-level crate documentation for more information about this.
///
/// Note that a nest limit of `0` will return a nest limit error for
/// most patterns but not all. For example, a nest limit of `0` permits
/// `a` but not `ab`, since `ab` requires an explicit concatenation,
/// which results in a nest depth of `1`. In general, a nest limit is
/// not something that manifests in an obvious way in the concrete
/// syntax, therefore, it should not be used in a granular way.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexBuilder;
///
/// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok());
/// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err());
/// ```
pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
self.builder.nest_limit(limit);
self
}
}
/// A configurable builder for a [`RegexSet`].
///
/// This builder can be used to programmatically set flags such as `i`
/// (case insensitive) and `x` (for verbose mode). This builder can also be
/// used to configure things like the line terminator and a size limit on
/// the compiled regular expression.
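///
/// # Example
///
/// A small sketch of typical usage; the patterns and flags chosen here
/// are arbitrary:
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let set = RegexSetBuilder::new([r"^foo$", r"bar"])
/// .case_insensitive(true)
/// .multi_line(true)
/// .build()
/// .unwrap();
/// assert!(set.is_match(b"BAR"));
/// ```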
#[derive(Clone, Debug)]
pub struct RegexSetBuilder {
builder: Builder,
}
impl RegexSetBuilder {
/// Create a new builder with a default configuration for the given
/// patterns.
///
/// If the patterns are invalid or exceed the configured size limits,
/// then an error will be returned when [`RegexSetBuilder::build`] is
/// called.
pub fn new<I, S>(patterns: I) -> RegexSetBuilder
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
RegexSetBuilder { builder: Builder::new(patterns) }
}
/// Compiles the patterns given to `RegexSetBuilder::new` with the
/// configuration set on this builder.
///
/// If the patterns aren't valid regexes or if a configured size limit
/// was exceeded, then an error is returned.
pub fn build(&self) -> Result<RegexSet, Error> {
self.builder.build_many_bytes()
}
/// This configures Unicode mode for all of the patterns.
///
/// Enabling Unicode mode does a number of things:
///
/// * Most fundamentally, it causes the fundamental atom of matching
/// to be a single codepoint. When Unicode mode is disabled, it's a
/// single byte. For example, when Unicode mode is enabled, `.` will
/// match `💩` once, whereas it will match 4 times when Unicode mode
/// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.)
/// * Case insensitive matching uses Unicode simple case folding rules.
/// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are
/// available.
/// * Perl character classes are Unicode aware. That is, `\w`, `\s` and
/// `\d`.
/// * The word boundary assertions, `\b` and `\B`, use the Unicode
/// definition of a word character.
///
/// Note that unlike the top-level `RegexSet` for searching `&str`,
/// it is permitted to disable Unicode mode even if the resulting
/// pattern could match invalid UTF-8. For example, `(?-u:.)` is not
/// a valid pattern for a top-level `RegexSet`, but is valid for a
/// `bytes::RegexSet`.
///
/// For more details on the Unicode support in this crate, see the
/// [Unicode section](crate#unicode) in this crate's top-level
/// documentation.
///
/// The default for this is `true`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"\w"])
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally greek letters would be included in \w, but since
/// // Unicode mode is disabled, it only matches ASCII letters.
/// assert!(!re.is_match("δ".as_bytes()));
///
/// let re = RegexSetBuilder::new([r"s"])
/// .case_insensitive(true)
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally 'ſ' is included when searching for 's' case
/// // insensitively due to Unicode's simple case folding rules. But
/// // when Unicode mode is disabled, only ASCII case insensitive rules
/// // are used.
/// assert!(!re.is_match("ſ".as_bytes()));
/// ```
///
/// Since this builder is for constructing a
/// [`bytes::RegexSet`](RegexSet), one can disable Unicode mode even if
/// it would match invalid UTF-8:
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"."])
/// .unicode(false)
/// .build()
/// .unwrap();
/// // Normally `.` will only match valid UTF-8, but since Unicode
/// // mode is disabled, it can match any byte, including one that is
/// // not valid UTF-8.
/// assert!(re.is_match(b"\xFF"));
/// ```
pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.unicode(yes);
self
}
/// This configures whether to enable case insensitive matching for all
/// of the patterns.
///
/// This setting can also be configured using the inline flag `i`
/// in the pattern. For example, `(?i:foo)` matches `foo` case
/// insensitively while `(?-i:foo)` matches `foo` case sensitively.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"])
/// .case_insensitive(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"FoObarQuUx"));
/// // Even though case insensitive matching is enabled in the builder,
/// // it can be locally disabled within the pattern. In this case,
/// // `bar` is matched case sensitively.
/// assert!(!re.is_match(b"fooBARquux"));
/// ```
pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.case_insensitive(yes);
self
}
/// This configures multi-line mode for all of the patterns.
///
/// Enabling multi-line mode changes the behavior of the `^` and `$`
/// anchor assertions. Instead of only matching at the beginning and
/// end of a haystack, respectively, multi-line mode causes them to
/// match at the beginning and end of a line *in addition* to the
/// beginning and end of a haystack. More precisely, `^` will match at
/// the position immediately following a `\n` and `$` will match at the
/// position immediately preceding a `\n`.
///
/// The behavior of this option can be impacted by other settings too:
///
/// * The [`RegexSetBuilder::line_terminator`] option changes `\n`
/// above to any ASCII byte.
/// * The [`RegexSetBuilder::crlf`] option changes the line terminator
/// to be either `\r` or `\n`, but `^` and `$` will never match at the
/// position between a `\r` and a `\n`.
///
/// This setting can also be configured using the inline flag `m` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^foo$"])
/// .multi_line(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"\nfoo\n"));
/// ```
pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.multi_line(yes);
self
}
/// This configures dot-matches-new-line mode for all of the patterns.
///
/// Perhaps surprisingly, the default behavior for `.` is not to match
/// any character, but rather, to match any character except for the
/// line terminator (which is `\n` by default). When this mode is
/// enabled, the behavior changes such that `.` truly matches any
/// character.
///
/// This setting can also be configured using the inline flag `s` in
/// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent
/// regexes.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"foo.bar"])
/// .dot_matches_new_line(true)
/// .build()
/// .unwrap();
/// let hay = b"foo\nbar";
/// assert!(re.is_match(hay));
/// ```
pub fn dot_matches_new_line(
&mut self,
yes: bool,
) -> &mut RegexSetBuilder {
self.builder.dot_matches_new_line(yes);
self
}
/// This configures CRLF mode for all of the patterns.
///
/// When CRLF mode is enabled, both `\r` ("carriage return" or CR for
/// short) and `\n` ("line feed" or LF for short) are treated as line
/// terminators. This results in the following:
///
/// * Unless dot-matches-new-line mode is enabled, `.` will now match
/// any character except for `\n` and `\r`.
/// * When multi-line mode is enabled, `^` will match immediately
/// following a `\n` or a `\r`. Similarly, `$` will match immediately
/// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match
/// between `\r` and `\n`.
///
/// This setting can also be configured using the inline flag `R` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^foo$"])
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// let hay = b"\r\nfoo\r\n";
/// // If CRLF mode weren't enabled here, then '$' wouldn't match
/// // immediately after 'foo', and thus no match would be found.
/// assert!(re.is_match(hay));
/// ```
///
/// This example demonstrates that `^` will never match at a position
/// between `\r` and `\n`. (`$` will similarly not match between a `\r`
/// and a `\n`.)
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^\n"])
/// .multi_line(true)
/// .crlf(true)
/// .build()
/// .unwrap();
/// assert!(!re.is_match(b"\r\n"));
/// ```
pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.crlf(yes);
self
}
/// Configures the line terminator to be used by the regex.
///
/// The line terminator is relevant in two ways for a particular regex:
///
/// * When dot-matches-new-line mode is *not* enabled (the default),
/// then `.` will match any character except for the configured line
/// terminator.
/// * When multi-line mode is enabled (not the default), then `^` and
/// `$` will match immediately after and before, respectively, a line
/// terminator.
///
/// In both cases, if CRLF mode is enabled in a particular context,
/// then it takes precedence over any configured line terminator.
///
/// This option cannot be configured from within the pattern.
///
/// The default line terminator is `\n`.
///
/// # Example
///
/// This shows how to treat the NUL byte as a line terminator. This can
/// be a useful heuristic when searching binary data.
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"^foo$"])
/// .multi_line(true)
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// let hay = b"\x00foo\x00";
/// assert!(re.is_match(hay));
/// ```
///
/// This example shows that the behavior of `.` is impacted by this
/// setting as well:
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let re = RegexSetBuilder::new([r"."])
/// .line_terminator(b'\x00')
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"\n"));
/// assert!(!re.is_match(b"\x00"));
/// ```
///
/// This shows that building a regex will work even when the byte given
/// is not ASCII. This is unlike the top-level `RegexSet` API where
/// matching invalid UTF-8 is not allowed.
///
/// Note though that you must disable Unicode mode. This is required
/// because Unicode mode requires matching one codepoint at a time,
/// and there is no way to match a non-ASCII byte as if it were a
/// codepoint.
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// assert!(
/// RegexSetBuilder::new([r"."])
/// .unicode(false)
/// .line_terminator(0x80)
/// .build()
/// .is_ok(),
/// );
/// ```
pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder {
self.builder.line_terminator(byte);
self
}
/// This configures swap-greed mode for all of the patterns.
///
/// When swap-greed mode is enabled, patterns like `a+` will become
/// non-greedy and patterns like `a+?` will become greedy. In other
/// words, the meanings of `a+` and `a+?` are switched.
///
/// This setting can also be configured using the inline flag `U` in
/// the pattern.
///
/// Note that this is generally not useful for a `RegexSet` since a
/// `RegexSet` can only report whether a pattern matches or not. Since
/// greediness never impacts whether a match is found or not (only the
/// offsets of the match), it follows that whether parts of a pattern
/// are greedy or not doesn't matter for a `RegexSet`.
///
/// The default for this is `false`.
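///
/// # Example
///
/// A brief sketch of the point above: enabling swap-greed does not
/// change whether a `RegexSet` reports a match.
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let greedy = RegexSetBuilder::new([r"a+"]).build().unwrap();
/// let swapped = RegexSetBuilder::new([r"a+"])
/// .swap_greed(true)
/// .build()
/// .unwrap();
/// // Greediness changes the offsets a `Regex` would report, but not
/// // whether a `RegexSet` matches at all.
/// assert_eq!(greedy.is_match(b"aaa"), swapped.is_match(b"aaa"));
/// ```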
pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.swap_greed(yes);
self
}
/// This configures verbose mode for all of the patterns.
///
/// When enabled, whitespace will be treated as insignificant in the
/// pattern and `#` can be used to start a comment until the next new
/// line.
///
/// Normally, in most places in a pattern, whitespace is treated
/// literally. For example ` +` will match one or more ASCII whitespace
/// characters.
///
/// When verbose mode is enabled, `\#` can be used to match a literal
/// `#` and `\ ` can be used to match a literal ASCII whitespace
/// character.
///
/// Verbose mode is useful for permitting regexes to be formatted and
/// broken up more nicely. This may make them more easily readable.
///
/// This setting can also be configured using the inline flag `x` in
/// the pattern.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let pat = r"
/// \b
/// (?<first>\p{Uppercase}\w*) # always start with uppercase letter
/// [\s--\n]+ # whitespace should separate names
/// (?: # middle name can be an initial!
/// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*))
/// [\s--\n]+
/// )?
/// (?<last>\p{Uppercase}\w*)
/// \b
/// ";
/// let re = RegexSetBuilder::new([pat])
/// .ignore_whitespace(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"Harry Potter"));
/// assert!(re.is_match(b"Harry J. Potter"));
/// assert!(re.is_match(b"Harry James Potter"));
/// assert!(!re.is_match(b"harry J. Potter"));
/// ```
pub fn ignore_whitespace(
&mut self,
yes: bool,
) -> &mut RegexSetBuilder {
self.builder.ignore_whitespace(yes);
self
}
/// This configures octal mode for all of the patterns.
///
/// Octal syntax is a little-known way of uttering Unicode codepoints
/// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all
/// equivalent patterns, where the last example shows octal syntax.
///
/// While supporting octal syntax isn't in and of itself a problem,
/// it does make good error messages harder. That is, in PCRE based
/// regex engines, syntax like `\1` invokes a backreference, which is
/// explicitly unsupported by this library. However, many users expect
/// backreferences to be supported. Therefore, when octal support
/// is disabled, the error message will explicitly mention that
/// backreferences aren't supported.
///
/// The default for this is `false`.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// // Normally this pattern would not compile, with an error message
/// // about backreferences not being supported. But with octal mode
/// // enabled, octal escape sequences work.
/// let re = RegexSetBuilder::new([r"\141"])
/// .octal(true)
/// .build()
/// .unwrap();
/// assert!(re.is_match(b"a"));
/// ```
pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder {
self.builder.octal(yes);
self
}
/// Sets the approximate size limit, in bytes, of the compiled regex.
///
/// This roughly corresponds to the amount of heap memory, in
/// bytes, occupied by a single regex. If the regex would otherwise
/// approximately exceed this limit, then compiling that regex will
/// fail.
///
/// The main utility of a method like this is to avoid compiling
/// regexes that use an unexpected amount of resources, such as
/// time and memory. Even if the memory usage of a large regex is
/// acceptable, its search time may not be. Namely, worst case time
/// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and
/// `n ~ len(haystack)`. That is, search time depends, in part, on the
/// size of the compiled regex. This means that putting a limit on the
/// size of the regex limits how much a regex can impact search time.
///
/// For more information about regex size limits, see the section on
/// [untrusted inputs](crate#untrusted-input) in the top-level crate
/// documentation.
///
/// The default for this is some reasonable number that permits most
/// patterns to compile successfully.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// // It may surprise you how big some seemingly small patterns can
/// // be! Since \w is Unicode aware, this generates a regex that can
/// // match approximately 140,000 distinct codepoints.
/// assert!(
/// RegexSetBuilder::new([r"\w"])
/// .size_limit(45_000)
/// .build()
/// .is_err()
/// );
/// ```
pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder {
self.builder.size_limit(bytes);
self
}
/// Set the approximate capacity, in bytes, of the cache of transitions
/// used by the lazy DFA.
///
/// While the lazy DFA isn't always used, it tends to be the most
/// commonly used regex engine in default configurations. It tends to
/// adopt the performance profile of a fully built DFA, but without the
/// downside of taking worst case exponential time to build.
///
/// The downside is that it needs to keep a cache of transitions and
/// states that are built while running a search, and this cache
/// can fill up. When it fills up, the cache will reset itself. Any
/// previously generated states and transitions will then need to be
/// re-generated. If this happens too many times, then this library
/// will bail out of using the lazy DFA and switch to a different regex
/// engine.
///
/// If your regex provokes this particular downside of the lazy DFA,
/// then it may be beneficial to increase its cache capacity. This will
/// potentially reduce the frequency of cache resetting (ideally to
/// `0`). While it won't fix all potential performance problems with
/// the lazy DFA, increasing the cache capacity does fix some.
///
/// There is no easy way to determine, a priori, whether increasing
/// this cache capacity will help. In general, the larger your regex,
/// the more cache it's likely to use. But that isn't an ironclad rule.
/// For example, a regex like `[01]*1[01]{N}` would normally produce a
/// fully built DFA that is exponential in size with respect to `N`.
/// The lazy DFA will prevent exponential space blow-up, but its cache
/// is likely to fill up, even when it's large and even for smallish
/// values of `N`.
///
/// If you aren't sure whether this helps or not, it is sensible to
/// set this to some arbitrarily large number in testing, such as
/// `usize::MAX`. Namely, this represents the amount of capacity that
/// *may* be used. It's probably not a good idea to use `usize::MAX` in
/// production though, since it implies there are no controls on heap
/// memory used by this library during a search. In effect, set it to
/// whatever you're willing to allocate for a single regex search.
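///
/// # Example
///
/// A minimal sketch of raising the cache capacity; the value and
/// pattern used here are arbitrary:
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// let set = RegexSetBuilder::new([r"\w+"])
/// // Allow the lazy DFA to use up to 10 MiB for its cache.
/// .dfa_size_limit(10 * (1 << 20))
/// .build()
/// .unwrap();
/// assert!(set.is_match(b"hello"));
/// ```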
pub fn dfa_size_limit(
&mut self,
bytes: usize,
) -> &mut RegexSetBuilder {
self.builder.dfa_size_limit(bytes);
self
}
/// Set the nesting limit for this parser.
///
/// The nesting limit controls how deep the abstract syntax tree is
/// allowed to be. If the AST exceeds the given limit (e.g., with too
/// many nested groups), then an error is returned by the parser.
///
/// The purpose of this limit is to act as a heuristic to prevent stack
/// overflow for consumers that do structural induction on an AST using
/// explicit recursion. While this crate never does this (instead using
/// constant stack space and moving the call stack to the heap), other
/// crates may.
///
/// This limit is not checked until the entire AST is parsed.
/// Therefore, if callers want to put a limit on the amount of heap
/// space used, then they should impose a limit on the length, in
/// bytes, of the concrete pattern string. In particular, this is
/// viable since this parser implementation will limit itself to heap
/// space proportional to the length of the pattern string. See also
/// the [untrusted inputs](crate#untrusted-input) section in the
/// top-level crate documentation for more information about this.
///
/// Note that a nest limit of `0` will return a nest limit error for
/// most patterns but not all. For example, a nest limit of `0` permits
/// `a` but not `ab`, since `ab` requires an explicit concatenation,
/// which results in a nest depth of `1`. In general, a nest limit is
/// not something that manifests in an obvious way in the concrete
/// syntax, therefore, it should not be used in a granular way.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSetBuilder;
///
/// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok());
/// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err());
/// ```
pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder {
self.builder.nest_limit(limit);
self
}
}
}