test.rs - mozsearch

mozilla-central/dom/base/fragmentdirectives/test.rs (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: DOM: Core & HTML

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#[cfg(test)]

mod test {

    use crate::fragment_directive_impl::{

        create_fragment_directive_string, parse_fragment_directive_and_remove_it_from_hash,

        TextDirective,

};

    /// This test verifies that valid combinations of [prefix-,]start[,end][,-suffix] are parsed correctly.
    ///
    /// Each test case pairs a URL hash with the expected `(prefix, start, end, suffix)`
    /// values; `None` means that the token must be absent from the parsed text directive.
    #[test]
    fn test_parse_fragment_directive_with_one_text_directive() {
        // U+2705 WHITE HEAVY CHECK MARK - UTF-8 percent encoding: %E2%9C%85
        let checkmark = String::from_utf8(vec![0xE2, 0x9C, 0x85]).unwrap();
        let test_cases = vec![
            ("#:~:text=start", (None, Some("start"), None, None)),
            (
                "#:~:text=start,end",
                (None, Some("start"), Some("end"), None),
            ),
            (
                "#:~:text=prefix-,start",
                (Some("prefix"), Some("start"), None, None),
            ),
            (
                "#:~:text=prefix-,start,end",
                (Some("prefix"), Some("start"), Some("end"), None),
            ),
            (
                "#:~:text=prefix-,start,end,-suffix",
                (Some("prefix"), Some("start"), Some("end"), Some("suffix")),
            ),
            (
                "#:~:text=start,-suffix",
                (None, Some("start"), None, Some("suffix")),
            ),
            (
                "#:~:text=start,end,-suffix",
                (None, Some("start"), Some("end"), Some("suffix")),
            ),
            // A second `text=` inside the value is expected to be kept as part of `start`.
            ("#:~:text=text=", (None, Some("text="), None, None)),
            // Percent-encoding: valid escapes are expected to be decoded, while a `%`
            // that does not start a valid escape is expected to be kept verbatim.
            ("#:~:text=%25", (None, Some("%"), None, None)),
            ("#:~:text=%", (None, Some("%"), None, None)),
            ("#:~:text=%%", (None, Some("%%"), None, None)),
            ("#:~:text=%25%25F", (None, Some("%%F"), None, None)),
            (
                "#:~:text=%E2%9C%85",
                (None, Some(checkmark.as_str()), None, None),
            ),
        ];
        for (url, (prefix, start, end, suffix)) in test_cases {
            let (stripped_url, fragment_directive, result) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The parser must find a result.");
            // `url[4..]` skips the leading `#:~:` delimiter of the test input.
            assert_eq!(
                fragment_directive,
                &url[4..],
                "The extracted fragment directive string \
                 should be unsanitized and therefore match the input string."
            );
            assert_eq!(result.len(), 1, "There must be one parsed text fragment.");
            assert_eq!(
                stripped_url, "",
                "The fragment directive must be removed from the url hash."
            );
            let text_directive = result.first().unwrap();
            match prefix {
                None => assert!(
                    text_directive.prefix().is_none(),
                    "There must be no `prefix` token (test case `{}`).",
                    url
                ),
                Some(expected) => assert!(
                    text_directive
                        .prefix()
                        .as_ref()
                        .expect("There must be a `prefix` token.")
                        .value()
                        == expected,
                    "Wrong value for `prefix` (test case `{}`).",
                    url
                ),
            }
            match start {
                None => assert!(
                    text_directive.start().is_none(),
                    "There must be no `start` token (test case `{}`).",
                    url
                ),
                Some(expected) => assert!(
                    text_directive
                        .start()
                        .as_ref()
                        .expect("There must be a `start` token.")
                        .value()
                        == expected,
                    "Wrong value for `start` (test case `{}`).",
                    url
                ),
            }
            match end {
                None => assert!(
                    text_directive.end().is_none(),
                    "There must be no `end` token (test case `{}`).",
                    url
                ),
                Some(expected) => assert!(
                    text_directive
                        .end()
                        .as_ref()
                        .expect("There must be a `end` token.")
                        .value()
                        == expected,
                    "Wrong value for `end` (test case `{}`).",
                    url
                ),
            }
            match suffix {
                None => assert!(
                    text_directive.suffix().is_none(),
                    "There must be no `suffix` token (test case `{}`).",
                    url
                ),
                Some(expected) => assert!(
                    text_directive
                        .suffix()
                        .as_ref()
                        .expect("There must be a `suffix` token.")
                        .value()
                        == expected,
                    "Wrong value for `suffix` (test case `{}`).",
                    url
                ),
            }
        }
    }

    /// Verifies that the fragment directive is stripped from complete URLs
    /// (including path, query and a regular fragment), and that the extracted
    /// fragment directive is `text=foo` in every case.
    #[test]
    fn test_parse_full_url() {
        for (url, stripped_url_ref) in [
            ("https://example.com#:~:text=foo", "https://example.com"),
            (
                "https://example.com/some/page.html?query=answer#:~:text=foo",
                "https://example.com/some/page.html?query=answer",
            ),
            (
                "https://example.com/some/page.html?query=answer#fragment:~:text=foo",
                "https://example.com/some/page.html?query=answer#fragment",
            ),
            // A `:~:` inside the query must be left untouched.
            (
                "http://example.com/page.html?query=irrelevant:~:#bar:~:text=foo",
                "http://example.com/page.html?query=irrelevant:~:#bar",
            ),
        ] {
            let (stripped_url, fragment_directive, _) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The parser must find a result");
            assert_eq!(
                stripped_url, stripped_url_ref,
                "The stripped url is not correct."
            );
            assert_eq!(fragment_directive, "text=foo");
        }
    }

    /// This test verifies that a text fragment is parsed correctly if it is preceded
    /// by a regular fragment (i.e. `#foo:~:text=bar`), and that the regular fragment
    /// is kept in the stripped URL hash.
    #[test]
    fn test_parse_text_fragment_after_fragments() {
        let url = "#foo:~:text=start";
        let (stripped_url, fragment_directive, result) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        assert_eq!(
            result.len(),
            1,
            "There must be exactly one parsed text fragment."
        );
        assert_eq!(
            stripped_url, "#foo",
            "The fragment directive was not removed correctly."
        );
        assert_eq!(
            fragment_directive, "text=start",
            "The fragment directive was not extracted correctly."
        );
        let fragment = result.first().unwrap();
        assert!(fragment.prefix().is_none(), "There is no `prefix` token.");
        assert_eq!(
            fragment
                .start()
                .as_ref()
                .expect("There must be a `start` token.")
                .value(),
            "start"
        );
        assert!(fragment.end().is_none(), "There is no `end` token.");
        assert!(fragment.suffix().is_none(), "There is no `suffix` token.");
    }

    /// Ensure that multiple text fragments are parsed correctly.
    ///
    /// The input contains three `text=` directives separated by `&`; each one is
    /// checked for the exact set of tokens it carries.
    #[test]
    fn test_parse_multiple_text_fragments() {
        let url = "#:~:text=prefix-,start,-suffix&text=foo&text=bar,-suffix";
        let (_, _, text_directives) = parse_fragment_directive_and_remove_it_from_hash(url)
            .expect("The parser must find a result.");
        assert_eq!(
            text_directives.len(),
            3,
            "There must be exactly three parsed text fragments."
        );

        // First directive: `prefix-,start,-suffix`
        let first_text_directive = &text_directives[0];
        assert_eq!(
            first_text_directive
                .prefix()
                .as_ref()
                .expect("There must be a `prefix` token.")
                .value(),
            "prefix"
        );
        assert_eq!(
            first_text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` token.")
                .value(),
            "start"
        );
        assert!(
            first_text_directive.end().is_none(),
            "There is no `end` token."
        );
        assert_eq!(
            first_text_directive
                .suffix()
                .as_ref()
                .expect("There must be a `suffix` token.")
                .value(),
            "suffix"
        );

        // Second directive: `foo`
        let second_text_directive = &text_directives[1];
        assert!(
            second_text_directive.prefix().is_none(),
            "There is no `prefix` token."
        );
        assert_eq!(
            second_text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` token.")
                .value(),
            "foo"
        );
        assert!(
            second_text_directive.end().is_none(),
            "There is no `end` token."
        );
        assert!(
            second_text_directive.suffix().is_none(),
            "There is no `suffix` token."
        );

        // Third directive: `bar,-suffix`
        let third_text_directive = &text_directives[2];
        assert!(
            third_text_directive.prefix().is_none(),
            "There is no `prefix` token."
        );
        assert_eq!(
            third_text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` token.")
                .value(),
            "bar"
        );
        assert!(
            third_text_directive.end().is_none(),
            "There is no `end` token."
        );
        assert_eq!(
            third_text_directive
                .suffix()
                .as_ref()
                .expect("There must be a `suffix` token.")
                .value(),
            "suffix"
        );
    }

    /// Multiple text directives should be parsed correctly
    /// if they are surrounded or separated by unknown directives.
    ///
    /// The unknown directive `foo` is placed before, between and after the two
    /// text directives; in every position exactly the two text directives must be returned.
    #[test]
    fn test_parse_multiple_text_directives_with_unknown_directive_in_between() {
        for url in [
            "#:~:foo&text=start1&text=start2",
            "#:~:text=start1&foo&text=start2",
            "#:~:text=start1&text=start2&foo",
        ] {
            let (_, fragment_directive, text_directives) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The parser must find a result.");
            // `url[4..]` skips the leading `#:~:` delimiter of the test input.
            assert_eq!(
                fragment_directive,
                &url[4..],
                "The extracted fragment directive string is unsanitized \
                 and should contain the unknown directive."
            );
            assert_eq!(
                text_directives.len(),
                2,
                "There must be exactly two parsed text fragments."
            );
            let first_text_directive = &text_directives[0];
            assert_eq!(
                first_text_directive
                    .start()
                    .as_ref()
                    .expect("There must be a `start` token.")
                    .value(),
                "start1"
            );
            let second_text_directive = &text_directives[1];
            assert_eq!(
                second_text_directive
                    .start()
                    .as_ref()
                    .expect("There must be a `start` token.")
                    .value(),
                "start2"
            );
        }
    }

    /// Ensures that input that doesn't contain a text fragment does not produce a result.
    /// This includes the use of partial identifying tokens necessary for a text fragment
    /// (e.g. `:~:` without `text=`, `text=foo` without the `:~:` or multiple occurrences of `:~:`)
    ///
    /// Two groups are covered:
    /// - input without the `:~:` delimiter, for which the parser must return `None`;
    /// - input with a `:~:` delimiter but no valid text directive, for which the parser
    ///   must still return a result with the fragment directive removed from the url.
    #[test]
    fn test_parse_invalid_or_unknown_fragment_directive() {
        // there is no fragment directive here, hence the original url should not be updated.
        for url in ["#foo", "#foo:", "text=prefix-,start"] {
            let text_directives = parse_fragment_directive_and_remove_it_from_hash(url);
            assert!(
                text_directives.is_none(),
                "The fragment `{}` does not contain a valid or known fragment directive.",
                url
            );
        }
        // there is an (invalid) fragment directive present. It needs to be removed from the url.
        for (url, url_without_fragment_directive_ref) in [
            ("#foo:~:", "#foo"),
            ("#foo:~:bar", "#foo"),
            ("#:~:text=foo-,bar,-baz:~:text=foo", ""),
        ] {
            let (url_without_fragment_directive, _, _) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("There is a fragment directive which should have been removed.");
            assert_eq!(
                url_without_fragment_directive, url_without_fragment_directive_ref,
                "The fragment directive has not been removed correctly from  fragment `{}`.",
                url
            );
        }
    }

    /// Ensures that ill-formed text directives (but valid fragment directives)
    /// (starting correctly with `:~:text=`) are still recognized as a fragment directive:
    /// the parser must return a result, and the fragment directive must be removed
    /// from the url hash, leaving it empty.
    /// Test cases include invalid combinations of `prefix`/`suffix`es,
    /// additional `,`s, too many `start`/`end` tokens, or empty text fragments.
    #[test]
    fn test_parse_invalid_text_fragments() {
        for url in [
            "#:~:text=start,start,start",
            "#:~:text=prefix-,prefix-",
            "#:~:text=prefix-,-suffix",
            "#:~:text=prefix-,start,start,start",
            "#:~:text=prefix-,start,start,start,-suffix",
            "#:~:text=start,start,start,-suffix",
            "#:~:text=prefix-,start,end,-suffix,foo",
            "#:~:text=foo,prefix-,start",
            "#:~:text=prefix-,,start,",
            "#:~:text=,prefix,start",
            "#:~:text=",
        ] {
            let (url_without_fragment_directive, _, _) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The parser must find a result for an ill-formed text directive.");
            assert!(
                url_without_fragment_directive.is_empty(),
                "The fragment directive `{}` does not contain a valid fragment directive. \
                 It must be removed from the original url anyway.",
                url
            );
        }
    }

    /// Ensure that out of multiple text fragments only the invalid ones are ignored
    /// while valid text fragments are still returned.
    /// Since correct parsing of multiple text fragments as well as
    /// several forms of invalid text fragments are already tested in
    /// `test_parse_multiple_text_fragments` and `test_parse_invalid_text_fragments()`,
    /// it should be enough to test this with only one fragment directive
    /// that contains two text fragments, one of them being invalid.
    #[test]
    fn test_valid_and_invalid_text_directives() {
        // The invalid text directive is placed once after and once before the valid one.
        for url in [
            "#:~:text=start&text=,foo,",
            "#:~:text=foo,foo,foo&text=start",
        ] {
            let (_, fragment_directive, text_directives) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The parser must find a result.");
            // `url[4..]` skips the leading `#:~:` delimiter of the test input.
            assert_eq!(
                fragment_directive,
                &url[4..],
                "The extracted fragment directive string is unsanitized \
                 and should contain invalid text directives."
            );
            assert_eq!(
                text_directives.len(),
                1,
                "There must be exactly one parsed text fragment."
            );
            let text_directive = text_directives.first().unwrap();
            assert_eq!(
                text_directive
                    .start()
                    .as_ref()
                    .expect("There must be a `start` value.")
                    .value(),
                "start",
                "The `start` value of the text directive has the wrong value."
            );
        }
    }

    /// Ensures that a fragment directive that contains percent-encoded characters
    /// is decoded correctly. This explicitly includes characters which are used
    /// for identifying text fragments, i.e. `#`, `,`, `&`, `:`, `~` and `-`.
    ///
    /// The extracted fragment directive string itself must stay identical to the
    /// input; only the token values of the parsed text directive are decoded.
    #[test]
    fn test_parse_percent_encoding_tokens() {
        let url = "#:~:text=prefix%26-,start%20and%2C,end%23,-%26suffix%2D";
        let (_, fragment_directive, text_directives) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        // `url[4..]` skips the leading `#:~:` delimiter of the test input.
        assert_eq!(
            fragment_directive,
            &url[4..],
            "The extracted fragment directive string is unsanitized \
             and should contain the original and percent-decoded string."
        );
        let text_directive = text_directives.first().unwrap();
        assert_eq!(
            text_directive
                .prefix()
                .as_ref()
                .expect("There must be a prefix.")
                .value(),
            "prefix&",
            "Wrong percent-decoded value for `prefix`."
        );
        assert_eq!(
            text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` token.")
                .value(),
            "start and,",
            "Wrong percent-decoded value for `start`."
        );
        assert_eq!(
            text_directive
                .end()
                .as_ref()
                .expect("There must be an `end` token.")
                .value(),
            "end#",
            "Wrong percent-decoded value for `end`."
        );
        assert_eq!(
            text_directive
                .suffix()
                .as_ref()
                .expect("There must be a `suffix` token.")
                .value(),
            "&suffix-",
            "Wrong percent-decoded value for `suffix`."
        );
    }

    /// Ensures that a text fragment is created correctly,
    /// based on a given combination of tokens.
    /// This includes all sorts of combinations of
    /// `prefix`, `suffix`, `start` and `end`,
    /// as well as values for these tokens which contain
    /// characters that need to be encoded because they are
    /// identifiers for text fragments
    /// (`#`, `,`, `&`, `:`, `~` and `-`).
    ///
    /// The arguments of `TextDirective::from_parts` are given in the order
    /// `prefix`, `start`, `end`, `suffix` (as the expected strings show);
    /// an empty string stands for an absent token.
    #[test]
    fn test_create_fragment_directive() {
        for (text_directive, expected_fragment_directive) in [
            (
                TextDirective::from_parts(
                    String::new(),
                    String::from("start"),
                    String::new(),
                    String::new(),
                )
                .unwrap(),
                ":~:text=start",
            ),
            (
                TextDirective::from_parts(
                    String::new(),
                    String::from("start"),
                    String::from("end"),
                    String::new(),
                )
                .unwrap(),
                ":~:text=start,end",
            ),
            (
                TextDirective::from_parts(
                    String::from("prefix"),
                    String::from("start"),
                    String::from("end"),
                    String::new(),
                )
                .unwrap(),
                ":~:text=prefix-,start,end",
            ),
            (
                TextDirective::from_parts(
                    String::from("prefix"),
                    String::from("start"),
                    String::from("end"),
                    String::from("suffix"),
                )
                .unwrap(),
                ":~:text=prefix-,start,end,-suffix",
            ),
            (
                TextDirective::from_parts(
                    String::new(),
                    String::from("start"),
                    String::from("end"),
                    String::from("suffix"),
                )
                .unwrap(),
                ":~:text=start,end,-suffix",
            ),
            (
                TextDirective::from_parts(
                    String::from("prefix"),
                    String::from("start"),
                    String::new(),
                    String::from("suffix"),
                )
                .unwrap(),
                ":~:text=prefix-,start,-suffix",
            ),
            // Token values containing delimiter characters must be percent-encoded.
            (
                TextDirective::from_parts(
                    String::from("prefix-"),
                    String::from("start and,"),
                    String::from("&end"),
                    String::from("#:~:suffix"),
                )
                .unwrap(),
                ":~:text=prefix%2D-,start%20and%2C,%26end,-%23%3A%7E%3Asuffix",
            ),
        ] {
            let fragment_directive = create_fragment_directive_string(&vec![text_directive])
                .expect("The given input must produce a valid fragment directive.");
            assert_eq!(fragment_directive, expected_fragment_directive);
        }
    }

    /// Ensures that a fragment directive is created correctly if multiple text fragments are given.
    /// The resulting fragment must start with `:~:`
    /// and each text fragment must be separated using `&text=`.
    #[test]
    fn test_create_fragment_directive_from_multiple_text_directives() {
        // Three directives that only carry a `start` token (second argument).
        let text_directives = vec![
            TextDirective::from_parts(
                String::new(),
                String::from("start1"),
                String::new(),
                String::new(),
            )
            .unwrap(),
            TextDirective::from_parts(
                String::new(),
                String::from("start2"),
                String::new(),
                String::new(),
            )
            .unwrap(),
            TextDirective::from_parts(
                String::new(),
                String::from("start3"),
                String::new(),
                String::new(),
            )
            .unwrap(),
        ];
        let fragment_directive = create_fragment_directive_string(&text_directives)
            .expect("The given input must produce a valid fragment directive.");
        assert_eq!(
            fragment_directive, ":~:text=start1&text=start2&text=start3",
            "The created fragment directive is wrong for multiple fragments."
        );
    }