literal.rs - mozsearch

mozilla-central/third_party/rust/cexpr/src/literal.rs

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

// (C) Copyright 2016 Jethro G. Beekman

//

// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or

// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license

// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your

// option. This file may not be copied, modified, or distributed

// except according to those terms.

//! Parsing C literals from byte slices.

//!

//! This will parse a representation of a C literal into a Rust type.

//!

//! # characters

//! Character literals are stored into the `CChar` type, which can hold values

//! that are not valid Unicode code points. ASCII characters are represented as

//! `char`, literal bytes with the high bit set are converted into the raw

//! representation. Escape sequences are supported. If hex and octal escapes

//! map to an ASCII character, that is used, otherwise, the raw encoding is

//! used, including for values over 255. Unicode escapes are checked for

//! validity and mapped to `char`. Character sequences are not supported. Width

//! prefixes are ignored.

//!

//! # strings

//! Strings are interpreted as byte vectors. Escape sequences are supported. If

//! hex and octal escapes map onto multi-byte characters, they are truncated to

//! one 8-bit character. Unicode escapes are converted into their UTF-8

//! encoding. Width prefixes are ignored.

//!

//! # integers

//! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are

//! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,

//! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and

//! sign suffixes are ignored. Sign prefixes are not supported.

//!

//! # real numbers

//! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are

//! not supported in the significand. Hexadecimal floating points are not

//! supported.

use std::char;

use std::str::{self, FromStr};

use nom::branch::alt;

use nom::bytes::complete::is_not;

use nom::bytes::complete::tag;

use nom::character::complete::{char, one_of};

use nom::combinator::{complete, map, map_opt, opt, recognize};

use nom::multi::{fold_many0, many0, many1, many_m_n};

use nom::sequence::{delimited, pair, preceded, terminated, tuple};

use nom::*;

use crate::expr::EvalResult;

use crate::ToCexprResult;

/// Representation of a C character
///
/// Values that fit a Rust `char` are stored as [`CChar::Char`]; everything
/// else is kept as its numeric value in [`CChar::Raw`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum CChar {
    /// A character that can be represented as a `char`
    Char(char),
    /// Any other character (8-bit characters, unicode surrogates, etc.)
    Raw(u64),
}

impl From<u8> for CChar {

    fn from(i: u8) -> CChar {

        match i {

            0..=0x7f => CChar::Char(i as u8 as char),

            _ => CChar::Raw(i as u64),

// A non-allocating version of this would be nice...

impl std::convert::Into<Vec<u8>> for CChar {

    fn into(self) -> Vec<u8> {

        match self {

            CChar::Char(c) => {

                let mut s = String::with_capacity(4);

                s.extend(&[c]);

                s.into_bytes()

            CChar::Raw(i) => {

                let mut v = Vec::with_capacity(1);

                v.push(i as u8);

/// ensures the child parser consumes the whole input

pub fn full<I: Clone, O, F>(

    f: F,

) -> impl Fn(I) -> nom::IResult<I, O>

where

    I: nom::InputLength,

    F: Fn(I) -> nom::IResult<I, O>,

    move |input| {

        let res = f(input);

        match res {

            Ok((i, o)) => {

                if i.input_len() == 0 {

                    Ok((i, o))

                } else {

                    Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Complete)))

            r => r,

// =================================

// ======== matching digits ========

// =================================

macro_rules! byte {

	($($p: pat)|* ) => {{

        fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> {

            match i.split_first() {

                $(Some((&c @ $p,rest)))|* => Ok((rest,c)),

                Some(_) => Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::OneOf))),

                None => Err(nom::Err::Incomplete(Needed::new(1))),

        parser

}}

fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> {

    byte!(b'0'..=b'1')(i)

fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> {

    byte!(b'0'..=b'7')(i)

fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> {

    byte!(b'0'..=b'9')(i)

fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> {

    byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F')(i)

// ========================================

// ======== characters and strings ========

// ========================================

fn escape2char(c: char) -> CChar {

    CChar::Char(match c {

        'a' => '\x07',

        'b' => '\x08',

        'f' => '\x0c',

        'n' => '\n',

        'r' => '\r',

        't' => '\t',

        'v' => '\x0b',

        _ => unreachable!("invalid escape {}", c),

})

fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {

    str::from_utf8(&n)

        .ok()

        .and_then(|i| u64::from_str_radix(i, radix).ok())

        .map(|i| match i {

            0..=0x7f => CChar::Char(i as u8 as char),

            _ => CChar::Raw(i),

})

fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {

    str::from_utf8(&n)

        .ok()

        .and_then(|i| u32::from_str_radix(i, 16).ok())

        .and_then(char::from_u32)

        .map(CChar::Char)

fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {

    preceded(

        char('\\'),

        alt((

            map(one_of(r#"'"?\"#), CChar::Char),

            map(one_of("abfnrtv"), escape2char),

            map_opt(many_m_n(1, 3, octal), |v| c_raw_escape(v, 8)),

            map_opt(preceded(char('x'), many1(hexadecimal)), |v| {

                c_raw_escape(v, 16)

}),

            map_opt(

                preceded(char('u'), many_m_n(4, 4, hexadecimal)),

                c_unicode_escape,

),

            map_opt(

                preceded(char('U'), many_m_n(8, 8, hexadecimal)),

                c_unicode_escape,

),

)),

    )(i)

fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> {

    alt((tag("u8"), tag("u"), tag("U"), tag("L")))(i)

fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {

    delimited(

        terminated(opt(c_width_prefix), char('\'')),

        alt((

            escaped_char,

            map(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from),

)),

        char('\''),

    )(i)

fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec<u8>> {

    delimited(

        alt((preceded(c_width_prefix, char('"')), char('"'))),

        fold_many0(

            alt((

                map(escaped_char, |c: CChar| c.into()),

                map(is_not([b'\\', b'"']), |c: &[u8]| c.into()),

)),

            Vec::new,

            |mut v: Vec<u8>, res: Vec<u8>| {

                v.extend_from_slice(&res);

},

),

        char('"'),

    )(i)

// ================================

// ======== parse integers ========

// ================================

/// Parses the ASCII digits in `n` as an unsigned integer in the given `radix`.
///
/// Returns `None` if `n` is not valid UTF-8 or does not parse as a `u64`
/// (empty input, a digit outside the radix, or a value above `u64::MAX`).
fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
    let digits = str::from_utf8(&n).ok()?;
    u64::from_str_radix(digits, radix).ok()
}

fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> {

    let r = input.split_at_position(|c| c != b'u' && c != b'U' && c != b'l' && c != b'L');

    match r {

        Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)),

        res => res,

fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> {

    map(

        terminated(

            alt((

                map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), |v| {

                    c_int_radix(v, 16)

}),

                map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), |v| {

                    c_int_radix(v, 16)

}),

                map_opt(preceded(tag("0b"), many1(complete(binary))), |v| {

                    c_int_radix(v, 2)

}),

                map_opt(preceded(tag("0B"), many1(complete(binary))), |v| {

                    c_int_radix(v, 2)

}),

                map_opt(preceded(char('0'), many1(complete(octal))), |v| {

                    c_int_radix(v, 8)

}),

                map_opt(many1(complete(decimal)), |v| c_int_radix(v, 10)),

                |input| Err(crate::nom::Err::Error(nom::error::Error::new(input, crate::nom::ErrorKind::Fix))),

)),

            opt(take_ul),

),

        |i| i as i64,

    )(i)

// ==============================

// ======== parse floats ========

// ==============================

fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> {

    nom::combinator::complete(byte!(b'f' | b'l' | b'F' | b'L'))(i)

fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option<u8>, Vec<u8>)> {

    preceded(

        byte!(b'e' | b'E'),

        pair(opt(byte!(b'-' | b'+')), many1(complete(decimal))),

    )(i)

fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> {

    map_opt(

        alt((

            terminated(

                recognize(tuple((

                    many1(complete(decimal)),

                    byte!(b'.'),

                    many0(complete(decimal)),

                ))),

                opt(float_width),

),

            terminated(

                recognize(tuple((

                    many0(complete(decimal)),

                    byte!(b'.'),

                    many1(complete(decimal)),

                ))),

                opt(float_width),

),

            terminated(

                recognize(tuple((

                    many0(complete(decimal)),

                    opt(byte!(b'.')),

                    many1(complete(decimal)),

                    float_exp,

                ))),

                opt(float_width),

),

            terminated(

                recognize(tuple((

                    many1(complete(decimal)),

                    opt(byte!(b'.')),

                    many0(complete(decimal)),

                    float_exp,

                ))),

                opt(float_width),

),

            terminated(recognize(many1(complete(decimal))), float_width),

)),

        |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()),

    )(i)

// ================================

// ======== main interface ========

// ================================

fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> {

    alt((

        map(full(c_char), EvalResult::Char),

        map(full(c_int), |i| EvalResult::Int(::std::num::Wrapping(i))),

        map(full(c_float), EvalResult::Float),

        map(full(c_string), EvalResult::Str),

    ))(input)

    .to_cexpr_result()

/// Parse a C literal.

///

/// The input must contain exactly the representation of a single literal

/// token, and in particular no whitespace or sign prefixes.

pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> {

    crate::assert_full_parse(one_literal(input))