Source code

Revision control

Copy as Markdown

Other Tools

#![allow(clippy::identity_op)]
use std::{
char::from_u32 as char_from_u32,
str::{from_utf8, from_utf8_unchecked, FromStr},
};
use base64::engine::general_purpose::{GeneralPurpose, STANDARD};
use crate::{
error::{Error, Position, Result, SpannedError, SpannedResult},
extensions::Extensions,
};
/// The base64 engine exposed by this module: the `STANDARD` general-purpose
/// engine from the `base64` crate.
pub const BASE64_ENGINE: GeneralPurpose = STANDARD;
// We have the following char categories. Each category occupies one bit, so
// a single byte can describe membership in any combination of them.
const INT_CHAR: u8 = 1 << 0; // [0-9A-Fa-f_]
const FLOAT_CHAR: u8 = 1 << 1; // [0-9\.Ee+-_]
const IDENT_FIRST_CHAR: u8 = 1 << 2; // [A-Za-z_]
const IDENT_OTHER_CHAR: u8 = 1 << 3; // [A-Za-z_0-9]
const IDENT_RAW_CHAR: u8 = 1 << 4; // [A-Za-z_0-9\.+-]
const WHITESPACE_CHAR: u8 = 1 << 5; // [\n\t\r ]
// We encode each char as belonging to some number of these categories.
// The names below are all five characters wide so that the `ENCODINGS`
// table lines up in columns.
const DIGIT: u8 = INT_CHAR | FLOAT_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [0-9]
const ABCDF: u8 = INT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [ABCDFabcdf]
const UNDER: u8 = INT_CHAR | FLOAT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [_]
// Same bit pattern as `UNDER`; kept as a separate name for readability of the table.
const E____: u8 = INT_CHAR | FLOAT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [Ee]
const G2Z__: u8 = IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [G-Zg-z]
const PUNCT: u8 = FLOAT_CHAR | IDENT_RAW_CHAR; // [\.+-]
const WS___: u8 = WHITESPACE_CHAR; // [\t\n\r ]
const _____: u8 = 0; // everything else
// Table of encodings, for fast predicates. (Non-ASCII and special chars are
// shown with '·' in the comment.)
// The table is indexed by byte value; each row covers ten consecutive bytes
// and the row label gives the value of the first byte in that row.
#[rustfmt::skip]
const ENCODINGS: [u8; 256] = [
/* 0 1 2 3 4 5 6 7 8 9 */
/* 0+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, WS___,
/* 10+: ·········· */ WS___, _____, _____, WS___, _____, _____, _____, _____, _____, _____,
/* 20+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 30+: ·· !"#$%&' */ _____, _____, WS___, _____, _____, _____, _____, _____, _____, _____,
/* 40+: ()*+,-./01 */ _____, _____, _____, PUNCT, _____, PUNCT, PUNCT, _____, DIGIT, DIGIT,
/* 50+: 23456789:; */ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, _____, _____,
/* 60+: <=>?@ABCDE */ _____, _____, _____, _____, _____, ABCDF, ABCDF, ABCDF, ABCDF, E____,
/* 70+: FGHIJKLMNO */ ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 80+: PQRSTUVWXY */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 90+: Z[\]^_`abc */ G2Z__, _____, _____, _____, _____, UNDER, _____, ABCDF, ABCDF, ABCDF,
/* 100+: defghijklm */ ABCDF, E____, ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 110+: nopqrstuvw */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 120+: xyz{|}~··· */ G2Z__, G2Z__, G2Z__, _____, _____, _____, _____, _____, _____, _____,
/* 130+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 140+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 150+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 160+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 170+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 180+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 190+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 200+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 210+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 220+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 230+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 240+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 250+: ·········· */ _____, _____, _____, _____, _____, _____
];
/// Returns `true` if `c` is in the integer-literal category (`[0-9A-Fa-f_]`).
const fn is_int_char(c: u8) -> bool {
    let categories = ENCODINGS[c as usize];
    (categories & INT_CHAR) != 0
}
/// Returns `true` if `c` is in the float-literal category (`[0-9\.Ee+-_]`).
const fn is_float_char(c: u8) -> bool {
    let categories = ENCODINGS[c as usize];
    (categories & FLOAT_CHAR) != 0
}
/// Returns `true` if `c` may start an identifier (`[A-Za-z_]`).
pub const fn is_ident_first_char(c: u8) -> bool {
    let categories = ENCODINGS[c as usize];
    (categories & IDENT_FIRST_CHAR) != 0
}
/// Returns `true` if `c` may continue an identifier (`[A-Za-z_0-9]`).
pub const fn is_ident_other_char(c: u8) -> bool {
    let categories = ENCODINGS[c as usize];
    (categories & IDENT_OTHER_CHAR) != 0
}
/// Returns `true` if `c` may appear in a raw identifier (`[A-Za-z_0-9\.+-]`).
pub const fn is_ident_raw_char(c: u8) -> bool {
    let categories = ENCODINGS[c as usize];
    (categories & IDENT_RAW_CHAR) != 0
}
/// Returns `true` if `c` is one of the whitespace bytes (`[\n\t\r ]`).
const fn is_whitespace_char(c: u8) -> bool {
    let categories = ENCODINGS[c as usize];
    (categories & WHITESPACE_CHAR) != 0
}
/// A number of any supported primitive type, as produced by
/// `Bytes::any_num`.
///
/// The 128-bit variants only exist when the `integer128` feature is enabled.
#[derive(Clone, Debug, PartialEq)]
pub enum AnyNum {
F32(f32),
F64(f64),
I8(i8),
U8(u8),
I16(i16),
U16(u16),
I32(i32),
U32(u32),
I64(i64),
U64(u64),
#[cfg(feature = "integer128")]
I128(i128),
#[cfg(feature = "integer128")]
U128(u128),
}
/// A cursor over the input being parsed.
///
/// The type is `Copy`, so a parser state can be duplicated cheaply to try a
/// parse on a working copy.
#[derive(Clone, Copy, Debug)]
pub struct Bytes<'a> {
/// Bits set according to the [`Extensions`] enum.
pub exts: Extensions,
// The input that has not been consumed yet; advancing drops bytes from the front.
bytes: &'a [u8],
// Line/column of the next unconsumed byte, used when reporting errors.
cursor: Position,
}
// The widest unsigned integer type available: `u128` when the `integer128`
// feature is enabled, `u64` otherwise.
#[cfg(feature = "integer128")]
pub(crate) type LargeUInt = u128;
#[cfg(not(feature = "integer128"))]
pub(crate) type LargeUInt = u64;
// The widest signed integer type available: `i128` when the `integer128`
// feature is enabled, `i64` otherwise.
#[cfg(feature = "integer128")]
pub(crate) type LargeSInt = i128;
#[cfg(not(feature = "integer128"))]
pub(crate) type LargeSInt = i64;
impl<'a> Bytes<'a> {
/// Creates a parser over `bytes`, positioned at line 1, column 1.
///
/// Leading whitespace and comments are skipped, then every leading
/// extension attribute is parsed and its bits are accumulated into `exts`.
///
/// # Errors
///
/// Any error raised while skipping whitespace or parsing an attribute is
/// returned together with the cursor position at the point of failure.
pub fn new(bytes: &'a [u8]) -> SpannedResult<Self> {
    let mut parser = Bytes {
        exts: Extensions::empty(),
        bytes,
        cursor: Position { line: 1, col: 1 },
    };

    if let Err(code) = parser.skip_ws() {
        return Err(parser.span_error(code));
    }

    // Keep reading attributes until `extensions` reports that none is left.
    loop {
        let attribute = match parser.extensions() {
            Ok(attribute) => attribute,
            Err(code) => return Err(parser.span_error(code)),
        };
        if attribute.is_empty() {
            return Ok(parser);
        }
        parser.exts |= attribute;

        if let Err(code) = parser.skip_ws() {
            return Err(parser.span_error(code));
        }
    }
}
/// Attaches the current cursor position to `code`.
pub fn span_error(&self, code: Error) -> SpannedError {
    let position = self.cursor;
    SpannedError { code, position }
}
/// Consumes `bytes` bytes of input, one at a time, updating the cursor.
///
/// # Errors
///
/// Stops and returns the error of `advance_single` as soon as the input
/// runs out.
pub fn advance(&mut self, bytes: usize) -> Result<()> {
    (0..bytes).try_for_each(|_| self.advance_single())
}
/// Consumes exactly one byte and moves the cursor past it.
///
/// A newline moves the cursor to column 1 of the next line; any other byte
/// moves it one column to the right.
///
/// # Errors
///
/// Returns `Error::Eof` if no input is left.
pub fn advance_single(&mut self) -> Result<()> {
    let current = self.peek_or_eof()?;
    match current {
        b'\n' => {
            self.cursor.line += 1;
            self.cursor.col = 1;
        }
        _ => self.cursor.col += 1,
    }
    self.bytes = &self.bytes[1..];
    Ok(())
}
/// Parses an integer literal (without its sign) into `T`.
///
/// A `0x`, `0b` or `0o` prefix selects base 16, 2 or 8; otherwise base 10 is
/// used. Underscores between digits are ignored. `sign` decides whether the
/// digits are accumulated by addition (`sign > 0`) or by subtraction.
///
/// # Errors
///
/// * `Error::ExpectedInteger` if there are no digit bytes or a digit is not
///   valid for the base.
/// * `Error::UnderscoreAtBeginning` if the digits start with `_`.
/// * `Error::IntegerOutOfBounds` if the value does not fit into `T`.
fn any_integer<T: Num>(&mut self, sign: i8) -> Result<T> {
    /// Accumulates the digits of `s` in `base`, combining each digit into the
    /// accumulator with `f` (which returns `true` on overflow).
    fn calc_num<T: Num>(
        bytes: &Bytes,
        s: &str,
        base: u8,
        mut f: impl FnMut(&mut T, u8) -> bool,
    ) -> Result<T> {
        let mut num_acc = T::from_u8(0);

        for byte in s.bytes().filter(|&b| b != b'_') {
            // Shift the accumulator first so overflow is reported before
            // the digit itself is validated.
            if num_acc.checked_mul_ext(base) {
                return Err(Error::IntegerOutOfBounds);
            }

            let digit = bytes.decode_hex(byte)?;
            if digit >= base {
                return Err(Error::ExpectedInteger);
            }

            if f(&mut num_acc, digit) {
                return Err(Error::IntegerOutOfBounds);
            }
        }

        Ok(num_acc)
    }

    let base: u8 = match (self.peek(), self.bytes.get(1).copied()) {
        (Some(b'0'), Some(b'x')) => 16,
        (Some(b'0'), Some(b'b')) => 2,
        (Some(b'0'), Some(b'o')) => 8,
        _ => 10,
    };

    if base != 10 {
        // Drop the two-byte prefix, e.g. turn `0x45A` into `45A`.
        let _ = self.advance(2);
    }

    let num_bytes = self.next_bytes_contained_in(is_int_char);
    if num_bytes == 0 {
        return Err(Error::ExpectedInteger);
    }

    // SAFETY: every byte in `..num_bytes` satisfied `is_int_char`, which only
    // accepts ASCII bytes, so the slice is valid UTF-8.
    let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };

    if s.starts_with('_') {
        return Err(Error::UnderscoreAtBeginning);
    }

    let res = if sign > 0 {
        calc_num(self, s, base, T::checked_add_ext)
    } else {
        calc_num(self, s, base, T::checked_sub_ext)
    };

    // The digits are consumed whether or not they formed a valid number.
    let _ = self.advance(num_bytes);

    res
}
/// Parses the next number and returns it in the smallest variant of
/// [`AnyNum`] that can represent it.
///
/// Input that looks like a float is parsed as `f64` and stored as `F32` when
/// the value survives a round trip through `f32`. Otherwise the input is
/// parsed as the widest signed (if it starts with `+`/`-`) or unsigned
/// integer and narrowed. If integer parsing fails, the parser state is
/// rewound and the input is parsed as a float instead.
///
/// # Errors
///
/// Returns `Error::Eof` on empty input, or the error from float parsing when
/// neither an integer nor a float could be read.
pub fn any_num(&mut self) -> Result<AnyNum> {
    // We are not doing float comparisons here in the traditional sense.
    // Instead, this code checks if a f64 fits inside an f32.
    #[allow(clippy::float_cmp)]
    fn any_float(f: f64) -> Result<AnyNum> {
        if f == f64::from(f as f32) {
            Ok(AnyNum::F32(f as f32))
        } else {
            Ok(AnyNum::F64(f))
        }
    }

    // Snapshot the input AND the cursor: a failed integer parse has already
    // advanced both, and rewinding only the bytes would leave the reported
    // line/column out of sync with the data for the rest of the parse.
    let bytes_backup = self.bytes;
    let cursor_backup = self.cursor;

    let first_byte = self.peek_or_eof()?;
    let is_signed = first_byte == b'-' || first_byte == b'+';
    let is_float = self.next_bytes_is_float();

    if is_float {
        let f = self.float::<f64>()?;

        any_float(f)
    } else {
        let max_u8 = LargeUInt::from(u8::MAX);
        let max_u16 = LargeUInt::from(u16::MAX);
        let max_u32 = LargeUInt::from(u32::MAX);
        #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
        let max_u64 = LargeUInt::from(u64::MAX);

        let min_i8 = LargeSInt::from(i8::MIN);
        let max_i8 = LargeSInt::from(i8::MAX);
        let min_i16 = LargeSInt::from(i16::MIN);
        let max_i16 = LargeSInt::from(i16::MAX);
        let min_i32 = LargeSInt::from(i32::MIN);
        let max_i32 = LargeSInt::from(i32::MAX);
        #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
        let min_i64 = LargeSInt::from(i64::MIN);
        #[cfg_attr(not(feature = "integer128"), allow(clippy::useless_conversion))]
        let max_i64 = LargeSInt::from(i64::MAX);

        if is_signed {
            match self.signed_integer::<LargeSInt>() {
                Ok(x) => {
                    if x >= min_i8 && x <= max_i8 {
                        Ok(AnyNum::I8(x as i8))
                    } else if x >= min_i16 && x <= max_i16 {
                        Ok(AnyNum::I16(x as i16))
                    } else if x >= min_i32 && x <= max_i32 {
                        Ok(AnyNum::I32(x as i32))
                    } else if x >= min_i64 && x <= max_i64 {
                        Ok(AnyNum::I64(x as i64))
                    } else {
                        #[cfg(feature = "integer128")]
                        {
                            Ok(AnyNum::I128(x))
                        }
                        #[cfg(not(feature = "integer128"))]
                        {
                            Ok(AnyNum::I64(x))
                        }
                    }
                }
                Err(_) => {
                    // Not an integer after all: rewind and retry as a float.
                    self.bytes = bytes_backup;
                    self.cursor = cursor_backup;

                    any_float(self.float::<f64>()?)
                }
            }
        } else {
            match self.unsigned_integer::<LargeUInt>() {
                Ok(x) => {
                    if x <= max_u8 {
                        Ok(AnyNum::U8(x as u8))
                    } else if x <= max_u16 {
                        Ok(AnyNum::U16(x as u16))
                    } else if x <= max_u32 {
                        Ok(AnyNum::U32(x as u32))
                    } else if x <= max_u64 {
                        Ok(AnyNum::U64(x as u64))
                    } else {
                        #[cfg(feature = "integer128")]
                        {
                            Ok(AnyNum::U128(x))
                        }
                        #[cfg(not(feature = "integer128"))]
                        {
                            Ok(AnyNum::U64(x))
                        }
                    }
                }
                Err(_) => {
                    // Not an integer after all: rewind and retry as a float.
                    self.bytes = bytes_backup;
                    self.cursor = cursor_backup;

                    any_float(self.float::<f64>()?)
                }
            }
        }
    }
}
/// Parses the literal `true` or `false`.
///
/// # Errors
///
/// Returns `Error::ExpectedBoolean` if the input starts with neither.
pub fn bool(&mut self) -> Result<bool> {
    if self.consume("true") {
        return Ok(true);
    }
    if self.consume("false") {
        return Ok(false);
    }
    Err(Error::ExpectedBoolean)
}
/// Returns the input that has not been consumed yet.
pub fn bytes(&self) -> &[u8] {
    let remaining: &[u8] = self.bytes;
    remaining
}
pub fn char(&mut self) -> Result<char> {
if !self.consume("'") {
return Err(Error::ExpectedChar);
}
let c = self.peek_or_eof()?;
let c = if c == b'\\' {
let _ = self.advance(1);
self.parse_escape()?
} else {
// Check where the end of the char (') is and try to
// interpret the rest as UTF-8
let max = self.bytes.len().min(5);
let pos: usize = self.bytes[..max]
.iter()
.position(|&x| x == b'\'')
.ok_or(Error::ExpectedChar)?;
let s = from_utf8(&self.bytes[0..pos]).map_err(Error::from)?;
let mut chars = s.chars();
let first = chars.next().ok_or(Error::ExpectedChar)?;
if chars.next().is_some() {
return Err(Error::ExpectedChar);
}
let _ = self.advance(pos);
first
};
if !self.consume("'") {
return Err(Error::ExpectedChar);
}
Ok(c)
}
/// Skips whitespace, then consumes a `,` (and the whitespace after it) if
/// one is present. Returns whether a comma was consumed.
pub fn comma(&mut self) -> Result<bool> {
    self.skip_ws()?;

    let found = self.consume(",");
    if found {
        self.skip_ws()?;
    }
    Ok(found)
}
/// Checks, without consuming anything, that the input starts with `ident`
/// and that the byte following it cannot continue an identifier.
pub fn check_ident(&mut self, ident: &str) -> bool {
    if !self.test_for(ident) {
        return false;
    }
    !self.check_ident_other_char(ident.len())
}
/// Returns `true` if there is a byte at `index` and it may continue an
/// identifier.
fn check_ident_other_char(&self, index: usize) -> bool {
    match self.bytes.get(index) {
        Some(&byte) => is_ident_other_char(byte),
        None => false,
    }
}
/// Decides whether the upcoming struct body has unnamed fields.
///
/// Takes `self` by value, so it should only be used on a working copy: the
/// caller's parser state is left untouched.
pub fn check_tuple_struct(mut self) -> Result<bool> {
    // No field identifier at all means this is a tuple struct.
    if self.identifier().is_err() {
        return Ok(true);
    }

    self.skip_ws()?;

    // An identifier that is not followed by a colon can only be a unit struct.
    let next = self.eat_byte()?;
    Ok(next != b':')
}
/// Consumes `ident` if the input starts with it and the byte after it cannot
/// belong to an identifier. Returns whether anything was consumed.
pub fn consume_ident(&mut self, ident: &str) -> bool {
    let matched = self.check_ident(ident);
    if matched {
        let _ = self.advance(ident.len());
    }
    matched
}
pub fn consume_struct_name(&mut self, ident: &'static str) -> Result<bool> {
if self.check_ident("") {
return Ok(false);
}
let found_ident = match self.identifier() {
Ok(maybe_ident) => std::str::from_utf8(maybe_ident)?,
Err(Error::SuggestRawIdentifier(found_ident)) if found_ident == ident => {
return Err(Error::SuggestRawIdentifier(found_ident))
}
Err(_) => return Err(Error::ExpectedNamedStructLike(ident)),
};
if found_ident != ident {
return Err(Error::ExpectedDifferentStructName {
expected: ident,
found: String::from(found_ident),
});
}
Ok(true)
}
/// Consumes `s` if the input starts with it. Returns whether it did.
pub fn consume(&mut self, s: &str) -> bool {
    let matched = self.test_for(s);
    if matched {
        let _ = self.advance(s.len());
    }
    matched
}
/// Tries to consume every token in `all`, in order, skipping whitespace
/// after each token that matched.
///
/// Every token is attempted even after an earlier one failed to match or
/// produced an error. The result is the first whitespace-skipping error, if
/// any, otherwise whether all tokens matched.
fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
    let mut outcome: Result<bool> = Ok(true);

    for elem in all {
        let step: Result<bool> = if self.consume(elem) {
            self.skip_ws().map(|()| true)
        } else {
            Ok(false)
        };

        outcome = outcome.and_then(|so_far| step.map(|hit| hit && so_far));
    }

    outcome
}
/// Consumes and returns the next byte.
///
/// # Errors
///
/// Returns `Error::Eof` if no input is left.
pub fn eat_byte(&mut self) -> Result<u8> {
    let byte = self.peek_or_eof()?;
    // Cannot fail: the peek above proved that a byte is available.
    let _ = self.advance_single();
    Ok(byte)
}
/// Consumes the next byte and checks that it equals `byte`.
///
/// # Errors
///
/// Returns `Error::Eof` if no input is left, or `error` if a different byte
/// was found (that byte is still consumed).
pub fn expect_byte(&mut self, byte: u8, error: Error) -> Result<()> {
    let found = self.eat_byte()?;
    if found == byte {
        Ok(())
    } else {
        Err(error)
    }
}
/// Returns the extensions bit mask.
fn extensions(&mut self) -> Result<Extensions> {
if self.peek() != Some(b'#') {
return Ok(Extensions::empty());
}
if !self.consume_all(&["#", "!", "[", "enable", "("])? {
return Err(Error::ExpectedAttribute);
}
self.skip_ws()?;
let mut extensions = Extensions::empty();
loop {
let ident = self.identifier()?;
let extension = Extensions::from_ident(ident).ok_or_else(|| {
Error::NoSuchExtension(String::from_utf8_lossy(ident).into_owned())
})?;
extensions |= extension;
let comma = self.comma()?;
// If we have no comma but another item, return an error
if !comma && self.check_ident_other_char(0) {
return Err(Error::ExpectedComma);
}
// If there's no comma, assume the list ended.
// If there is, it might be a trailing one, thus we only
// continue the loop if we get an ident char.
if !comma || !self.check_ident_other_char(0) {
break;
}
}
self.skip_ws()?;
if self.consume_all(&[")", "]"])? {
Ok(extensions)
} else {
Err(Error::ExpectedAttributeEnd)
}
}
pub fn float<T>(&mut self) -> Result<T>
where
T: FromStr,
{
for literal in &["inf", "+inf", "-inf", "NaN", "+NaN", "-NaN"] {
if self.consume_ident(literal) {
return FromStr::from_str(literal).map_err(|_| unreachable!()); // must not fail
}
}
let num_bytes = self.next_bytes_contained_in(is_float_char);
// Since `rustc` allows `1_0.0_1`, lint against underscores in floats
if let Some(err_bytes) = self.bytes[0..num_bytes].iter().position(|b| *b == b'_') {
let _ = self.advance(err_bytes);
return Err(Error::FloatUnderscore);
}
let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
let res = FromStr::from_str(s).map_err(|_| Error::ExpectedFloat);
let _ = self.advance(num_bytes);
res
}
pub fn identifier(&mut self) -> Result<&'a [u8]> {
let next = self.peek_or_eof()?;
if !is_ident_first_char(next) {
if is_ident_raw_char(next) {
let ident_bytes = &self.bytes[..self.next_bytes_contained_in(is_ident_raw_char)];
if let Ok(ident) = std::str::from_utf8(ident_bytes) {
return Err(Error::SuggestRawIdentifier(String::from(ident)));
}
}
return Err(Error::ExpectedIdentifier);
}
// If the next two bytes signify the start of a raw string literal,
// return an error.
let length = if next == b'r' {
match self.bytes.get(1).ok_or(Error::Eof)? {
b'"' => return Err(Error::ExpectedIdentifier),
b'#' => {
let after_next = self.bytes.get(2).copied().unwrap_or_default();
// Note: it's important to check this before advancing forward, so that
// the value-type deserializer can fall back to parsing it differently.
if !is_ident_raw_char(after_next) {
return Err(Error::ExpectedIdentifier);
}
// skip "r#"
let _ = self.advance(2);
self.next_bytes_contained_in(is_ident_raw_char)
}
_ => {
let std_ident_length = self.next_bytes_contained_in(is_ident_other_char);
let raw_ident_length = self.next_bytes_contained_in(is_ident_raw_char);
if raw_ident_length > std_ident_length {
if let Ok(ident) = std::str::from_utf8(&self.bytes[..raw_ident_length]) {
return Err(Error::SuggestRawIdentifier(String::from(ident)));
}
}
std_ident_length
}
}
} else {
let std_ident_length = self.next_bytes_contained_in(is_ident_other_char);
let raw_ident_length = self.next_bytes_contained_in(is_ident_raw_char);
if raw_ident_length > std_ident_length {
if let Ok(ident) = std::str::from_utf8(&self.bytes[..raw_ident_length]) {
return Err(Error::SuggestRawIdentifier(String::from(ident)));
}
}
std_ident_length
};
let ident = &self.bytes[..length];
let _ = self.advance(length);
Ok(ident)
}
/// Returns the length of the longest prefix of the remaining input whose
/// bytes all satisfy `allowed`.
pub fn next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize {
    self.bytes
        .iter()
        .position(|&byte| !allowed(byte))
        .unwrap_or(self.bytes.len())
}
pub fn next_bytes_is_float(&self) -> bool {
if let Some(byte) = self.peek() {
let skip = match byte {
b'+' | b'-' => 1,
_ => 0,
};
let flen = self
.bytes
.iter()
.skip(skip)
.take_while(|&&b| is_float_char(b))
.count();
let ilen = self
.bytes
.iter()
.skip(skip)
.take_while(|&&b| is_int_char(b))
.count();
flen > ilen
} else {
false
}
}
pub fn skip_ws(&mut self) -> Result<()> {
loop {
while self.peek().map_or(false, is_whitespace_char) {
let _ = self.advance_single();
}
if !self.skip_comment()? {
return Ok(());
}
}
}
/// Returns the next byte without consuming it, or `None` at end of input.
pub fn peek(&self) -> Option<u8> {
    self.bytes.split_first().map(|(&head, _)| head)
}
pub fn peek_or_eof(&self) -> Result<u8> {
self.bytes.first().copied().ok_or(Error::Eof)
}
/// Parses an integer with an optional leading `+` or `-` sign.
///
/// # Errors
///
/// Returns `Error::Eof` on empty input and propagates the errors of
/// `any_integer`.
pub fn signed_integer<T>(&mut self) -> Result<T>
where
    T: Num,
{
    let first = self.peek_or_eof()?;

    let sign: i8 = if first == b'-' { -1 } else { 1 };
    if first == b'+' || first == b'-' {
        let _ = self.advance_single();
    }

    self.any_integer(sign)
}
/// Parses a string literal: either a double-quoted string with escapes or a
/// raw string introduced by `r`.
///
/// # Errors
///
/// Returns `Error::ExpectedString` if the input starts with neither `"` nor
/// `r`, and propagates the errors of the respective string parser.
pub fn string(&mut self) -> Result<ParsedStr<'a>> {
    if self.consume("\"") {
        return self.escaped_string();
    }
    if self.consume("r") {
        return self.raw_string();
    }
    Err(Error::ExpectedString)
}
fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
use std::iter::repeat;
let (i, end_or_escape) = self
.bytes
.iter()
.enumerate()
.find(|&(_, &b)| b == b'\\' || b == b'"')
.ok_or(Error::ExpectedStringEnd)?;
if *end_or_escape == b'"' {
let s = from_utf8(&self.bytes[..i]).map_err(Error::from)?;
// Advance by the number of bytes of the string
// + 1 for the `"`.
let _ = self.advance(i + 1);
Ok(ParsedStr::Slice(s))
} else {
let mut i = i;
let mut s: Vec<_> = self.bytes[..i].to_vec();
loop {
let _ = self.advance(i + 1);
let character = self.parse_escape()?;
match character.len_utf8() {
1 => s.push(character as u8),
len => {
let start = s.len();
s.extend(repeat(0).take(len));
character.encode_utf8(&mut s[start..]);
}
}
let (new_i, end_or_escape) = self
.bytes
.iter()
.enumerate()
.find(|&(_, &b)| b == b'\\' || b == b'"')
.ok_or(Error::ExpectedStringEnd)?;
i = new_i;
s.extend_from_slice(&self.bytes[..i]);
if *end_or_escape == b'"' {
let _ = self.advance(i + 1);
let s = String::from_utf8(s).map_err(Error::from)?;
break Ok(ParsedStr::Allocated(s));
}
}
}
}
fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
let num_hashes = self.bytes.iter().take_while(|&&b| b == b'#').count();
let hashes = &self.bytes[..num_hashes];
let _ = self.advance(num_hashes);
if !self.consume("\"") {
return Err(Error::ExpectedString);
}
let ending = [&[b'"'], hashes].concat();
let i = self
.bytes
.windows(num_hashes + 1)
.position(|window| window == ending.as_slice())
.ok_or(Error::ExpectedStringEnd)?;
let s = from_utf8(&self.bytes[..i]).map_err(Error::from)?;
// Advance by the number of bytes of the string
// + `num_hashes` + 1 for the `"`.
let _ = self.advance(i + num_hashes + 1);
Ok(ParsedStr::Slice(s))
}
/// Returns `true` if the remaining input starts with `s`. Nothing is
/// consumed.
fn test_for(&self, s: &str) -> bool {
    self.bytes.starts_with(s.as_bytes())
}
/// Parses an integer literal without a sign, accumulating it as a positive
/// value.
pub fn unsigned_integer<T: Num>(&mut self) -> Result<T> {
    self.any_integer::<T>(1)
}
/// Consumes two hex digits and combines them into one byte, the first digit
/// being the high nibble.
///
/// # Errors
///
/// Returns `Error::Eof` if the input ends early, or the error of
/// `decode_hex` for a non-hex digit.
fn decode_ascii_escape(&mut self) -> Result<u8> {
    let high_byte = self.eat_byte()?;
    let high = self.decode_hex(high_byte)?;

    let low_byte = self.eat_byte()?;
    let low = self.decode_hex(low_byte)?;

    Ok((high << 4) | low)
}
#[inline]
fn decode_hex(&self, c: u8) -> Result<u8> {
match c {
c @ b'0'..=b'9' => Ok(c - b'0'),
c @ b'a'..=b'f' => Ok(10 + c - b'a'),
c @ b'A'..=b'F' => Ok(10 + c - b'A'),
_ => Err(Error::InvalidEscape("Non-hex digit found")),
}
}
fn parse_escape(&mut self) -> Result<char> {
let c = match self.eat_byte()? {
b'\'' => '\'',
b'"' => '"',
b'\\' => '\\',
b'n' => '\n',
b'r' => '\r',
b't' => '\t',
b'0' => '\0',
b'x' => self.decode_ascii_escape()? as char,
b'u' => {
self.expect_byte(b'{', Error::InvalidEscape("Missing { in Unicode escape"))?;
let mut bytes: u32 = 0;
let mut num_digits = 0;
while num_digits < 6 {
let byte = self.peek_or_eof()?;
if byte == b'}' {
break;
} else {
self.advance_single()?;
}
let byte = self.decode_hex(byte)?;
bytes <<= 4;
bytes |= u32::from(byte);
num_digits += 1;
}
if num_digits == 0 {
return Err(Error::InvalidEscape(
"Expected 1-6 digits, got 0 digits in Unicode escape",
));
}
self.expect_byte(
b'}',
Error::InvalidEscape("No } at the end of Unicode escape"),
)?;
char_from_u32(bytes).ok_or(Error::InvalidEscape("Not a valid char"))?
}
_ => {
return Err(Error::InvalidEscape("Unknown escape character"));
}
};
Ok(c)
}
/// Skips one comment if the input starts with `/`.
///
/// A `//` comment runs up to (not including) the next newline. A `/* ... */`
/// comment may contain nested block comments. Returns whether a comment was
/// skipped.
///
/// # Errors
///
/// * `Error::Eof` if the input ends right after the `/`.
/// * `Error::UnexpectedByte` if the `/` is followed by neither `/` nor `*`.
/// * `Error::UnclosedBlockComment` if a block comment is never closed.
fn skip_comment(&mut self) -> Result<bool> {
    if !self.consume("/") {
        return Ok(false);
    }

    match self.eat_byte()? {
        b'/' => {
            let line_rest = self.bytes.iter().take_while(|&&b| b != b'\n').count();
            let _ = self.advance(line_rest);
        }
        b'*' => {
            let mut level = 1;

            while level > 0 {
                // Jump straight to the next byte that could open or close
                // a comment.
                let uninteresting = self
                    .bytes
                    .iter()
                    .take_while(|&&b| b != b'/' && b != b'*')
                    .count();

                if self.bytes.is_empty() {
                    return Err(Error::UnclosedBlockComment);
                }

                let _ = self.advance(uninteresting);

                // check whether / or * and take action
                if self.consume("/*") {
                    level += 1;
                } else if self.consume("*/") {
                    level -= 1;
                } else {
                    // A lone `/` or `*`: step over it and keep scanning.
                    self.eat_byte().map_err(|_| Error::UnclosedBlockComment)?;
                }
            }
        }
        b => return Err(Error::UnexpectedByte(b as char)),
    }

    Ok(true)
}
}
/// Minimal integer abstraction used while accumulating digits.
///
/// The `*_ext` methods update `self` in place and report overflow through
/// their return value; on overflow `self` is left unchanged.
pub trait Num {
    /// Converts a `u8` into `Self` with an `as` cast.
    fn from_u8(x: u8) -> Self;

    /// Returns `true` on overflow
    fn checked_mul_ext(&mut self, x: u8) -> bool;

    /// Returns `true` on overflow
    fn checked_add_ext(&mut self, x: u8) -> bool;

    /// Returns `true` on overflow
    fn checked_sub_ext(&mut self, x: u8) -> bool;
}

// Implements `Num` for every listed primitive integer type by delegating to
// the type's own `checked_*` methods.
macro_rules! impl_num {
    ($($ty:ident)*) => {
        $(
            impl Num for $ty {
                fn from_u8(x: u8) -> Self {
                    x as $ty
                }

                fn checked_mul_ext(&mut self, x: u8) -> bool {
                    if let Some(product) = self.checked_mul(Self::from_u8(x)) {
                        *self = product;
                        false
                    } else {
                        true
                    }
                }

                fn checked_add_ext(&mut self, x: u8) -> bool {
                    if let Some(sum) = self.checked_add(Self::from_u8(x)) {
                        *self = sum;
                        false
                    } else {
                        true
                    }
                }

                fn checked_sub_ext(&mut self, x: u8) -> bool {
                    if let Some(difference) = self.checked_sub(Self::from_u8(x)) {
                        *self = difference;
                        false
                    } else {
                        true
                    }
                }
            }
        )*
    };
}

#[cfg(feature = "integer128")]
impl_num!(u8 u16 u32 u64 u128 i8 i16 i32 i64 i128);
#[cfg(not(feature = "integer128"))]
impl_num!(u8 u16 u32 u64 i8 i16 i32 i64);
/// A parsed string literal.
#[derive(Clone, Debug)]
pub enum ParsedStr<'a> {
/// An owned string, used when the content had to be rebuilt (for example
/// after decoding escape sequences).
Allocated(String),
/// A slice borrowed directly from the input.
Slice(&'a str),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The two hex digits `1` and `0` decode to the single byte `0x10`.
    #[test]
    fn decode_x10() {
        let mut parser = Bytes::new(b"10").unwrap();
        let decoded = parser.decode_ascii_escape();

        assert_eq!(decoded, Ok(0x10));
    }
}