punycode.rs - mozsearch

mozilla-central/third_party/rust/idna/src/punycode.rs

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

// Copyright 2013 The rust-url developers.

//

// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or

// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license

// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your

// option. This file may not be copied, modified, or distributed

// except according to those terms.

//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.

//!

//! Since Punycode fundamentally works on Unicode code points,
//! `encode` takes a slice of `char` and `decode` returns a vector of `char`.
//! `encode_str` and `decode_to_string` provide convenience wrappers
//! that convert from and to Rust’s UTF-8 based `str` and `String` types.

use alloc::{string::String, vec::Vec};

use core::char;

use core::u32;

// Bootstring parameter values that define Punycode (RFC 3492, section 5).

/// Number of distinct digit values (`a`-`z` followed by `0`-`9`).
const BASE: u32 = 36;
/// Smallest threshold a digit position can have.
const T_MIN: u32 = 1;
/// Largest threshold a digit position can have.
const T_MAX: u32 = 26;
/// Skew term used by the bias adaptation in `adapt`.
const SKEW: u32 = 38;
/// Divisor applied to the very first delta during bias adaptation.
const DAMP: u32 = 700;
/// Bias in effect before any delta has been coded.
const INITIAL_BIAS: u32 = 72;
/// Code point the coder state starts from (the first non-ASCII code point).
const INITIAL_N: u32 = 0x80;
/// Separates the literal ASCII part from the encoded deltas.
const DELIMITER: char = '-';

#[inline]

fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {

    delta /= if first_time { DAMP } else { 2 };

    delta += delta / num_points;

    let mut k = 0;

    while delta > ((BASE - T_MIN) * T_MAX) / 2 {

        delta /= BASE - T_MIN;

        k += BASE;

    k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW))

/// Convert Punycode to an Unicode `String`.

///

/// This is a convenience wrapper around `decode`.

#[inline]

pub fn decode_to_string(input: &str) -> Option<String> {

    decode(input).map(|chars| chars.into_iter().collect())

/// Convert Punycode to Unicode.

///

/// Return None on malformed input or overflow.

/// Overflow can only happen on inputs that take more than

/// 63 encoded bytes, the DNS limit on domain name labels.

pub fn decode(input: &str) -> Option<Vec<char>> {

    Some(Decoder::default().decode(input).ok()?.collect())

/// Reusable Punycode decoder state.
///
/// Holds the buffer of decoded non-basic code points so that it can be
/// reused across calls to `Decoder::decode`.
#[derive(Default)]
pub(crate) struct Decoder {
    /// `(position in the decoded output, character)` pairs produced by the
    /// most recent `decode` call.
    insertions: Vec<(usize, char)>,
}

impl Decoder {

    /// Split the input iterator and return a Vec with insertions of encoded characters

    pub(crate) fn decode<'a>(&'a mut self, input: &'a str) -> Result<Decode<'a>, ()> {

        self.insertions.clear();

        // Handle "basic" (ASCII) code points.

        // They are encoded as-is before the last delimiter, if any.

        let (base, input) = match input.rfind(DELIMITER) {

            None => ("", input),

            Some(position) => (

                &input[..position],

                if position > 0 {

                    &input[position + 1..]

                } else {

                    input

},

),

};

        if !base.is_ascii() {

            return Err(());

        let base_len = base.len();

        let mut length = base_len as u32;

        let mut code_point = INITIAL_N;

        let mut bias = INITIAL_BIAS;

        let mut i = 0;

        let mut iter = input.bytes();

        loop {

            let previous_i = i;

            let mut weight = 1;

            let mut k = BASE;

            let mut byte = match iter.next() {

                None => break,

                Some(byte) => byte,

};

            // Decode a generalized variable-length integer into delta,

            // which gets added to i.

            loop {

                let digit = match byte {

                    byte @ b'0'..=b'9' => byte - b'0' + 26,

                    byte @ b'A'..=b'Z' => byte - b'A',

                    byte @ b'a'..=b'z' => byte - b'a',

                    _ => return Err(()),

                } as u32;

                if digit > (u32::MAX - i) / weight {

                    return Err(()); // Overflow

                i += digit * weight;

                let t = if k <= bias {

                    T_MIN

                } else if k >= bias + T_MAX {

                    T_MAX

                } else {

                    k - bias

};

                if digit < t {

                    break;

                if weight > u32::MAX / (BASE - t) {

                    return Err(()); // Overflow

                weight *= BASE - t;

                k += BASE;

                byte = match iter.next() {

                    None => return Err(()), // End of input before the end of this delta

                    Some(byte) => byte,

};

            bias = adapt(i - previous_i, length + 1, previous_i == 0);

            if i / (length + 1) > u32::MAX - code_point {

                return Err(()); // Overflow

            // i was supposed to wrap around from length+1 to 0,

            // incrementing code_point each time.

            code_point += i / (length + 1);

            i %= length + 1;

            let c = match char::from_u32(code_point) {

                Some(c) => c,

                None => return Err(()),

};

            // Move earlier insertions farther out in the string

            for (idx, _) in &mut self.insertions {

                if *idx >= i as usize {

                    *idx += 1;

            self.insertions.push((i as usize, c));

            length += 1;

            i += 1;

        self.insertions.sort_by_key(|(i, _)| *i);

        Ok(Decode {

            base: base.chars(),

            insertions: &self.insertions,

            inserted: 0,

            position: 0,

            len: base_len + self.insertions.len(),

})

/// Iterator over the characters of a decoded Punycode string.
///
/// It merges the basic characters (`base`) with the decoded non-basic
/// characters (`insertions`), yielding each insertion when the output
/// position reaches the position recorded for it.
pub(crate) struct Decode<'a> {
    /// Remaining basic characters, in order.
    base: core::str::Chars<'a>,
    /// `(output position, character)` pairs; `next` consumes them front to
    /// back, so they must be ordered by position.
    pub(crate) insertions: &'a [(usize, char)],
    /// Number of entries of `insertions` already yielded.
    inserted: usize,
    /// Number of characters yielded so far (the next output position).
    position: usize,
    /// Total number of characters this iterator yields.
    len: usize,
}

impl<'a> Iterator for Decode<'a> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // An insertion scheduled for the current position takes priority
            // over the next basic character.
            match self.insertions.get(self.inserted) {
                Some((pos, c)) if *pos == self.position => {
                    self.inserted += 1;
                    self.position += 1;
                    return Some(*c);
                }
                _ => {}
            }
            if let Some(c) = self.base.next() {
                self.position += 1;
                return Some(c);
            } else if self.inserted >= self.insertions.len() {
                // Both sources are exhausted.
                return None;
            }
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.len - self.position;
        (len, Some(len))
    }
}

impl<'a> ExactSizeIterator for Decode<'a> {
    /// Number of characters still to be yielded.
    fn len(&self) -> usize {
        self.len - self.position
    }
}

/// Convert an Unicode `str` to Punycode.

///

/// This is a convenience wrapper around `encode`.

#[inline]

pub fn encode_str(input: &str) -> Option<String> {

    if input.len() > u32::MAX as usize {

        return None;

    let mut buf = String::with_capacity(input.len());

    encode_into(input.chars(), &mut buf).ok().map(|()| buf)

/// Convert Unicode to Punycode.

///

/// Return None on overflow, which can only happen on inputs that would take more than

/// 63 encoded bytes, the DNS limit on domain name labels.

pub fn encode(input: &[char]) -> Option<String> {

    if input.len() > u32::MAX as usize {

        return None;

    let mut buf = String::with_capacity(input.len());

    encode_into(input.iter().copied(), &mut buf)

        .ok()

        .map(|()| buf)

pub(crate) fn encode_into<I>(input: I, output: &mut String) -> Result<(), ()>

where

    I: Iterator<Item = char> + Clone,

    // Handle "basic" (ASCII) code points. They are encoded as-is.

    let (mut input_length, mut basic_length) = (0u32, 0);

    for c in input.clone() {

        input_length = input_length.checked_add(1).ok_or(())?;

        if c.is_ascii() {

            output.push(c);

            basic_length += 1;

    if basic_length > 0 {

        output.push('-')

    let mut code_point = INITIAL_N;

    let mut delta = 0;

    let mut bias = INITIAL_BIAS;

    let mut processed = basic_length;

    while processed < input_length {

        // All code points < code_point have been handled already.

        // Find the next larger one.

        let min_code_point = input

            .clone()

            .map(|c| c as u32)

            .filter(|&c| c >= code_point)

            .min()

            .unwrap();

        if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {

            return Err(()); // Overflow

        // Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>

        delta += (min_code_point - code_point) * (processed + 1);

        code_point = min_code_point;

        for c in input.clone() {

            let c = c as u32;

            if c < code_point {

                delta = delta.checked_add(1).ok_or(())?;

            if c == code_point {

                // Represent delta as a generalized variable-length integer:

                let mut q = delta;

                let mut k = BASE;

                loop {

                    let t = if k <= bias {

                        T_MIN

                    } else if k >= bias + T_MAX {

                        T_MAX

                    } else {

                        k - bias

};

                    if q < t {

                        break;

                    let value = t + ((q - t) % (BASE - t));

                    output.push(value_to_digit(value));

                    q = (q - t) / (BASE - t);

                    k += BASE;

                output.push(value_to_digit(q));

                bias = adapt(delta, processed + 1, processed == basic_length);

                delta = 0;

                processed += 1;

        delta += 1;

        code_point += 1;

    Ok(())

/// Map a Punycode digit value to its lowercase ASCII character.
///
/// Values 0 to 25 map to `a` to `z`, and values 26 to 35 map to `0` to `9`.
///
/// # Panics
///
/// Panics if `value` is greater than 35.
#[inline]
fn value_to_digit(value: u32) -> char {
    match value {
        0..=25 => (value as u8 + b'a') as char,       // a..z
        26..=35 => (value as u8 - 26 + b'0') as char, // 0..9
        _ => panic!("Punycode digit value out of range: {}", value),
    }
}

// An input with more than `u32::MAX` code points must be rejected by
// `encode_into`; since every code point here is non-ASCII, nothing is
// written to the buffer before the error is reported.
#[test]
#[ignore = "slow"]
#[cfg(target_pointer_width = "64")]
fn huge_encode() {
    let mut buf = String::new();
    assert!(encode_into(std::iter::repeat('ß').take(u32::MAX as usize + 1), &mut buf).is_err());
    assert!(buf.is_empty());
}