optimal_fit.rs - mozsearch

use std::cell::RefCell;

use crate::core::Fragment;

/// Penalties for

/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)

/// and [`wrap_optimal_fit`].

///

/// This wrapping algorithm in [`wrap_optimal_fit`] considers the

/// entire paragraph to find optimal line breaks. When wrapping text,

/// "penalties" are assigned to line breaks based on the gaps left at

/// the end of lines. The penalties are given by this struct, with

/// [`Penalties::default`] assigning penalties that work well for

/// monospace text.

///

/// If you are wrapping proportional text, you are advised to assign

/// your own penalties according to your font size. See the individual

/// penalties below for details.

///

/// **Note:** Only available when the `smawk` Cargo feature is

/// enabled.

#[derive(Clone, Copy, Debug, PartialEq, Eq)]

pub struct Penalties {

    /// Per-line penalty. This is added for every line, which makes it

    /// expensive to output more lines than the minimum required.

    pub nline_penalty: usize,

    /// Per-character cost for lines that overflow the target line width.

///

    /// With a default value of 50², every single character costs as

    /// much as leaving a gap of 50 characters behind. This is because

    /// we assign as cost of `gap * gap` to a short line. When

    /// wrapping monospace text, we can overflow the line by 1

    /// character in extreme cases:

///

    /// ```

    /// use textwrap::core::Word;

    /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};

///

    /// let short = "foo ";

    /// let long = "x".repeat(50);

    /// let length = (short.len() + long.len()) as f64;

    /// let fragments = vec![Word::from(short), Word::from(&long)];

    /// let penalties = Penalties::new();

///

    /// // Perfect fit, both words are on a single line with no overflow.

    /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();

    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);

///

    /// // The words no longer fit, yet we get a single line back. While

    /// // the cost of overflow (`1 * 2500`) is the same as the cost of the

    /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`

    /// // which makes it cheaper to overflow than to use two lines.

    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();

    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);

///

    /// // The cost of overflow would be 2 * 2500, whereas the cost of

    /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =

    /// // 3401`. We therefore get two lines.

    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();

    /// assert_eq!(wrapped, vec![&[Word::from(short)],

    ///                          &[Word::from(&long)]]);

    /// ```

///

    /// This only happens if the overflowing word is 50 characters

    /// long _and_ if the word overflows the line by exactly one

    /// character. If it overflows by more than one character, the

    /// overflow penalty will quickly outgrow the cost of the gap, as

    /// seen above.

    pub overflow_penalty: usize,

    /// When should the a single word on the last line be considered

    /// "too short"?

///

    /// If the last line of the text consist of a single word and if

    /// this word is shorter than `1 / short_last_line_fraction` of

    /// the line width, then the final line will be considered "short"

    /// and `short_last_line_penalty` is added as an extra penalty.

///

    /// The effect of this is to avoid a final line consisting of a

    /// single small word. For example, with a

    /// `short_last_line_penalty` of 25 (the default), a gap of up to

    /// 5 columns will be seen as more desirable than having a final

    /// short line.

///

    /// ## Examples

///

    /// ```

    /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};

///

    /// let text = "This is a demo of the short last line penalty.";

///

    /// // The first-fit algorithm leaves a single short word on the last line:

    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),

    ///            vec!["This is a demo of the short last line",

    ///                 "penalty."]);

///

    /// #[cfg(feature = "smawk")] {

    /// let mut penalties = wrap_algorithms::Penalties::new();

///

    /// // Since "penalty." is shorter than 25% of the line width, the

    /// // optimal-fit algorithm adds a penalty of 25. This is enough

    /// // to move "line " down:

    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),

    ///            vec!["This is a demo of the short last",

    ///                 "line penalty."]);

///

    /// // We can change the meaning of "short" lines. Here, only words

    /// // shorter than 1/10th of the line width will be considered short:

    /// penalties.short_last_line_fraction = 10;

    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),

    ///            vec!["This is a demo of the short last line",

    ///                 "penalty."]);

///

    /// // If desired, the penalty can also be disabled:

    /// penalties.short_last_line_fraction = 4;

    /// penalties.short_last_line_penalty = 0;

    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),

    ///            vec!["This is a demo of the short last line",

    ///                 "penalty."]);

    /// }

    /// ```

    pub short_last_line_fraction: usize,

    /// Penalty for a last line with a single short word.

///

    /// Set this to zero if you do not want to penalize short last lines.

    pub short_last_line_penalty: usize,

    /// Penalty for lines ending with a hyphen.

    pub hyphen_penalty: usize,

impl Penalties {

    /// Default penalties for monospace text.

///

    /// The penalties here work well for monospace text. This is

    /// because they expect the gaps at the end of lines to be roughly

    /// in the range `0..100`. If the gaps are larger, the

    /// `overflow_penalty` and `hyphen_penalty` become insignificant.

    pub const fn new() -> Self {

        Penalties {

            nline_penalty: 1000,

            overflow_penalty: 50 * 50,

            short_last_line_fraction: 4,

            short_last_line_penalty: 25,

            hyphen_penalty: 25,

impl Default for Penalties {

    fn default() -> Self {

        Self::new()

/// Cache for line numbers. This is necessary to avoid a O(n**2)

/// behavior when computing line numbers in [`wrap_optimal_fit`].

struct LineNumbers {

    line_numbers: RefCell<Vec<usize>>,

impl LineNumbers {

    fn new(size: usize) -> Self {

        let mut line_numbers = Vec::with_capacity(size);

        line_numbers.push(0);

        LineNumbers {

            line_numbers: RefCell::new(line_numbers),

    fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {

        while self.line_numbers.borrow_mut().len() < i + 1 {

            let pos = self.line_numbers.borrow().len();

            let line_number = 1 + self.get(minima[pos].0, minima);

            self.line_numbers.borrow_mut().push(line_number);

        self.line_numbers.borrow()[i]

/// Overflow error during the [`wrap_optimal_fit`] computation.

#[derive(Debug, PartialEq, Eq)]

pub struct OverflowError;

impl std::fmt::Display for OverflowError {

    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {

        write!(f, "wrap_optimal_fit cost computation overflowed")

impl std::error::Error for OverflowError {}

/// Wrap abstract fragments into lines with an optimal-fit algorithm.

///

/// The `line_widths` slice gives the target line width for each line

/// (the last slice element is repeated as necessary). This can be

/// used to implement hanging indentation.

///

/// The fragments must already have been split into the desired

/// widths, this function will not (and cannot) attempt to split them

/// further when arranging them into lines.

///

/// # Optimal-Fit Algorithm

///

/// The algorithm considers all possible break points and picks the

/// breaks which minimizes the gaps at the end of each line. More

/// precisely, the algorithm assigns a cost or penalty to each break

/// point, determined by `cost = gap * gap` where `gap = target_width -

/// line_width`. Shorter lines are thus penalized more heavily since

/// they leave behind a larger gap.

///

/// We can illustrate this with the text “To be, or not to be: that is

/// the question”. We will be wrapping it in a narrow column with room

/// for only 10 characters. The [greedy

/// algorithm](super::wrap_first_fit) will produce these lines, each

/// annotated with the corresponding penalty:

///

/// ```text

/// "To be, or"   1² =  1

/// "not to be:"  0² =  0

/// "that is"     3² =  9

/// "the"         7² = 49

/// "question"    2² =  4

/// ```

///

/// We see that line four with “the” leaves a gap of 7 columns, which

/// gives it a penalty of 49. The sum of the penalties is 63.

///

/// There are 10 words, which means that there are `2_u32.pow(9)` or

/// 512 different ways to typeset it. We can compute

/// the sum of the penalties for each possible line break and search

/// for the one with the lowest sum:

///

/// ```text

/// "To be,"     4² = 16

/// "or not to"  1² =  1

/// "be: that"   2² =  4

/// "is the"     4² = 16

/// "question"   2² =  4

/// ```

///

/// The sum of the penalties is 41, which is better than what the

/// greedy algorithm produced.

///

/// Searching through all possible combinations would normally be

/// prohibitively slow. However, it turns out that the problem can be

/// formulated as the task of finding column minima in a cost matrix.

/// This matrix has a special form (totally monotone) which lets us

/// use a [linear-time algorithm called

/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break

/// points.

///

/// This means that the time complexity remains O(_n_) where _n_ is

/// the number of words. Compared to

/// [`wrap_first_fit()`](super::wrap_first_fit), this function is

/// about 4 times slower.

///

/// The optimization of per-line costs over the entire paragraph is

/// inspired by the line breaking algorithm used in TeX, as described

/// in the 1981 article [_Breaking Paragraphs into

/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)

/// by Knuth and Plass. The implementation here is based on [Python

/// code by David

/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).

///

/// # Errors

///

/// In case of an overflow during the cost computation, an `Err` is

/// returned. Overflows happens when fragments or lines have infinite

/// widths (`f64::INFINITY`) or if the widths are so large that the

/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`

/// (approximately 1e154):

///

/// ```

/// use textwrap::core::Fragment;

/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};

///

/// #[derive(Debug, PartialEq)]

/// struct Word(f64);

///

/// impl Fragment for Word {

///     fn width(&self) -> f64 { self.0 }

///     fn whitespace_width(&self) -> f64 { 1.0 }

///     fn penalty_width(&self) -> f64 { 0.0 }

/// }

///

/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is

/// // larger than f64::MAX:

/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),

///            Err(OverflowError));

/// ```

///

/// When using fragment widths and line widths which fit inside an

/// `u64`, overflows cannot happen. This means that fragments derived

/// from a `&str` cannot cause overflows.

///

/// **Note:** Only available when the `smawk` Cargo feature is

/// enabled.

pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(

    fragments: &'a [T],

    line_widths: &'b [f64],

    penalties: &'b Penalties,

) -> Result<Vec<&'a [T]>, OverflowError> {

    // The final line width is used for all remaining lines.

    let default_line_width = line_widths.last().copied().unwrap_or(0.0);

    let mut widths = Vec::with_capacity(fragments.len() + 1);

    let mut width = 0.0;

    widths.push(width);

    for fragment in fragments {

        width += fragment.width() + fragment.whitespace_width();

        widths.push(width);

    let line_numbers = LineNumbers::new(fragments.len());

    let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {

        // Line number for fragment `i`.

        let line_number = line_numbers.get(i, minima);

        let line_width = line_widths

            .get(line_number)

            .copied()

            .unwrap_or(default_line_width);

        let target_width = line_width.max(1.0);

        // Compute the width of a line spanning fragments[i..j] in

        // constant time. We need to adjust widths[j] by subtracting

        // the whitespace of fragment[j-1] and then add the penalty.

        let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()

            + fragments[j - 1].penalty_width();

        // We compute cost of the line containing fragments[i..j]. We

        // start with values[i].1, which is the optimal cost for

        // breaking before fragments[i].

//

        // First, every extra line cost NLINE_PENALTY.

        let mut cost = minima[i].1 + penalties.nline_penalty as f64;

        // Next, we add a penalty depending on the line length.

        if line_width > target_width {

            // Lines that overflow get a hefty penalty.

            let overflow = line_width - target_width;

            cost += overflow * penalties.overflow_penalty as f64;

        } else if j < fragments.len() {

            // Other lines (except for the last line) get a milder

            // penalty which depend on the size of the gap.

            let gap = target_width - line_width;

            cost += gap * gap;

        } else if i + 1 == j

            && line_width < target_width / penalties.short_last_line_fraction as f64

            // The last line can have any size gap, but we do add a

            // penalty if the line is very short (typically because it

            // contains just a single word).

            cost += penalties.short_last_line_penalty as f64;

        // Finally, we discourage hyphens.

        if fragments[j - 1].penalty_width() > 0.0 {

            // TODO: this should use a penalty value from the fragment

            // instead.

            cost += penalties.hyphen_penalty as f64;

        cost

});

    for (_, cost) in &minima {

        if cost.is_infinite() {

            return Err(OverflowError);

    let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));

    let mut pos = fragments.len();

    loop {

        let prev = minima[pos].0;

        lines.push(&fragments[prev..pos]);

        pos = prev;

        if pos == 0 {

            break;

    lines.reverse();

    Ok(lines)

#[cfg(test)]

mod tests {

    use super::*;

    #[derive(Debug, PartialEq)]

    struct Word(f64);

    #[rustfmt::skip]

    impl Fragment for Word {

        fn width(&self) -> f64 { self.0 }

        fn whitespace_width(&self) -> f64 { 1.0 }

        fn penalty_width(&self) -> f64 { 0.0 }

    #[test]

    fn wrap_fragments_with_infinite_widths() {

        let words = vec![Word(f64::INFINITY)];

        assert_eq!(

            wrap_optimal_fit(&words, &[0.0], &Penalties::default()),

            Err(OverflowError)

);

    #[test]

    fn wrap_fragments_with_huge_widths() {

        let words = vec![Word(1e200), Word(1e250), Word(1e300)];

        assert_eq!(

            wrap_optimal_fit(&words, &[1e300], &Penalties::default()),

            Err(OverflowError)

);

    #[test]

    fn wrap_fragments_with_large_widths() {

        // The gaps will be of the sizes between 1e25 and 1e75. This

        // makes the `gap * gap` cost fit comfortably in a f64.

        let words = vec![Word(1e25), Word(1e50), Word(1e75)];

        assert_eq!(

            wrap_optimal_fit(&words, &[1e100], &Penalties::default()),

            Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])

);

Source code

Revision control

Copy as Markdown

Other Tools