Source code

Revision control

Copy as Markdown

Other Tools

use std::cell::RefCell;
use crate::core::Fragment;
/// Penalties for
/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
/// and [`wrap_optimal_fit`].
///
/// This wrapping algorithm in [`wrap_optimal_fit`] considers the
/// entire paragraph to find optimal line breaks. When wrapping text,
/// "penalties" are assigned to line breaks based on the gaps left at
/// the end of lines. The penalties are given by this struct, with
/// [`Penalties::default`] assigning penalties that work well for
/// monospace text.
///
/// If you are wrapping proportional text, you are advised to assign
/// your own penalties according to your font size. See the individual
/// penalties below for details.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Penalties {
/// Per-line penalty. This is added for every line, which makes it
/// expensive to output more lines than the minimum required.
pub nline_penalty: usize,
/// Per-character cost for lines that overflow the target line width.
///
/// With a default value of 50², every single character costs as
/// much as leaving a gap of 50 characters behind. This is because
/// we assign as cost of `gap * gap` to a short line. When
/// wrapping monospace text, we can overflow the line by 1
/// character in extreme cases:
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
///
/// let short = "foo ";
/// let long = "x".repeat(50);
/// let length = (short.len() + long.len()) as f64;
/// let fragments = vec![Word::from(short), Word::from(&long)];
/// let penalties = Penalties::new();
///
/// // Perfect fit, both words are on a single line with no overflow.
/// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
///
/// // The words no longer fit, yet we get a single line back. While
/// // the cost of overflow (`1 * 2500`) is the same as the cost of the
/// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
/// // which makes it cheaper to overflow than to use two lines.
/// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
///
/// // The cost of overflow would be 2 * 2500, whereas the cost of
/// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
/// // 3401`. We therefore get two lines.
/// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
/// assert_eq!(wrapped, vec![&[Word::from(short)],
/// &[Word::from(&long)]]);
/// ```
///
/// This only happens if the overflowing word is 50 characters
/// long _and_ if the word overflows the line by exactly one
/// character. If it overflows by more than one character, the
/// overflow penalty will quickly outgrow the cost of the gap, as
/// seen above.
pub overflow_penalty: usize,
/// When should the a single word on the last line be considered
/// "too short"?
///
/// If the last line of the text consist of a single word and if
/// this word is shorter than `1 / short_last_line_fraction` of
/// the line width, then the final line will be considered "short"
/// and `short_last_line_penalty` is added as an extra penalty.
///
/// The effect of this is to avoid a final line consisting of a
/// single small word. For example, with a
/// `short_last_line_penalty` of 25 (the default), a gap of up to
/// 5 columns will be seen as more desirable than having a final
/// short line.
///
/// ## Examples
///
/// ```
/// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
///
/// let text = "This is a demo of the short last line penalty.";
///
/// // The first-fit algorithm leaves a single short word on the last line:
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
/// vec!["This is a demo of the short last line",
/// "penalty."]);
///
/// #[cfg(feature = "smawk")] {
/// let mut penalties = wrap_algorithms::Penalties::new();
///
/// // Since "penalty." is shorter than 25% of the line width, the
/// // optimal-fit algorithm adds a penalty of 25. This is enough
/// // to move "line " down:
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
/// vec!["This is a demo of the short last",
/// "line penalty."]);
///
/// // We can change the meaning of "short" lines. Here, only words
/// // shorter than 1/10th of the line width will be considered short:
/// penalties.short_last_line_fraction = 10;
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
/// vec!["This is a demo of the short last line",
/// "penalty."]);
///
/// // If desired, the penalty can also be disabled:
/// penalties.short_last_line_fraction = 4;
/// penalties.short_last_line_penalty = 0;
/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
/// vec!["This is a demo of the short last line",
/// "penalty."]);
/// }
/// ```
pub short_last_line_fraction: usize,
/// Penalty for a last line with a single short word.
///
/// Set this to zero if you do not want to penalize short last lines.
pub short_last_line_penalty: usize,
/// Penalty for lines ending with a hyphen.
pub hyphen_penalty: usize,
}
impl Penalties {
/// Default penalties for monospace text.
///
/// The penalties here work well for monospace text. This is
/// because they expect the gaps at the end of lines to be roughly
/// in the range `0..100`. If the gaps are larger, the
/// `overflow_penalty` and `hyphen_penalty` become insignificant.
pub const fn new() -> Self {
Penalties {
nline_penalty: 1000,
overflow_penalty: 50 * 50,
short_last_line_fraction: 4,
short_last_line_penalty: 25,
hyphen_penalty: 25,
}
}
}
impl Default for Penalties {
fn default() -> Self {
Self::new()
}
}
/// Cache for line numbers. This is necessary to avoid a O(n**2)
/// behavior when computing line numbers in [`wrap_optimal_fit`].
struct LineNumbers {
line_numbers: RefCell<Vec<usize>>,
}
impl LineNumbers {
fn new(size: usize) -> Self {
let mut line_numbers = Vec::with_capacity(size);
line_numbers.push(0);
LineNumbers {
line_numbers: RefCell::new(line_numbers),
}
}
fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
while self.line_numbers.borrow_mut().len() < i + 1 {
let pos = self.line_numbers.borrow().len();
let line_number = 1 + self.get(minima[pos].0, minima);
self.line_numbers.borrow_mut().push(line_number);
}
self.line_numbers.borrow()[i]
}
}
/// Overflow error during the [`wrap_optimal_fit`] computation.
#[derive(Debug, PartialEq, Eq)]
pub struct OverflowError;
impl std::fmt::Display for OverflowError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "wrap_optimal_fit cost computation overflowed")
}
}
impl std::error::Error for OverflowError {}
/// Wrap abstract fragments into lines with an optimal-fit algorithm.
///
/// The `line_widths` slice gives the target line width for each line
/// (the last slice element is repeated as necessary). This can be
/// used to implement hanging indentation.
///
/// The fragments must already have been split into the desired
/// widths, this function will not (and cannot) attempt to split them
/// further when arranging them into lines.
///
/// # Optimal-Fit Algorithm
///
/// The algorithm considers all possible break points and picks the
/// breaks which minimizes the gaps at the end of each line. More
/// precisely, the algorithm assigns a cost or penalty to each break
/// point, determined by `cost = gap * gap` where `gap = target_width -
/// line_width`. Shorter lines are thus penalized more heavily since
/// they leave behind a larger gap.
///
/// We can illustrate this with the text “To be, or not to be: that is
/// the question”. We will be wrapping it in a narrow column with room
/// for only 10 characters. The [greedy
/// algorithm](super::wrap_first_fit) will produce these lines, each
/// annotated with the corresponding penalty:
///
/// ```text
/// "To be, or" 1² = 1
/// "not to be:" 0² = 0
/// "that is" 3² = 9
/// "the" 7² = 49
/// "question" 2² = 4
/// ```
///
/// We see that line four with “the” leaves a gap of 7 columns, which
/// gives it a penalty of 49. The sum of the penalties is 63.
///
/// There are 10 words, which means that there are `2_u32.pow(9)` or
/// 512 different ways to typeset it. We can compute
/// the sum of the penalties for each possible line break and search
/// for the one with the lowest sum:
///
/// ```text
/// "To be," 4² = 16
/// "or not to" 1² = 1
/// "be: that" 2² = 4
/// "is the" 4² = 16
/// "question" 2² = 4
/// ```
///
/// The sum of the penalties is 41, which is better than what the
/// greedy algorithm produced.
///
/// Searching through all possible combinations would normally be
/// prohibitively slow. However, it turns out that the problem can be
/// formulated as the task of finding column minima in a cost matrix.
/// This matrix has a special form (totally monotone) which lets us
/// use a [linear-time algorithm called
/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
/// points.
///
/// This means that the time complexity remains O(_n_) where _n_ is
/// the number of words. Compared to
/// [`wrap_first_fit()`](super::wrap_first_fit), this function is
/// about 4 times slower.
///
/// The optimization of per-line costs over the entire paragraph is
/// inspired by the line breaking algorithm used in TeX, as described
/// in the 1981 article [_Breaking Paragraphs into
/// by Knuth and Plass. The implementation here is based on [Python
/// code by David
///
/// # Errors
///
/// In case of an overflow during the cost computation, an `Err` is
/// returned. Overflows happens when fragments or lines have infinite
/// widths (`f64::INFINITY`) or if the widths are so large that the
/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
/// (approximately 1e154):
///
/// ```
/// use textwrap::core::Fragment;
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
///
/// #[derive(Debug, PartialEq)]
/// struct Word(f64);
///
/// impl Fragment for Word {
/// fn width(&self) -> f64 { self.0 }
/// fn whitespace_width(&self) -> f64 { 1.0 }
/// fn penalty_width(&self) -> f64 { 0.0 }
/// }
///
/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
/// // larger than f64::MAX:
/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
/// Err(OverflowError));
/// ```
///
/// When using fragment widths and line widths which fit inside an
/// `u64`, overflows cannot happen. This means that fragments derived
/// from a `&str` cannot cause overflows.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
fragments: &'a [T],
line_widths: &'b [f64],
penalties: &'b Penalties,
) -> Result<Vec<&'a [T]>, OverflowError> {
// The final line width is used for all remaining lines.
let default_line_width = line_widths.last().copied().unwrap_or(0.0);
let mut widths = Vec::with_capacity(fragments.len() + 1);
let mut width = 0.0;
widths.push(width);
for fragment in fragments {
width += fragment.width() + fragment.whitespace_width();
widths.push(width);
}
let line_numbers = LineNumbers::new(fragments.len());
let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
// Line number for fragment `i`.
let line_number = line_numbers.get(i, minima);
let line_width = line_widths
.get(line_number)
.copied()
.unwrap_or(default_line_width);
let target_width = line_width.max(1.0);
// Compute the width of a line spanning fragments[i..j] in
// constant time. We need to adjust widths[j] by subtracting
// the whitespace of fragment[j-1] and then add the penalty.
let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
+ fragments[j - 1].penalty_width();
// We compute cost of the line containing fragments[i..j]. We
// start with values[i].1, which is the optimal cost for
// breaking before fragments[i].
//
// First, every extra line cost NLINE_PENALTY.
let mut cost = minima[i].1 + penalties.nline_penalty as f64;
// Next, we add a penalty depending on the line length.
if line_width > target_width {
// Lines that overflow get a hefty penalty.
let overflow = line_width - target_width;
cost += overflow * penalties.overflow_penalty as f64;
} else if j < fragments.len() {
// Other lines (except for the last line) get a milder
// penalty which depend on the size of the gap.
let gap = target_width - line_width;
cost += gap * gap;
} else if i + 1 == j
&& line_width < target_width / penalties.short_last_line_fraction as f64
{
// The last line can have any size gap, but we do add a
// penalty if the line is very short (typically because it
// contains just a single word).
cost += penalties.short_last_line_penalty as f64;
}
// Finally, we discourage hyphens.
if fragments[j - 1].penalty_width() > 0.0 {
// TODO: this should use a penalty value from the fragment
// instead.
cost += penalties.hyphen_penalty as f64;
}
cost
});
for (_, cost) in &minima {
if cost.is_infinite() {
return Err(OverflowError);
}
}
let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
let mut pos = fragments.len();
loop {
let prev = minima[pos].0;
lines.push(&fragments[prev..pos]);
pos = prev;
if pos == 0 {
break;
}
}
lines.reverse();
Ok(lines)
}
#[cfg(test)]
mod tests {
use super::*;
#[derive(Debug, PartialEq)]
struct Word(f64);
#[rustfmt::skip]
impl Fragment for Word {
fn width(&self) -> f64 { self.0 }
fn whitespace_width(&self) -> f64 { 1.0 }
fn penalty_width(&self) -> f64 { 0.0 }
}
#[test]
fn wrap_fragments_with_infinite_widths() {
let words = vec![Word(f64::INFINITY)];
assert_eq!(
wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
Err(OverflowError)
);
}
#[test]
fn wrap_fragments_with_huge_widths() {
let words = vec![Word(1e200), Word(1e250), Word(1e300)];
assert_eq!(
wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
Err(OverflowError)
);
}
#[test]
fn wrap_fragments_with_large_widths() {
// The gaps will be of the sizes between 1e25 and 1e75. This
// makes the `gap * gap` cost fit comfortably in a f64.
let words = vec![Word(1e25), Word(1e50), Word(1e75)];
assert_eq!(
wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
);
}
}