Source code

Revision control

Copy as Markdown

Other Tools

//! Functions for wrapping text.
use std::borrow::Cow;
use crate::core::{break_words, display_width, Word};
use crate::word_splitters::split_words;
use crate::Options;
/// Wrap a line of text at a given width.
///
/// The result is a vector of lines, each line is of type [`Cow<'_,
/// str>`](Cow), which means that the line will borrow from the input
/// `&str` if possible. The lines do not have trailing whitespace,
/// including a final `'\n'`. Please use [`fill()`](crate::fill()) if
/// you need a [`String`] instead.
///
/// The easiest way to use this function is to pass an integer for
/// `width_or_options`:
///
/// ```
/// use textwrap::wrap;
///
/// let lines = wrap("Memory safety without garbage collection.", 15);
/// assert_eq!(lines, &[
///     "Memory safety",
///     "without garbage",
///     "collection.",
/// ]);
/// ```
///
/// If you need to customize the wrapping, you can pass an [`Options`]
/// instead of a `usize`:
///
/// ```
/// use textwrap::{wrap, Options};
///
/// let options = Options::new(15)
///     .initial_indent("- ")
///     .subsequent_indent(" ");
/// let lines = wrap("Memory safety without garbage collection.", &options);
/// assert_eq!(lines, &[
///     "- Memory safety",
///     " without",
///     " garbage",
///     " collection.",
/// ]);
/// ```
///
/// # Optimal-Fit Wrapping
///
/// By default, `wrap` will try to ensure an even right margin by
/// finding breaks which avoid short lines. We call this an
/// “optimal-fit algorithm” since the line breaks are computed by
/// considering all possible line breaks. The alternative is a
/// “first-fit algorithm” which simply accumulates words until they no
/// longer fit on the line.
///
/// As an example, using the first-fit algorithm to wrap the famous
/// Hamlet quote “To be, or not to be: that is the question” in a
/// narrow column with room for only 10 characters looks like this:
///
/// ```
/// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap};
/// #
/// # let lines = wrap("To be, or not to be: that is the question",
/// #                  Options::new(10).wrap_algorithm(FirstFit));
/// # assert_eq!(lines.join("\n") + "\n", "\
/// To be, or
/// not to be:
/// that is
/// the
/// question
/// # ");
/// ```
///
/// Notice how the second to last line is quite narrow because
/// “question” was too large to fit? The greedy first-fit algorithm
/// doesn’t look ahead, so it has no other option than to put
/// “question” onto its own line.
///
/// With the optimal-fit wrapping algorithm, the previous lines are
/// shortened slightly in order to make the word “is” go into the
/// second last line:
///
/// ```
/// # #[cfg(feature = "smawk")] {
/// # use textwrap::{Options, WrapAlgorithm, wrap};
/// #
/// # let lines = wrap(
/// #     "To be, or not to be: that is the question",
/// #     Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit())
/// # );
/// # assert_eq!(lines.join("\n") + "\n", "\
/// To be,
/// or not to
/// be: that
/// is the
/// question
/// # "); }
/// ```
///
/// Please see [`WrapAlgorithm`](crate::WrapAlgorithm) for details on
/// the choices.
///
/// # Examples
///
/// The returned vector contains lines of type `Cow<'_, str>`. If
/// possible, the wrapped lines will borrow from the input string. For
/// example, with a hanging indentation, the first line can borrow
/// from the input, but the subsequent lines become owned strings:
///
/// ```
/// use std::borrow::Cow::{Borrowed, Owned};
/// use textwrap::{wrap, Options};
///
/// let options = Options::new(15).subsequent_indent("....");
/// let lines = wrap("Wrapping text all day long.", &options);
/// let annotated = lines
///     .iter()
///     .map(|line| match line {
///         Borrowed(text) => format!("[Borrowed] {}", text),
///         Owned(text) => format!("[Owned] {}", text),
///     })
///     .collect::<Vec<_>>();
/// assert_eq!(
///     annotated,
///     &[
///         "[Borrowed] Wrapping text",
///         "[Owned] ....all day",
///         "[Owned] ....long.",
///     ]
/// );
/// ```
///
/// ## Leading and Trailing Whitespace
///
/// As a rule, leading whitespace (indentation) is preserved and
/// trailing whitespace is discarded.
///
/// In more detail, when wrapping words into lines, words are found
/// by splitting the input text on space characters. One or more
/// spaces (shown here as “␣”) are attached to the end of each word:
///
/// ```text
/// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"]
/// ```
///
/// These words are then put into lines. The interword whitespace is
/// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"`
/// word falls at the end of a line:
///
/// ```
/// use textwrap::wrap;
///
/// assert_eq!(wrap("Foo   bar baz", 10), vec!["Foo   bar", "baz"]);
/// assert_eq!(wrap("Foo   bar baz", 8), vec!["Foo", "bar baz"]);
/// ```
///
/// Notice how the trailing whitespace is removed in both cases: in
/// the first example, `"bar␣"` becomes `"bar"` and in the second case
/// `"Foo␣␣␣"` becomes `"Foo"`.
///
/// Leading whitespace is preserved when the following word fits on
/// the first line. To understand this, consider how words are found
/// in a text with leading spaces:
///
/// ```text
/// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"]
/// ```
///
/// When put into lines, the indentation is preserved if `"foo"` fits
/// on the first line, otherwise you end up with an empty line:
///
/// ```
/// use textwrap::wrap;
///
/// assert_eq!(wrap("  foo bar", 8), vec!["  foo", "bar"]);
/// assert_eq!(wrap("  foo bar", 4), vec!["", "foo", "bar"]);
/// ```
pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
where
    Opt: Into<Options<'a>>,
{
    let options: Options = width_or_options.into();
    let line_ending_str = options.line_ending.as_str();

    // Every input line is wrapped on its own. The output is
    // accumulated in one shared vector so that only the very first
    // output line is treated as the "initial" line for indentation.
    let mut lines = Vec::new();
    for line in text.split(line_ending_str) {
        wrap_single_line(line, &options, &mut lines);
    }

    lines
}
/// Wrap one line of input and append the wrapped lines to `lines`.
///
/// A line which is shorter (in bytes) than `options.width` and which
/// needs no indentation takes a fast path: it is pushed as a single
/// borrowed line with trailing spaces removed. Every other line is
/// handed to `wrap_single_line_slow_path`.
pub(crate) fn wrap_single_line<'a>(
    line: &'a str,
    options: &Options<'_>,
    lines: &mut Vec<Cow<'a, str>>,
) {
    // Only the very first output line gets the initial indent, all
    // later lines get the subsequent indent.
    let produced_output = !lines.is_empty();
    let active_indent = if produced_output {
        options.subsequent_indent
    } else {
        options.initial_indent
    };

    let needs_slow_path = line.len() >= options.width || !active_indent.is_empty();
    if needs_slow_path {
        wrap_single_line_slow_path(line, options, lines);
    } else {
        lines.push(Cow::from(line.trim_end_matches(' ')));
    }
}
/// Wrap a single line of text, appending the result to `lines`.
///
/// `wrap_single_line` falls back to this function when its fast path
/// does not apply, that is, when `line` is at least `options.width`
/// bytes long or when the indentation of the next output line is
/// non-empty.
pub(crate) fn wrap_single_line_slow_path<'a>(
    line: &'a str,
    options: &Options<'_>,
    lines: &mut Vec<Cow<'a, str>>,
) {
    // Columns left for text after subtracting the indentation. The
    // first entry is derived from the initial indent, the second
    // from the subsequent indent.
    let first_width = options
        .width
        .saturating_sub(display_width(options.initial_indent));
    let later_width = options
        .width
        .saturating_sub(display_width(options.subsequent_indent));
    let line_widths = [first_width, later_width];

    let fragments = split_words(
        options.word_separator.find_words(line),
        &options.word_splitter,
    );
    let fragments: Vec<_> = if !options.break_words {
        fragments.collect()
    } else {
        let mut pieces = break_words(fragments, line_widths[1]);
        if !options.initial_indent.is_empty() {
            // Words are broken according to the width of the
            // _second_ line, so the first fragment may be too wide
            // for the first line. Since the first word would
            // otherwise always land on the first line, a leading
            // zero-width word is inserted to take that spot.
            pieces.insert(0, Word::from(""));
        }
        pieces
    };

    // Byte offset into `line` where the next output line starts.
    let mut start = 0;
    for line_words in options.wrap_algorithm.wrap(&fragments, &line_widths) {
        let final_word = match line_words.last() {
            Some(word) => word,
            None => {
                lines.push(Cow::from(""));
                continue;
            }
        };
        let trailing_ws = final_word.whitespace.len();

        // The words are assumed to be contiguous in `line`, so the
        // byte length of this output line is the total length of its
        // words and whitespace, minus the whitespace after the final
        // word.
        let text_len = line_words
            .iter()
            .map(|w| w.len() + w.whitespace.len())
            .sum::<usize>()
            - trailing_ws;

        let indent = if lines.is_empty() {
            options.initial_indent
        } else {
            options.subsequent_indent
        };
        // Start from a borrowed empty string when there is no
        // indentation: appending to an empty `Cow` keeps the
        // appended slice borrowed. With indentation the line must be
        // an owned string.
        let mut wrapped = if indent.is_empty() {
            Cow::from("")
        } else {
            Cow::Owned(indent.to_owned())
        };
        wrapped += &line[start..start + text_len];

        if !final_word.penalty.is_empty() {
            wrapped.to_mut().push_str(final_word.penalty);
        }
        lines.push(wrapped);

        // Skip the text just emitted and the whitespace following
        // it. The whitespace is skipped even if a penalty was
        // appended.
        start += text_len + trailing_ws;
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `wrap` covering plain wrapping, indentation,
    //! hyphen handling, word breaking and whitespace treatment.
    use super::*;
    use crate::{WordSeparator, WordSplitter, WrapAlgorithm};

    #[cfg(feature = "hyphenation")]
    use hyphenation::{Language, Load, Standard};

    #[test]
    fn no_wrap() {
        assert_eq!(wrap("foo", 10), vec!["foo"]);
    }

    #[test]
    fn wrap_simple() {
        assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn to_be_or_not() {
        assert_eq!(
            wrap(
                "To be, or not to be, that is the question.",
                Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit)
            ),
            vec!["To be, or", "not to be,", "that is", "the", "question."]
        );
    }

    #[test]
    fn multiple_words_on_first_line() {
        assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
    }

    #[test]
    fn long_word() {
        assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]);
    }

    #[test]
    fn long_words() {
        assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]);
    }

    #[test]
    fn max_width() {
        assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]);

        let text = "Hello there! This is some English text. \
                    It should not be wrapped given the extents below.";
        assert_eq!(wrap(text, usize::MAX), vec![text]);
    }

    #[test]
    fn leading_whitespace() {
        assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
    }

    #[test]
    fn leading_whitespace_empty_first_line() {
        // If there is no space for the first word, the first line
        // will be empty. This is because the string is split into
        // words like [" ", "foobar ", "baz"], which puts "foobar " on
        // the second line. We never output trailing whitespace.
        assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]);
    }

    #[test]
    fn trailing_whitespace() {
        // Whitespace is only significant inside a line. After a line
        // gets too long and is broken, the first word starts in
        // column zero and is not indented.
        assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn issue_99() {
        // We did not reset the in_whitespace flag correctly and did
        // not handle single-character words after a line break.
        assert_eq!(
            wrap("aaabbbccc x yyyzzzwww", 9),
            vec!["aaabbbccc", "x", "yyyzzzwww"]
        );
    }

    #[test]
    fn issue_129() {
        // The dash is a non-ASCII character which takes up several
        // bytes. We used to panic since we tried to index into the
        // character.
        let options = Options::new(1).word_separator(WordSeparator::AsciiSpace);
        assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]);
    }

    #[test]
    fn wide_character_handling() {
        assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);

        // The letters below are the fullwidth forms of "Hello" and
        // "World", each taking up two columns. They are written as
        // escapes so that the test survives tools which normalize
        // fullwidth characters to their ASCII counterparts.
        let wide_text =
            "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}, \u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!";
        assert_eq!(
            wrap(
                wide_text,
                Options::new(15).word_separator(WordSeparator::AsciiSpace)
            ),
            vec![
                "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f},",
                "\u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!"
            ]
        );

        // Wide characters are allowed to break if the
        // unicode-linebreak feature is enabled.
        #[cfg(feature = "unicode-linebreak")]
        assert_eq!(
            wrap(
                wide_text,
                Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties),
            ),
            vec![
                "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}, \u{ff37}",
                "\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!"
            ]
        );
    }

    #[test]
    fn indent_empty_line() {
        // Previously, indentation was not applied to empty lines.
        // However, this is somewhat inconsistent and undesirable if
        // the indentation is something like a border ("| ") which you
        // want to apply to all lines, empty or not.
        let options = Options::new(10).initial_indent("!!!");
        assert_eq!(wrap("", &options), vec!["!!!"]);
    }

    #[test]
    fn indent_single_line() {
        let options = Options::new(10).initial_indent(">>>"); // No trailing space
        assert_eq!(wrap("foo", &options), vec![">>>foo"]);
    }

    #[test]
    fn indent_first_emoji() {
        let options = Options::new(10).initial_indent("👉👉");
        assert_eq!(
            wrap("x x x x x x x x x x x x x", &options),
            vec!["👉👉x x x", "x x x x x", "x x x x x"]
        );
    }

    #[test]
    fn indent_multiple_lines() {
        let options = Options::new(6).initial_indent("* ").subsequent_indent(" ");
        assert_eq!(
            wrap("foo bar baz", &options),
            vec!["* foo", " bar", " baz"]
        );
    }

    #[test]
    fn only_initial_indent_multiple_lines() {
        let options = Options::new(10).initial_indent(" ");
        assert_eq!(wrap("foo\nbar\nbaz", &options), vec![" foo", "bar", "baz"]);
    }

    #[test]
    fn only_subsequent_indent_multiple_lines() {
        let options = Options::new(10).subsequent_indent(" ");
        assert_eq!(
            wrap("foo\nbar\nbaz", &options),
            vec!["foo", " bar", " baz"]
        );
    }

    #[test]
    fn indent_break_words() {
        // Both indentations are two columns wide, which leaves three
        // columns for text on every line. The long word is thus
        // broken into three-letter fragments.
        let options = Options::new(5).initial_indent("* ").subsequent_indent("  ");
        assert_eq!(wrap("foobarbaz", &options), vec!["* foo", "  bar", "  baz"]);
    }

    #[test]
    fn initial_indent_break_words() {
        // This is a corner-case showing how the long word is broken
        // according to the width of the subsequent lines. The first
        // fragment of the word no longer fits on the first line,
        // which ends up being pure indentation.
        let options = Options::new(5).initial_indent("-->");
        assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]);
    }

    #[test]
    fn hyphens() {
        assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
    }

    #[test]
    fn trailing_hyphen() {
        let options = Options::new(5).break_words(false);
        assert_eq!(wrap("foobar-", &options), vec!["foobar-"]);
    }

    #[test]
    fn multiple_hyphens() {
        assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
    }

    #[test]
    fn hyphens_flag() {
        let options = Options::new(5).break_words(false);
        assert_eq!(
            wrap("The --foo-bar flag.", &options),
            vec!["The", "--foo-", "bar", "flag."]
        );
    }

    #[test]
    fn repeated_hyphens() {
        let options = Options::new(4).break_words(false);
        assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]);
    }

    #[test]
    fn hyphens_alphanumeric() {
        assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
    }

    #[test]
    fn hyphens_non_alphanumeric() {
        let options = Options::new(5).break_words(false);
        assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]);
    }

    #[test]
    fn multiple_splits() {
        assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
    }

    #[test]
    fn forced_split() {
        let options = Options::new(5).break_words(false);
        assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]);
    }

    #[test]
    fn multiple_unbroken_words_issue_193() {
        let options = Options::new(3).break_words(false);
        assert_eq!(
            wrap("small large tiny", &options),
            vec!["small", "large", "tiny"]
        );
        // Runs of several spaces between the words must give the
        // same result.
        assert_eq!(
            wrap("small  large   tiny", &options),
            vec!["small", "large", "tiny"]
        );
    }

    #[test]
    fn very_narrow_lines_issue_193() {
        let options = Options::new(1).break_words(false);
        assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]);
        // Runs of several spaces between the words must give the
        // same result.
        assert_eq!(wrap("fooo   x     y", &options), vec!["fooo", "x", "y"]);
    }

    #[test]
    fn simple_hyphens() {
        let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter);
        assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
    }

    #[test]
    fn no_hyphenation() {
        let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
        assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_double_hyphenation() {
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(10);
        assert_eq!(
            wrap("Internationalization", &options),
            vec!["Internatio", "nalization"]
        );

        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("Internationalization", &options),
            vec!["Interna-", "tionaliza-", "tion"]
        );
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_issue_158() {
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(10);
        assert_eq!(
            wrap("participation is the key to success", &options),
            vec!["participat", "ion is", "the key to", "success"]
        );

        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("participation is the key to success", &options),
            vec!["partici-", "pation is", "the key to", "success"]
        );
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn split_len_hyphenation() {
        // Test that hyphenation takes the width of the whitespace
        // into account.
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("garbage collection", &options),
            vec!["garbage col-", "lection"]
        );
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn borrowed_lines() {
        // Lines that end with an extra hyphen are owned, the final
        // line is borrowed.
        use std::borrow::Cow::{Borrowed, Owned};
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
        let lines = wrap("Internationalization", &options);
        assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]);
        if let Borrowed(s) = lines[0] {
            panic!("should not have been borrowed: {:?}", s);
        }
        if let Borrowed(s) = lines[1] {
            panic!("should not have been borrowed: {:?}", s);
        }
        if let Owned(ref s) = lines[2] {
            panic!("should not have been owned: {:?}", s);
        }
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_with_hyphen() {
        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
        let options = Options::new(8).break_words(false);
        assert_eq!(
            wrap("over-caffinated", &options),
            vec!["over-", "caffinated"]
        );

        let options = options.word_splitter(WordSplitter::Hyphenation(dictionary));
        assert_eq!(
            wrap("over-caffinated", &options),
            vec!["over-", "caffi-", "nated"]
        );
    }

    #[test]
    fn break_words() {
        assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn break_words_wide_characters() {
        // Even the poor man's version of `ch_width` counts these
        // characters as wide. The input is the fullwidth form of
        // "Hello", written as escapes so that the test survives
        // tools which normalize fullwidth characters to ASCII.
        let options = Options::new(5).word_separator(WordSeparator::AsciiSpace);
        assert_eq!(
            wrap("\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}", options),
            vec!["\u{ff28}\u{ff45}", "\u{ff4c}\u{ff4c}", "\u{ff4f}"]
        );
    }

    #[test]
    fn break_words_zero_width() {
        assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]);
    }

    #[test]
    fn break_long_first_word() {
        assert_eq!(wrap("testx y", 4), vec!["test", "x y"]);
    }

    #[test]
    fn wrap_preserves_line_breaks_trims_whitespace() {
        assert_eq!(wrap(" ", 80), vec![""]);
        assert_eq!(wrap(" \n ", 80), vec!["", ""]);
        assert_eq!(wrap(" \n \n \n ", 80), vec!["", "", "", ""]);
    }

    #[test]
    fn wrap_colored_text() {
        // The words are much longer than 6 bytes, but they remain
        // intact after filling the text.
        let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m";
        let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m";
        assert_eq!(
            wrap(&format!("{} {}", green_hello, blue_world), 6),
            vec![green_hello, blue_world],
        );
    }
}