Source code

Revision control

Copy as Markdown

Other Tools

//! Word wrapping algorithms.
//!
//! After a text has been broken into words (or [`Fragment`]s), one
//! now has to decide how to break the fragments into lines. The
//! simplest algorithm for this is implemented by
//! [`wrap_first_fit()`]: it uses no look-ahead and simply adds
//! fragments to the line as long as they fit. However, this can lead
//! to poor line breaks if a large fragment almost-but-not-quite fits
//! on a line. When that happens, the fragment is moved to the next
//! line and it will leave behind a large gap.
//!
//! A more advanced algorithm, implemented by [`wrap_optimal_fit()`],
//! will take this into account. The optimal-fit algorithm considers
//! all possible line breaks and will attempt to minimize the gaps
//! left behind by overly short lines.
//!
//! While both algorithms run in linear time, the first-fit algorithm
//! is about 4 times faster than the optimal-fit algorithm.
#[cfg(feature = "smawk")]
mod optimal_fit;
#[cfg(feature = "smawk")]
pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
use crate::core::{Fragment, Word};
/// Describes how to wrap words into lines.
///
/// The simplest approach is to wrap words one word at a time and
/// accept the first way of wrapping which fit
/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
/// enabled, a more complex algorithm is available which will look at
/// an entire paragraph at a time in order to find optimal line breaks
/// ([`WrapAlgorithm::OptimalFit`]).
#[derive(Clone, Copy)]
pub enum WrapAlgorithm {
/// Wrap words using a fast and simple algorithm.
///
/// This algorithm uses no look-ahead when finding line breaks.
/// Implemented by [`wrap_first_fit()`], please see that function
/// for details and examples.
FirstFit,
/// Wrap words using an advanced algorithm with look-ahead.
///
/// This wrapping algorithm considers the entire paragraph to find
/// optimal line breaks. When wrapping text, "penalties" are
/// assigned to line breaks based on the gaps left at the end of
/// lines. See [`Penalties`] for details.
///
/// The underlying wrapping algorithm is implemented by
/// [`wrap_optimal_fit()`], please see that function for examples.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
#[cfg(feature = "smawk")]
OptimalFit(Penalties),
/// Custom wrapping function.
///
/// Use this if you want to implement your own wrapping algorithm.
/// The function can freely decide how to turn a slice of
/// [`Word`]s into lines.
///
/// # Example
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::{wrap, Options, WrapAlgorithm};
///
/// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
/// let mut lines = Vec::new();
/// let mut step = 1;
/// let mut start_idx = 0;
/// while start_idx + step <= words.len() {
/// lines.push(&words[start_idx .. start_idx+step]);
/// start_idx += step;
/// step += 1;
/// }
/// lines
/// }
///
/// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
/// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
/// vec!["First,",
/// "second, third,",
/// "fourth, fifth, sixth"]);
/// ```
Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
}
impl PartialEq for WrapAlgorithm {
/// Compare two wrap algorithms.
///
/// ```
/// use textwrap::WrapAlgorithm;
///
/// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit);
/// #[cfg(feature = "smawk")] {
/// assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit());
/// }
/// ```
///
/// Note that `WrapAlgorithm::Custom` values never compare equal:
///
/// ```
/// use textwrap::WrapAlgorithm;
///
/// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]),
/// WrapAlgorithm::Custom(|words, line_widths| vec![words]));
/// ```
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true,
#[cfg(feature = "smawk")]
(WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b,
(_, _) => false,
}
}
}
impl std::fmt::Debug for WrapAlgorithm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
#[cfg(feature = "smawk")]
WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
}
}
}
impl WrapAlgorithm {
/// Create new wrap algorithm.
///
/// The best wrapping algorithm is used by default, i.e.,
/// [`WrapAlgorithm::OptimalFit`] if available, otherwise
/// [`WrapAlgorithm::FirstFit`].
pub const fn new() -> Self {
#[cfg(not(feature = "smawk"))]
{
WrapAlgorithm::FirstFit
}
#[cfg(feature = "smawk")]
{
WrapAlgorithm::new_optimal_fit()
}
}
/// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
/// works well for monospace text.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
#[cfg(feature = "smawk")]
pub const fn new_optimal_fit() -> Self {
WrapAlgorithm::OptimalFit(Penalties::new())
}
/// Wrap words according to line widths.
///
/// The `line_widths` slice gives the target line width for each
/// line (the last slice element is repeated as necessary). This
/// can be used to implement hanging indentation.
#[inline]
pub fn wrap<'a, 'b>(
&self,
words: &'b [Word<'a>],
line_widths: &'b [usize],
) -> Vec<&'b [Word<'a>]> {
// Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
// = 9_007_199_254_740_992 can be represented without loss by
// a f64. Larger line widths will be rounded to the nearest
// representable number.
let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
match self {
WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
#[cfg(feature = "smawk")]
WrapAlgorithm::OptimalFit(penalties) => {
// The computation cannot overflow when the line
// widths are restricted to usize.
wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
}
WrapAlgorithm::Custom(func) => func(words, line_widths),
}
}
}
impl Default for WrapAlgorithm {
fn default() -> Self {
WrapAlgorithm::new()
}
}
/// Wrap abstract fragments into lines with a first-fit algorithm.
///
/// The `line_widths` slice gives the target line width for each line
/// (the last slice element is repeated as necessary). This can be
/// used to implement hanging indentation.
///
/// The fragments must already have been split into the desired
/// widths, this function will not (and cannot) attempt to split them
/// further when arranging them into lines.
///
/// # First-Fit Algorithm
///
/// This implements a simple “greedy” algorithm: accumulate fragments
/// one by one and when a fragment no longer fits, start a new line.
/// There is no look-ahead, we simply take first fit of the fragments
/// we find.
///
/// While fast and predictable, this algorithm can produce poor line
/// breaks when a long fragment is moved to a new line, leaving behind
/// a large gap:
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::wrap_algorithms::wrap_first_fit;
/// use textwrap::WordSeparator;
///
/// // Helper to convert wrapped lines to a Vec<String>.
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
/// lines.iter().map(|line| {
/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
/// }).collect::<Vec<_>>()
/// }
///
/// let text = "These few words will unfortunately not wrap nicely.";
/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
/// vec!["These few words",
/// "will", // <-- short line
/// "unfortunately",
/// "not wrap",
/// "nicely."]);
///
/// // We can avoid the short line if we look ahead:
/// #[cfg(feature = "smawk")]
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
/// #[cfg(feature = "smawk")]
/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
/// vec!["These few",
/// "words will",
/// "unfortunately",
/// "not wrap",
/// "nicely."]);
/// ```
///
/// The [`wrap_optimal_fit()`] function was used above to get better
/// line breaks. It uses an advanced algorithm which tries to avoid
/// short lines. This function is about 4 times faster than
/// [`wrap_optimal_fit()`].
///
/// # Examples
///
/// Imagine you're building a house site and you have a number of
/// tasks you need to execute. Things like pour foundation, complete
/// framing, install plumbing, electric cabling, install insulation.
///
/// The construction workers can only work during daytime, so they
/// need to pack up everything at night. Because they need to secure
/// their tools and move machines back to the garage, this process
/// takes much more time than the time it would take them to simply
/// switch to another task.
///
/// You would like to make a list of tasks to execute every day based
/// on your estimates. You can model this with a program like this:
///
/// ```
/// use textwrap::core::{Fragment, Word};
/// use textwrap::wrap_algorithms::wrap_first_fit;
///
/// #[derive(Debug)]
/// struct Task<'a> {
/// name: &'a str,
/// hours: f64, // Time needed to complete task.
/// sweep: f64, // Time needed for a quick sweep after task during the day.
/// cleanup: f64, // Time needed for full cleanup if day ends with this task.
/// }
///
/// impl Fragment for Task<'_> {
/// fn width(&self) -> f64 { self.hours }
/// fn whitespace_width(&self) -> f64 { self.sweep }
/// fn penalty_width(&self) -> f64 { self.cleanup }
/// }
///
/// // The morning tasks
/// let tasks = vec![
/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 },
/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 },
/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// ];
///
/// // Fill tasks into days, taking `day_length` into account. The
/// // output shows the hours worked per day along with the names of
/// // the tasks for that day.
/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
/// let mut days = Vec::new();
/// // Assign tasks to days. The assignment is a vector of slices,
/// // with a slice per day.
/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
/// for day in assigned_days.iter() {
/// let last = day.last().unwrap();
/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
/// days.push((work_hours - last.sweep + last.cleanup, names));
/// }
/// days
/// }
///
/// // With a single crew working 8 hours a day:
/// assert_eq!(
/// assign_days(&tasks, 8.0),
/// [
/// (7.0, vec!["Foundation"]),
/// (8.0, vec!["Framing", "Plumbing"]),
/// (7.0, vec!["Electrical", "Insulation"]),
/// (5.0, vec!["Drywall"]),
/// (7.0, vec!["Floors", "Countertops"]),
/// (4.0, vec!["Bathrooms"]),
/// ]
/// );
///
/// // With two crews working in shifts, 16 hours a day:
/// assert_eq!(
/// assign_days(&tasks, 16.0),
/// [
/// (14.0, vec!["Foundation", "Framing", "Plumbing"]),
/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
/// (6.0, vec!["Countertops", "Bathrooms"]),
/// ]
/// );
/// ```
///
/// Apologies to anyone who actually knows how to build a house and
/// knows how long each step takes :-)
pub fn wrap_first_fit<'a, T: Fragment>(
fragments: &'a [T],
line_widths: &[f64],
) -> Vec<&'a [T]> {
// The final line width is used for all remaining lines.
let default_line_width = line_widths.last().copied().unwrap_or(0.0);
let mut lines = Vec::new();
let mut start = 0;
let mut width = 0.0;
for (idx, fragment) in fragments.iter().enumerate() {
let line_width = line_widths
.get(lines.len())
.copied()
.unwrap_or(default_line_width);
if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
lines.push(&fragments[start..idx]);
start = idx;
width = 0.0;
}
width += fragment.width() + fragment.whitespace_width();
}
lines.push(&fragments[start..]);
lines
}
#[cfg(test)]
mod tests {
use super::*;
#[derive(Debug, PartialEq)]
struct Word(f64);
#[rustfmt::skip]
impl Fragment for Word {
fn width(&self) -> f64 { self.0 }
fn whitespace_width(&self) -> f64 { 1.0 }
fn penalty_width(&self) -> f64 { 0.0 }
}
#[test]
fn wrap_string_longer_than_f64() {
let words = vec![
Word(1e307),
Word(2e307),
Word(3e307),
Word(4e307),
Word(5e307),
Word(6e307),
];
// Wrap at just under f64::MAX (~19e307). The tiny
// whitespace_widths disappear because of loss of precision.
assert_eq!(
wrap_first_fit(&words, &[15e307]),
&[
vec![
Word(1e307),
Word(2e307),
Word(3e307),
Word(4e307),
Word(5e307)
],
vec![Word(6e307)]
]
);
}
}