remapper.rs - mozsearch

use alloc::vec::Vec;

use crate::{nfa::noncontiguous, util::primitives::StateID};

/// Remappable is a tightly coupled abstraction that facilitates remapping

/// state identifiers in DFAs.

///

/// The main idea behind remapping state IDs is that DFAs often need to check

/// if a certain state is a "special" state of some kind (like a match state)

/// during a search. Since this is extremely perf critical code, we want this

/// check to be as fast as possible. Partitioning state IDs into, for example,

/// into "non-match" and "match" states means one can tell if a state is a

/// match state via a simple comparison of the state ID.

///

/// The issue is that during the DFA construction process, it's not

/// particularly easy to partition the states. Instead, the simplest thing is

/// to often just do a pass over all of the states and shuffle them into their

/// desired partitionings. To do that, we need a mechanism for swapping states.

/// Hence, this abstraction.

///

/// Normally, for such little code, I would just duplicate it. But this is a

/// key optimization and the implementation is a bit subtle. So the abstraction

/// is basically a ham-fisted attempt at DRY. The only place we use this is in

/// the dense and one-pass DFAs.

///

/// See also src/dfa/special.rs for a more detailed explanation of how dense

/// DFAs are partitioned.

pub(crate) trait Remappable: core::fmt::Debug {

    /// Return the total number of states.

    fn state_len(&self) -> usize;

    /// Swap the states pointed to by the given IDs. The underlying finite

    /// state machine should be mutated such that all of the transitions in

    /// `id1` are now in the memory region where the transitions for `id2`

    /// were, and all of the transitions in `id2` are now in the memory region

    /// where the transitions for `id1` were.

///

    /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.

///

    /// It is expected that, after calling this, the underlying state machine

    /// will be left in an inconsistent state, since any other transitions

    /// pointing to, e.g., `id1` need to be updated to point to `id2`, since

    /// that's where `id1` moved to.

///

    /// In order to "fix" the underlying inconsistent state, a `Remapper`

    /// should be used to guarantee that `remap` is called at the appropriate

    /// time.

    fn swap_states(&mut self, id1: StateID, id2: StateID);

    /// This must remap every single state ID in the underlying value according

    /// to the function given. For example, in a DFA, this should remap every

    /// transition and every starting state ID.

    fn remap(&mut self, map: impl Fn(StateID) -> StateID);

/// Remapper is an abstraction the manages the remapping of state IDs in a

/// finite state machine. This is useful when one wants to shuffle states into

/// different positions in the machine.

///

/// One of the key complexities this manages is the ability to correctly move

/// one state multiple times.

///

/// Once shuffling is complete, `remap` must be called, which will rewrite

/// all pertinent transitions to updated state IDs. Neglecting to call `remap`

/// will almost certainly result in a corrupt machine.

#[derive(Debug)]

pub(crate) struct Remapper {

    /// A map from the index of a state to its pre-multiplied identifier.

///

    /// When a state is swapped with another, then their corresponding

    /// locations in this map are also swapped. Thus, its new position will

    /// still point to its old pre-multiplied StateID.

///

    /// While there is a bit more to it, this then allows us to rewrite the

    /// state IDs in a DFA's transition table in a single pass. This is done

    /// by iterating over every ID in this map, then iterating over each

    /// transition for the state at that ID and re-mapping the transition from

    /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position

    /// in this map where `old_id` *started*, and set it to where it ended up

    /// after all swaps have been completed.

    map: Vec<StateID>,

    /// A way to map indices to state IDs (and back).

    idx: IndexMapper,

impl Remapper {

    /// Create a new remapper from the given remappable implementation. The

    /// remapper can then be used to swap states. The remappable value given

    /// here must the same one given to `swap` and `remap`.

///

    /// The given stride should be the stride of the transition table expressed

    /// as a power of 2. This stride is used to map between state IDs and state

    /// indices. If state IDs and state indices are equivalent, then provide

    /// a `stride2` of `0`, which acts as an identity.

    pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper {

        let idx = IndexMapper { stride2 };

        let map = (0..r.state_len()).map(|i| idx.to_state_id(i)).collect();

        Remapper { map, idx }

    /// Swap two states. Once this is called, callers must follow through to

    /// call `remap`, or else it's possible for the underlying remappable

    /// value to be in a corrupt state.

    pub(crate) fn swap(

        &mut self,

        r: &mut impl Remappable,

        id1: StateID,

        id2: StateID,

) {

        if id1 == id2 {

            return;

        r.swap_states(id1, id2);

        self.map.swap(self.idx.to_index(id1), self.idx.to_index(id2));

    /// Complete the remapping process by rewriting all state IDs in the

    /// remappable value according to the swaps performed.

    pub(crate) fn remap(mut self, r: &mut impl Remappable) {

        // Update the map to account for states that have been swapped

        // multiple times. For example, if (A, C) and (C, G) are swapped, then

        // transitions previously pointing to A should now point to G. But if

        // we don't update our map, they will erroneously be set to C. All we

        // do is follow the swaps in our map until we see our original state

        // ID.

//

        // The intuition here is to think about how changes are made to the

        // map: only through pairwise swaps. That means that starting at any

        // given state, it is always possible to find the loop back to that

        // state by following the swaps represented in the map (which might be

        // 0 swaps).

//

        // We are also careful to clone the map before starting in order to

        // freeze it. We use the frozen map to find our loops, since we need to

        // update our map as well. Without freezing it, our updates could break

        // the loops referenced above and produce incorrect results.

        let oldmap = self.map.clone();

        for i in 0..r.state_len() {

            let cur_id = self.idx.to_state_id(i);

            let mut new_id = oldmap[i];

            if cur_id == new_id {

                continue;

            loop {

                let id = oldmap[self.idx.to_index(new_id)];

                if cur_id == id {

                    self.map[i] = new_id;

                    break;

                new_id = id;

        r.remap(|sid| self.map[self.idx.to_index(sid)]);

/// A simple type for mapping between state indices and state IDs.

///

/// The reason why this exists is because state IDs are "premultiplied" in a

/// DFA. That is, in order to get to the transitions for a particular state,

/// one need only use the state ID as-is, instead of having to multiply it by

/// transition table's stride.

///

/// The downside of this is that it's inconvenient to map between state IDs

/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like

/// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`,

/// etc.

///

/// Since our state IDs are premultiplied, we can convert back-and-forth

/// between IDs and indices by simply unmultiplying the IDs and multiplying the

/// indices.

///

/// Note that for a sparse NFA, state IDs and indices are equivalent. In this

/// case, we set the stride of the index mapped to be `0`, which acts as an

/// identity.

#[derive(Debug)]

struct IndexMapper {

    /// The power of 2 corresponding to the stride of the corresponding

    /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<

    /// stride2' pre-multiplies an index to an ID.

    stride2: usize,

impl IndexMapper {

    /// Convert a state ID to a state index.

    fn to_index(&self, id: StateID) -> usize {

        id.as_usize() >> self.stride2

    /// Convert a state index to a state ID.

    fn to_state_id(&self, index: usize) -> StateID {

        // CORRECTNESS: If the given index is not valid, then it is not

        // required for this to panic or return a valid state ID. We'll "just"

        // wind up with panics or silent logic errors at some other point. But

        // this is OK because if Remappable::state_len is correct and so is

        // 'to_index', then all inputs to 'to_state_id' should be valid indices

        // and thus transform into valid state IDs.

        StateID::new_unchecked(index << self.stride2)

impl Remappable for noncontiguous::NFA {

    fn state_len(&self) -> usize {

        noncontiguous::NFA::states(self).len()

    fn swap_states(&mut self, id1: StateID, id2: StateID) {

        noncontiguous::NFA::swap_states(self, id1, id2)

    fn remap(&mut self, map: impl Fn(StateID) -> StateID) {

        noncontiguous::NFA::remap(self, map)

Source code

Revision control

Copy as Markdown

Other Tools