DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (05f4ac2d6dd6)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef INCREMENTAL_TOKENIZER_H__
#define INCREMENTAL_TOKENIZER_H__

#include "mozilla/Tokenizer.h"

#include "nsError.h"
#include <functional>

class nsIInputStream;

namespace mozilla {

class IncrementalTokenizer : public TokenizerBase<char> {
 public:
  /**
   * Signature of the token consumer.  It is invoked once for each token
   * recognized in the input.  If it returns a failure code, tokenization
   * stops immediately and that code is propagated as the result of
   * Feed/FinishInput.
   *
   * The Fragment() of every consumed token points into one linear buffer and
   * is guaranteed to stay valid until the next call to Feed/FinishInput.
   * Fragments can therefore be accumulated and processed after
   * Feed/FinishInput has returned.
   */
  using Consumer =
      std::function<nsresult(const Token&, IncrementalTokenizer&)>;

  /**
   * See TokenizerBase for the meaning of aWhitespaces and
   * aAdditionalWordChars.
   *
   * @param aConsumer
   *    Mandatory, must be non-null.  The function that receives the tokens
   *    as they are produced while the tokenizer is being fed.
   * @param aRawMinBuffered
   *    Once at least aRawMinBuffered data has been buffered without any
   *    custom token having been found (because the incremental feed chunks
   *    were too small), the raw data is delivered to keep the stream flowing
   *    and to save memory.  Only effective in OnlyCustomTokenizing mode.
   */
  explicit IncrementalTokenizer(Consumer&& aConsumer,
                                const char* aWhitespaces = nullptr,
                                const char* aAdditionalWordChars = nullptr,
                                uint32_t aRawMinBuffered = 1024);

  /**
   * Push input to be tokenized.  The Consumer callback is invoked directly
   * from these methods for every token found, and the callback's result is
   * what these methods return.
   *
   * A valid consumer must have been supplied to the tokenizer before any of
   * these is called.  Calling Feed/FinishInput from within the Consumer
   * callback is not allowed.
   */
  nsresult FeedInput(const nsACString& aInput);
  nsresult FeedInput(nsIInputStream* aInput, uint32_t aCount);
  nsresult FinishInput();

  /**
   * May only be called from within the consumer callback.
   *
   * If there is still something left to read from the input, tokenizes it,
   * stores the token type and value in aToken and moves the cursor past the
   * token that has just been parsed.  Every call to Next() reads one more
   * token and advances the cursor.
   *
   * Returns false when there is not enough data to recognize a token
   * deterministically, or when the previously returned token was EOF.
   */
  MOZ_MUST_USE bool Next(Token& aToken);

  /**
   * May only be called from within the consumer callback.
   *
   * Instructs the tokenizer to revert the cursor and to suspend the async
   * parsing until the input is fed again.  Useful when deciding on the
   * syntax takes more than one token but the input is too short to provide
   * the next one (i.e. Next() returned false.)
   */
  void NeedMoreInput();

  /**
   * May only be called from within the consumer callback.
   *
   * Causes the consumer callback to be invoked again, with the input parsed
   * once more from the previous cursor position.  Useful after the tokenizer
   * state (custom tokens, tokenization mode) has been changed and the input
   * should be re-parsed.
   */
  void Rollback();

 private:
  // Iterates over the input using TokenizerBase::Parse, invoking the Consumer
  // callback.
  nsresult Process();

#ifdef DEBUG
  // Set while the consumer callback is running; serves assertions only.
  bool mConsuming;
#endif  // DEBUG
  // Modifiable only from the Consumer callback.  Signals the parser to break,
  // roll back and wait for more input.
  bool mNeedMoreInput;
  // Modifiable only from the Consumer callback.  Signals the parser to roll
  // back and parse the input again, using the new tokenizer settings if they
  // were modified.
  bool mRollback;
  // The input buffer; updated by every call to Feed/FinishInput.
  nsCString mInput;
  // Current cursor position, kept as a numerical index.  A direct reference
  // into the string buffer is not kept because the buffer is reallocated
  // often.
  nsCString::index_type mInputCursor;
  // The consumer function.
  Consumer mConsumer;
};

}  // namespace mozilla

#endif