/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Token-affiliated data structures except for TokenKind (defined in its own
 * header).
 */

#ifndef frontend_Token_h
#define frontend_Token_h

#include "mozilla/Assertions.h"  // MOZ_ASSERT

#include <stdint.h>  // uint32_t

#include "frontend/TokenKind.h"  // js::frontend::TokenKind
#include "js/RegExpFlags.h"      // JS::RegExpFlags
#include "vm/StringType.h"       // js::PropertyName

namespace js {

namespace frontend {

struct TokenPos {
  uint32_t begin = 0;  // Offset of the token's first code unit.
  uint32_t end = 0;    // Offset of 1 past the token's last code unit.

  TokenPos() = default;
  TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}

  // Return a TokenPos that covers left, right, and anything in between.
  static TokenPos box(const TokenPos& left, const TokenPos& right) {
    MOZ_ASSERT(left.begin <= left.end);
    MOZ_ASSERT(left.end <= right.begin);
    MOZ_ASSERT(right.begin <= right.end);
    return TokenPos(left.begin, right.end);
  }

  bool operator==(const TokenPos& bpos) const {
    return begin == bpos.begin && end == bpos.end;
  }

  bool operator!=(const TokenPos& bpos) const {
    return begin != bpos.begin || end != bpos.end;
  }

  bool operator<(const TokenPos& bpos) const { return begin < bpos.begin; }

  bool operator<=(const TokenPos& bpos) const { return begin <= bpos.begin; }

  bool operator>(const TokenPos& bpos) const { return !(*this <= bpos); }

  bool operator>=(const TokenPos& bpos) const { return !(*this < bpos); }

  bool encloses(const TokenPos& pos) const {
    return begin <= pos.begin && pos.end <= end;
  }
};
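
// Illustrative sketch (not from the original header): how the helpers above
// combine. Given two adjacent tokens, TokenPos::box() yields a position
// spanning both, and encloses() tests containment:
//
//   TokenPos callee(0, 3);   // e.g. `foo`
//   TokenPos args(3, 9);     // e.g. `(1, 2)`
//   TokenPos call = TokenPos::box(callee, args);
//   MOZ_ASSERT(call == TokenPos(0, 9));
//   MOZ_ASSERT(call.encloses(callee) && call.encloses(args));
//
// Note that the ordering operators compare only |begin|, so two distinct
// positions with the same |begin| compare as neither < nor > each other.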

enum DecimalPoint { NoDecimal = false, HasDecimal = true };

enum class InvalidEscapeType {
  // No invalid character escapes.
  None,
  // A malformed \x escape.
  Hexadecimal,
  // A malformed \u escape.
  Unicode,
  // An otherwise well-formed \u escape which represents a
  // codepoint > 10FFFF.
  UnicodeOverflow,
  // An octal escape in a template token.
  Octal
};
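
// Illustrative sketch (not from the original header): source text that would
// map to each InvalidEscapeType value, assuming the usual JS escape grammar:
//
//   "\xG1"        -> Hexadecimal      (\x expects exactly two hex digits)
//   "\u12"        -> Unicode          (\uXXXX expects four hex digits)
//   "\u{110000}"  -> UnicodeOverflow  (the largest code point is U+10FFFF)
//   `\07`         -> Octal            (octal escapes are banned in templates)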

// The only escapes found in IdentifierName are of the Unicode flavor.
enum class IdentifierEscapes { None, SawUnicodeEscape };

enum class NameVisibility { Public, Private };

class TokenStreamShared;

struct Token {
 private:
  // The lexical grammar of JavaScript has a quirk around the '/' character.
  // As the spec puts it:
  //
  // > There are several situations where the identification of lexical input
  // > elements is sensitive to the syntactic grammar context that is consuming
  // > the input elements. This requires multiple goal symbols for the lexical
  // > grammar. [...] The InputElementRegExp goal symbol is used in all
  // > syntactic grammar contexts where a RegularExpressionLiteral is permitted
  // > [...]  In all other contexts, InputElementDiv is used as the lexical
  // > goal symbol.
  //
  // https://tc39.github.io/ecma262/#sec-lexical-and-regexp-grammars
  //
  // What "sensitive to the syntactic grammar context" means is, the parser has
  // to tell the TokenStream whether to interpret '/' as division or
  // RegExp. Because only one or the other (or neither) will be legal at that
  // point in the program, and only the parser knows which one.
  //
  // But there's a problem: the parser often gets a token, puts it back, then
  // consumes it later; or (equivalently) peeks at a token, leaves it, peeks
  // again later, then finally consumes it. Of course we don't actually re-scan
  // the token every time; we cache it in the TokenStream. This leads to the
  // following rule:
  //
  // The parser must not pass SlashIsRegExp when getting/peeking at a token
  // previously scanned with SlashIsDiv; or vice versa.
  //
  // That way, code that asks for SlashIsRegExp mode will never get a cached
  // Div token. But this rule is easy to screw up, because tokens are so often
  // peeked at on Parser.cpp line A and consumed on line B, where |A-B| is
  // thousands of lines. We therefore enforce it with the frontend's most
  // annoying assertion (in verifyConsistentModifier), and provide
  // Modifier::SlashIsInvalid to help avoid tripping it.
  //
  // This enum logically belongs in TokenStream, but C++ makes that awkward,
  // so we define it here and typedef it there.
  enum Modifier {
    // Parse `/` and `/=` as the division operators. (That is, use
    // InputElementDiv as the goal symbol.)
    SlashIsDiv,

    // Parse `/` as the beginning of a RegExp literal. (That is, use
    // InputElementRegExp.)
    SlashIsRegExp,

    // Neither a Div token nor a RegExp token is syntactically valid here. When
    // the parser calls `getToken(SlashIsInvalid)`, it must be prepared to see
    // either one (and throw a SyntaxError either way).
    //
    // It's OK to use SlashIsInvalid to get a token that was originally scanned
    // with SlashIsDiv or SlashIsRegExp. The reverse--peeking with
    // SlashIsInvalid, then getting with another mode--is not OK. If either Div
    // or RegExp is syntactically valid here, use the appropriate modifier.
    SlashIsInvalid,
  };
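
  // Illustrative sketch (not from the original header): why the parser must
  // pick the modifier. After `=` a RegExp literal is permitted, but after an
  // identifier `/` can only be division:
  //
  //   x = a / b;   // '/' is division             -> scan with SlashIsDiv
  //   x = /ab/i;   // '/ab/i' is a RegExp literal -> scan with SlashIsRegExp
  //
  // A hypothetical sequence that would trip verifyConsistentModifier (call
  // shapes follow the getToken(SlashIsInvalid) form mentioned above):
  //
  //   peekToken(&tt, SlashIsDiv);     // scans and caches a Div-mode token
  //   getToken(&tt, SlashIsRegExp);   // asserts: cached modifier mismatch
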
  friend class TokenStreamShared;

 public:
  // WARNING: TokenStreamPosition assumes that the only GC things a Token
  //          includes are atoms.  DON'T ADD NON-ATOM GC THING POINTERS HERE
  //          UNLESS YOU ADD ADDITIONAL ROOTING TO THAT CLASS.

  /** The type of this token. */
  TokenKind type;

  /** The token's position in the overall script. */
  TokenPos pos;

  union {
   private:
    friend struct Token;

    /** Non-numeric atom. */
    PropertyName* name;

    /** Potentially-numeric atom. */
    JSAtom* atom;

    struct {
      /** Numeric literal's value. */
      double value;

      /** Does the numeric literal contain a '.'? */
      DecimalPoint decimalPoint;
    } number;

    /** Regular expression flags; use charBuffer to access source chars. */
    JS::RegExpFlags reflags;
  } u;

#ifdef DEBUG
  /** The modifier used to get this token. */
  Modifier modifier;
#endif

  // Mutators

  void setName(PropertyName* name) {
    MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName);
    u.name = name;
  }

  void setAtom(JSAtom* atom) {
    MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead ||
               type == TokenKind::NoSubsTemplate);
    u.atom = atom;
  }

  void setRegExpFlags(JS::RegExpFlags flags) {
    MOZ_ASSERT(type == TokenKind::RegExp);
    u.reflags = flags;
  }

  void setNumber(double n, DecimalPoint decimalPoint) {
    MOZ_ASSERT(type == TokenKind::Number);
    u.number.value = n;
    u.number.decimalPoint = decimalPoint;
  }

  // Type-safe accessors

  PropertyName* name() const {
    MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName);
    return u.name->JSAtom::asPropertyName();  // poor-man's type verification
  }

  JSAtom* atom() const {
    MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead ||
               type == TokenKind::NoSubsTemplate);
    return u.atom;
  }

  JS::RegExpFlags regExpFlags() const {
    MOZ_ASSERT(type == TokenKind::RegExp);
    return u.reflags;
  }

  double number() const {
    MOZ_ASSERT(type == TokenKind::Number);
    return u.number.value;
  }

  DecimalPoint decimalPoint() const {
    MOZ_ASSERT(type == TokenKind::Number);
    return u.number.decimalPoint;
  }
};
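
// Illustrative sketch (not from the original header): the intended set/get
// pairing. The scanner stores the payload that matches the token kind it
// just produced, and the parser reads it back through the type-checked
// accessor for that same kind:
//
//   Token t;
//   t.type = TokenKind::Number;
//   t.pos = TokenPos(4, 7);
//   t.setNumber(3.5, HasDecimal);
//
//   MOZ_ASSERT(t.number() == 3.5);
//   MOZ_ASSERT(t.decimalPoint() == HasDecimal);
//
// Calling t.atom() here would trip the accessor's MOZ_ASSERT: the union
// member read must match |type|.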

}  // namespace frontend

}  // namespace js

#endif  // frontend_Token_h