DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (5b81998bb7ab)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Parses and evaluates simple statements for Preprocessor:

Expression currently supports the following grammar, whitespace is ignored:

expression :
  and_cond ( '||' expression ) ? ;
and_cond:
  test ( '&&' and_cond ) ? ;
test:
  unary ( ( '==' | '!=' ) unary ) ? ;
unary :
  '!'? value ;
value :
  [0-9]+ # integer
  | 'defined(' \w+ ')'
  | \w+  # string identifier or value;
"""

import re

class Expression:
  r"""
  A single preprocessor expression, parsed on construction.

  The constructor turns the input string into an Abstract Syntax Tree
  following this grammar (whitespace is ignored):

    expression : and_cond ( '||' expression ) ? ;
    and_cond   : test ( '&&' and_cond ) ? ;
    test       : unary ( ( '==' | '!=' ) unary ) ? ;
    unary      : '!'? value ;
    value      : [0-9]+ | 'defined(' \w+ ')' | \w+ ;

  evaluate() computes the value of that tree against a context.
  """

  def __init__(self, expression_string):
    """
    Parse expression_string into an Abstract Syntax Tree.

    After construction:
      self.e       -- root of the tree, or None if the input was empty or
                      consisted of whitespace only
      self.content -- the not yet consumed input (empty on success)
      self.offset  -- number of characters consumed so far

    Raises Expression.ParseError if the input is not a valid expression.
    """
    self.content = expression_string
    self.offset = 0
    self.__ignore_whitespace()
    # An empty expression is accepted here and yields no tree; evaluate()
    # rejects it later.
    self.e = None
    if self.content:
      self.e = self.__get_logical_or()
    if self.content:
      # Something is left over that the grammar could not consume.
      raise Expression.ParseError(self)

  def __get_logical_or(self):
    """
    Production: and_cond ( '||' expression ) ?

    Returns the and_cond node itself if no '||' follows, otherwise a
    'logical_op' node holding [left, op, right].
    A missing operand surfaces as ParseError from __get_value.
    """
    left = self.__get_logical_and()
    self.__ignore_whitespace()
    if self.content[:2] != '||':
      # no logical op needed, short cut to our prime element
      return left
    rv = Expression.__AST("logical_op")
    rv.append(left)
    rv.append(Expression.__ASTLeaf('op', self.content[:2]))
    self.__strip(2)
    self.__ignore_whitespace()
    rv.append(self.__get_logical_or())
    self.__ignore_whitespace()
    return rv

  def __get_logical_and(self):
    """
    Production: test ( '&&' and_cond ) ?

    Returns the test node itself if no '&&' follows, otherwise a
    'logical_op' node holding [left, op, right].
    A missing operand surfaces as ParseError from __get_value.
    """
    left = self.__get_equality()
    self.__ignore_whitespace()
    if self.content[:2] != '&&':
      # no logical op needed, short cut to our prime element
      return left
    rv = Expression.__AST("logical_op")
    rv.append(left)
    rv.append(Expression.__ASTLeaf('op', self.content[:2]))
    self.__strip(2)
    self.__ignore_whitespace()
    rv.append(self.__get_logical_and())
    self.__ignore_whitespace()
    return rv

  def __get_equality(self):
    """
    Production: unary ( ( '==' | '!=' ) unary ) ?

    Returns the unary node itself if no comparison follows, otherwise an
    'equality' node holding [left, op, right].
    """
    left = self.__get_unary()
    self.__ignore_whitespace()
    if not re.match(r'[=!]=', self.content):
      # no equality needed, short cut to our prime unary
      return left
    rv = Expression.__AST("equality")
    rv.append(left)
    rv.append(Expression.__ASTLeaf('op', self.content[:2]))
    self.__strip(2)
    self.__ignore_whitespace()
    rv.append(self.__get_unary())
    self.__ignore_whitespace()
    return rv

  def __get_unary(self):
    """
    Production: '!'? value

    Returns the value leaf, wrapped in a 'not' node if it was negated.
    """
    # eat whitespace right away, too
    not_ws = re.match(r'!\s*', self.content)
    if not not_ws:
      return self.__get_value()
    rv = Expression.__AST('not')
    self.__strip(not_ws.end())
    rv.append(self.__get_value())
    self.__ignore_whitespace()
    return rv

  def __get_value(self):
    r"""
    Production: ( [0-9]+ | 'defined(' \w+ ')' | \w+ )
    Note that the order is important, and the expression is kind-of
    ambiguous as \w includes 0-9. One could make it unambiguous by
    removing 0-9 from the first char of a string literal.

    Returns a leaf of type 'defined', 'int' or 'string'.
    Raises Expression.ParseError if the input does not start with a value,
    which includes running out of input.
    """
    rv = None
    m = re.match(r'defined\s*\(\s*(\w+)\s*\)', self.content)
    if m:
      word_len = m.end()
      rv = Expression.__ASTLeaf('defined', m.group(1))
    else:
      # Digits are tried before \w so that numbers become 'int' leafs.
      word_len = re.match(r'[0-9]*', self.content).end()
      if word_len:
        value = int(self.content[:word_len])
        rv = Expression.__ASTLeaf('int', value)
      else:
        word_len = re.match(r'\w*', self.content).end()
        if word_len:
          rv = Expression.__ASTLeaf('string', self.content[:word_len])
        else:
          raise Expression.ParseError(self)
    self.__strip(word_len)
    self.__ignore_whitespace()
    return rv

  def __ignore_whitespace(self):
    """
    Drop leading whitespace from the input, keeping the offset in sync.
    """
    ws_len = re.match(r'\s*', self.content).end()
    self.__strip(ws_len)

  def __strip(self, length):
    """
    Remove a given amount of chars from the input and update
    the offset.
    """
    self.content = self.content[length:]
    self.offset += length

  def evaluate(self, context):
    """
    Evaluate the expression with the given context.

    context has to support 'name in context' (used by defined(name)) and
    context[name] (used for bare words).

    Returns the value of the expression: a bool for defined(), '!', '=='
    and '!='; the deciding operand for '&&' and '||' (Python and/or
    semantics); the integer for a number; context[name] for a bare word.

    Raises Expression.ParseError if the expression is empty.
    """
    if self.e is None:
      # Nothing was parsed, so there is nothing to evaluate.
      raise Expression.ParseError(self)

    # Dispatch on the node type
    def eval_node(tok):
      return opmap[tok.type](tok)

    # Helper function to evaluate __get_equality results
    def eval_equality(tok):
      rv = eval_node(tok[0]) == eval_node(tok[2])
      if tok[1].value == '!=':
        rv = not rv
      return rv

    # Helper function to evaluate __get_logical_and and __get_logical_or
    # results. The right operand is only evaluated when it decides the
    # result, so that 'defined(X) && X' does not look up an undefined X.
    def eval_logical_op(tok):
      left = eval_node(tok[0])
      if tok[1].value == '&&':
        return left and eval_node(tok[2])
      elif tok[1].value == '||':
        return left or eval_node(tok[2])
      raise Expression.ParseError(self)

    # Mapping from token types to evaluator functions
    # Apart from (non-)equality, all these can be simple lambda forms.
    opmap = {
      'logical_op': eval_logical_op,
      'equality': eval_equality,
      'not': lambda tok: not eval_node(tok[0]),
      'string': lambda tok: context[tok.value],
      'defined': lambda tok: tok.value in context,
      'int': lambda tok: tok.value}

    return eval_node(self.e)

  class __AST(list):
    """
    Internal class implementing Abstract Syntax Tree nodes.
    A node is the list of its children plus a type attribute.
    """
    def __init__(self, type):
      self.type = type
      # Name the base class explicitly; super(self.__class__, self) would
      # recurse endlessly as soon as this class gets subclassed.
      list.__init__(self)

  class __ASTLeaf:
    """
    Internal class implementing Abstract Syntax Tree leafs.
    A leaf has a type and a value; str() and repr() show the value.
    """
    def __init__(self, type, value):
      self.value = value
      self.type = type
    def __str__(self):
      return str(self.value)
    def __repr__(self):
      return repr(self.value)

  # StandardError only exists on Python 2. Fall back to Exception elsewhere
  # so that the class hierarchy seen by Python 2 callers stays the same.
  try:
    _ParseErrorBase = StandardError
  except NameError:
    _ParseErrorBase = Exception

  class ParseError(_ParseErrorBase):
    """
    Error raised when parsing fails.
    It has two members, offset and content, which give the offset of the
    error and the offending content (at most the first three characters of
    the unparsed input).
    """
    def __init__(self, expression):
      self.offset = expression.offset
      self.content = expression.content[:3]
    def __str__(self):
      return 'Unexpected content at offset {0}, "{1}"'.format(self.offset,
                                                              self.content)

class Context(dict):
  """
  This class holds variable values by subclassing dict, and while it
  truthfully reports True and False on

  name in context

  it returns the variable name itself on

  context["name"]

  if that name is not defined, to reflect the ambiguity between string
  literals and preprocessor variables. Defined names return their value
  as usual.
  """
  def __getitem__(self, key):
    """
    Return the value stored for key, or key itself if it is not defined.
    """
    if key in self:
      # Name the class explicitly; super(self.__class__, self) would
      # recurse endlessly for any subclass of Context.
      return super(Context, self).__getitem__(key)
    return key