DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Header

Mercurial (01032a31b2e2)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "Dafsa.h"

#include "mozilla/Assertions.h"
#include "nsAString.h"

const int mozilla::Dafsa::kKeyNotFound = -1;

// Note the DAFSA implementation was lifted from eTLD code in Chromium that was
// originally lifted from Firefox.

// Read next offset from pos.
// Returns true if an offset could be read, false otherwise.
static bool GetNextOffset(const unsigned char** pos, const unsigned char* end,
                          const unsigned char** offset) {
  if (*pos == end) return false;

  // When reading an offset the byte array must always contain at least
  // three more bytes to consume. First the offset to read, then a node
  // to skip over and finally a destination node. No object can be smaller
  // than one byte.
  MOZ_ASSERT(*pos + 2 < end);
  size_t bytes_consumed;
  switch (**pos & 0x60) {
    case 0x60:  // Read three byte offset
      *offset += (((*pos)[0] & 0x1F) << 16) | ((*pos)[1] << 8) | (*pos)[2];
      bytes_consumed = 3;
      break;
    case 0x40:  // Read two byte offset
      *offset += (((*pos)[0] & 0x1F) << 8) | (*pos)[1];
      bytes_consumed = 2;
      break;
    default:
      *offset += (*pos)[0] & 0x3F;
      bytes_consumed = 1;
  }
  if ((**pos & 0x80) != 0) {
    *pos = end;
  } else {
    *pos += bytes_consumed;
  }
  return true;
}

// Check if byte at offset is last in label.
static bool IsEOL(const unsigned char* offset, const unsigned char* end) {
  MOZ_ASSERT(offset < end);
  return (*offset & 0x80) != 0;
}

// Check if byte at offset matches first character in key.
// This version matches characters not last in label.
static bool IsMatch(const unsigned char* offset, const unsigned char* end,
                    const char* key) {
  MOZ_ASSERT(offset < end);
  return *offset == *key;
}

// Check if byte at offset matches first character in key.
// This version matches characters last in label.
static bool IsEndCharMatch(const unsigned char* offset,
                           const unsigned char* end, const char* key) {
  MOZ_ASSERT(offset < end);
  return *offset == (*key | 0x80);
}

// Read return value at offset.
// Returns true if a return value could be read, false otherwise.
static bool GetReturnValue(const unsigned char* offset,
                           const unsigned char* end, int* return_value) {
  MOZ_ASSERT(offset < end);
  if ((*offset & 0xE0) == 0x80) {
    *return_value = *offset & 0x0F;
    return true;
  }
  return false;
}

// Lookup a domain key in a byte array generated by make_dafsa.py.
// The rule type is returned if key is found, otherwise kKeyNotFound is
// returned.
static int LookupString(const unsigned char* graph, size_t length,
                        const char* key, size_t key_length) {
  const unsigned char* pos = graph;
  const unsigned char* end = graph + length;
  const unsigned char* offset = pos;
  const char* key_end = key + key_length;
  while (GetNextOffset(&pos, end, &offset)) {
    //   char <char>+ end_char offsets
    //   char <char>+ return value
    //   char end_char offsets
    //   char return value
    //   end_char offsets
    //   return_value
    bool did_consume = false;
    if (key != key_end && !IsEOL(offset, end)) {
      // Leading <char> is not a match. Don't dive into this child
      if (!IsMatch(offset, end, key)) continue;
      did_consume = true;
      ++offset;
      ++key;
      // Possible matches at this point:
      // <char>+ end_char offsets
      // <char>+ return value
      // end_char offsets
      // return value
      // Remove all remaining <char> nodes possible
      while (!IsEOL(offset, end) && key != key_end) {
        if (!IsMatch(offset, end, key)) return mozilla::Dafsa::kKeyNotFound;
        ++key;
        ++offset;
      }
    }
    // Possible matches at this point:
    // end_char offsets
    // return_value
    // If one or more <char> elements were consumed, a failure
    // to match is terminal. Otherwise, try the next node.
    if (key == key_end) {
      int return_value;
      if (GetReturnValue(offset, end, &return_value)) return return_value;
      // The DAFSA guarantees that if the first char is a match, all
      // remaining char elements MUST match if the key is truly present.
      if (did_consume) return mozilla::Dafsa::kKeyNotFound;
      continue;
    }
    if (!IsEndCharMatch(offset, end, key)) {
      if (did_consume) return mozilla::Dafsa::kKeyNotFound;  // Unexpected
      continue;
    }
    ++key;
    pos = ++offset;  // Dive into child
  }
  return mozilla::Dafsa::kKeyNotFound;  // No match
}

namespace mozilla {

int Dafsa::Lookup(const nsACString& aKey) const {
  return LookupString(mData.Elements(), mData.Length(), aKey.BeginReading(),
                      aKey.Length());
}

}  // namespace mozilla