DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (d38398e5144e)

VCS Links

nsHtml5Parser

Macros

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef NS_HTML5_PARSER
#define NS_HTML5_PARSER

#include "nsAutoPtr.h"
#include "nsIParser.h"
#include "nsDeque.h"
#include "nsIURL.h"
#include "nsParserCIID.h"
#include "nsITokenizer.h"
#include "nsIContentSink.h"
#include "nsIRequest.h"
#include "nsIChannel.h"
#include "nsCOMArray.h"
#include "nsContentSink.h"
#include "nsCycleCollectionParticipant.h"
#include "nsIInputStream.h"
#include "nsDetectionConfident.h"
#include "nsHtml5OwningUTF16Buffer.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5StreamParser.h"
#include "nsHtml5AtomTable.h"
#include "nsWeakReference.h"
#include "nsHtml5StreamListener.h"

class nsHtml5Parser final : public nsIParser,
                            public nsSupportsWeakReference
{
  public:
    NS_DECL_CYCLE_COLLECTING_ISUPPORTS

    NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser)

    nsHtml5Parser();

    /* Start nsIParser */
    /**
     * No-op for backwards compat.
     */
    NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;

    /**
     * Returns the tree op executor for backwards compat.
     */
    NS_IMETHOD_(nsIContentSink*) GetContentSink() override;

    /**
     * Always returns "view" for backwards compat.
     */
    NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;

    /**
     * No-op for backwards compat.
     */
    NS_IMETHOD_(void) SetCommand(const char* aCommand) override;

    /**
     * No-op for backwards compat.
     */
    NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;

    /**
     *  Call this method once you've created a parser, and want to instruct it
     *  about what charset to load
     *
     *  @param   aCharset the charset of a document
     *  @param   aCharsetSource the source of the charset
     */
    NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource) override;

    /**
     * Don't call. For interface compat only.
     */
    NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource) override
    {
      NS_NOTREACHED("No one should call this.");
    }

    /**
     * Get the channel associated with this parser
     * @param aChannel out param that will contain the result
     * @return NS_OK if successful or NS_NOT_AVAILABLE if not
     */
    NS_IMETHOD GetChannel(nsIChannel** aChannel) override;

    /**
     * Return |this| for backwards compat.
     */
    NS_IMETHOD GetDTD(nsIDTD** aDTD) override;

    /**
     * Get the stream parser for this parser
     */
    virtual nsIStreamListener* GetStreamListener() override;

    /**
     * Don't call. For interface compat only.
     */
    NS_IMETHOD ContinueInterruptedParsing() override;

    /**
     * Blocks the parser.
     */
    NS_IMETHOD_(void) BlockParser() override;

    /**
     * Unblocks the parser.
     */
    NS_IMETHOD_(void) UnblockParser() override;

    /**
     * Asynchronously continues parsing.
     */
    NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;

    /**
     * Query whether the parser is enabled (i.e. not blocked) or not.
     */
    NS_IMETHOD_(bool) IsParserEnabled() override;

    /**
     * Query whether the parser thinks it's done with parsing.
     */
    NS_IMETHOD_(bool) IsComplete() override;

    /**
     * Set up request observer.
     *
     * @param   aURL used for View Source title
     * @param   aListener a listener to forward notifications to
     * @param   aKey the root context key (used for document.write)
     * @param   aMode ignored (for interface compat only)
     */
    NS_IMETHOD Parse(nsIURI* aURL,
                     nsIRequestObserver* aListener = nullptr,
                     void* aKey = 0,
                     nsDTDMode aMode = eDTDMode_autodetect) override;

    /**
     * document.write and document.close
     *
     * @param   aSourceBuffer the argument of document.write (empty for .close())
     * @param   aKey a key unique to the script element that caused this call
     * @param   aContentType "text/html" for HTML mode, else text/plain mode
     * @param   aLastCall true if .close() false if .write()
     * @param   aMode ignored (for interface compat only)
     */
    nsresult Parse(const nsAString& aSourceBuffer,
                   void* aKey,
                   const nsACString& aContentType,
                   bool aLastCall,
                   nsDTDMode aMode = eDTDMode_autodetect);

    /**
     * Stops the parser prematurely
     */
    NS_IMETHOD Terminate() override;

    /**
     * Don't call. For interface backwards compat only.
     */
    NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
                             nsTArray<nsString>& aTagStack) override;

    /**
     * Don't call. For interface compat only.
     */
    NS_IMETHOD BuildModel() override;

    /**
     * Don't call. For interface compat only.
     */
    NS_IMETHOD CancelParsingEvents() override;

    /**
     * Don't call. For interface compat only.
     */
    virtual void Reset() override;

    /**
     * True if the insertion point (per HTML5) is defined.
     */
    virtual bool IsInsertionPointDefined() override;

    /**
     * Call immediately before starting to evaluate a parser-inserted script or
     * in general when the spec says to define an insertion point.
     */
    virtual void PushDefinedInsertionPoint() override;

    /**
     * Call immediately after having evaluated a parser-inserted script or
     * generally want to restore to the state before the last
     * PushDefinedInsertionPoint call.
     */
    virtual void PopDefinedInsertionPoint() override;

    /**
     * Marks the HTML5 parser as not a script-created parser: Prepares the 
     * parser to be able to read a stream.
     *
     * @param aCommand the parser command (Yeah, this is bad API design. Let's
     * make this better when retiring nsIParser)
     */
    virtual void MarkAsNotScriptCreated(const char* aCommand) override;

    /**
     * True if this is a script-created HTML5 parser.
     */
    virtual bool IsScriptCreated() override;

    /* End nsIParser  */

    // Not from an external interface
    // Non-inherited methods

  public:

    /**
     * Initializes the parser to load from a channel.
     */
    virtual nsresult Initialize(nsIDocument* aDoc,
                        nsIURI* aURI,
                        nsISupports* aContainer,
                        nsIChannel* aChannel);

    inline nsHtml5Tokenizer* GetTokenizer() {
      return mTokenizer;
    }

    void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState, int32_t aLine);

    void DropStreamParser()
    {
      if (GetStreamParser()) {
        GetStreamParser()->DropTimer();
        mStreamListener->DropDelegate();
        mStreamListener = nullptr;
      }
    }
    
    void StartTokenizer(bool aScriptingEnabled);
    
    void ContinueAfterFailedCharsetSwitch();

    nsHtml5StreamParser* GetStreamParser()
    {
      if (!mStreamListener) {
        return nullptr;
      }
      return mStreamListener->GetDelegate();
    }

    /**
     * Parse until pending data is exhausted or a script blocks the parser
     */
    nsresult ParseUntilBlocked();

  private:

    virtual ~nsHtml5Parser();

    // State variables

    /**
     * Whether the last character tokenized was a carriage return (for CRLF)
     */
    bool                          mLastWasCR;

    /**
     * Whether the last character tokenized was a carriage return (for CRLF)
     * when preparsing document.write.
     */
    bool                          mDocWriteSpeculativeLastWasCR;

    /**
     * The parser is blocking on a script
     */
    bool                          mBlocked;

    /**
     * Whether the document.write() speculator is already active.
     */
    bool                          mDocWriteSpeculatorActive;
    
    /**
     * The number of PushDefinedInsertionPoint calls we've seen without a
     * matching PopDefinedInsertionPoint.
     */
    int32_t                       mInsertionPointPushLevel;

    /**
     * True if document.close() has been called.
     */
    bool                          mDocumentClosed;

    bool                          mInDocumentWrite;

    // Portable parser objects
    /**
     * The first buffer in the pending UTF-16 buffer queue
     */
    RefPtr<nsHtml5OwningUTF16Buffer>  mFirstBuffer;

    /**
     * The last buffer in the pending UTF-16 buffer queue. Always points
     * to a sentinel object with nullptr as its parser key.
     */
    nsHtml5OwningUTF16Buffer* mLastBuffer; // weak ref;

    /**
     * The tree operation executor
     */
    RefPtr<nsHtml5TreeOpExecutor>     mExecutor;

    /**
     * The HTML5 tree builder
     */
    const nsAutoPtr<nsHtml5TreeBuilder> mTreeBuilder;

    /**
     * The HTML5 tokenizer
     */
    const nsAutoPtr<nsHtml5Tokenizer>   mTokenizer;

    /**
     * Another HTML5 tree builder for preloading document.written content.
     */
    nsAutoPtr<nsHtml5TreeBuilder> mDocWriteSpeculativeTreeBuilder;

    /**
     * Another HTML5 tokenizer for preloading document.written content.
     */
    nsAutoPtr<nsHtml5Tokenizer>   mDocWriteSpeculativeTokenizer;

    /**
     * The stream listener holding the stream parser.
     */
    RefPtr<nsHtml5StreamListener>     mStreamListener;

    /**
     *
     */
    int32_t                             mRootContextLineNumber;
    
    /**
     * Whether it's OK to transfer parsing back to the stream parser
     */
    bool                                mReturnToStreamParserPermitted;

    /**
     * The scoped atom table
     */
    nsHtml5AtomTable                    mAtomTable;

};
#endif