summary refs log tree commit diff stats
path: root/WebCore/html/HTMLTokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/html/HTMLTokenizer.h')
-rw-r--r-- WebCore/html/HTMLTokenizer.h 31
1 files changed, 25 insertions, 6 deletions
diff --git a/WebCore/html/HTMLTokenizer.h b/WebCore/html/HTMLTokenizer.h
index c2e52ab..96c2d7c 100644
--- a/WebCore/html/HTMLTokenizer.h
+++ b/WebCore/html/HTMLTokenizer.h
@@ -83,6 +83,10 @@ public:
AfterAttributeValueQuotedState,
SelfClosingStartTagState,
BogusCommentState,
+ // The ContinueBogusCommentState is not in the HTML5 spec, but we use
+ // it internally to keep track of whether we've started the bogus
+ // comment token yet.
+ ContinueBogusCommentState,
MarkupDeclarationOpenState,
CommentStartState,
CommentStartDashState,
@@ -164,7 +168,7 @@ private:
// a number of specific character values are parse errors and should be replaced
// by the replacement character. We suspect this is a problem with the spec as doing
// that filtering breaks surrogate pair handling and causes us not to match Minefield.
- if (m_nextInputCharacter == '\0')
+ if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source))
m_nextInputCharacter = 0xFFFD;
}
return true;
@@ -179,25 +183,40 @@ private:
return peek(source, lineNumber);
}
+ static const UChar endOfFileMarker;
+
private:
+ bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
+ {
+ return source.isClosed() && source.length() == 1; // NOTE(review): presumably a NUL that is the only remaining character of a closed source is the end-of-file marker, not real input -- confirm.
+ }
+
// http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
UChar m_nextInputCharacter;
bool m_skipNextNewLine;
};
- inline void emitCharacter(UChar);
- inline void emitParseError();
- inline void emitCurrentToken();
- inline void emitCodePoint(unsigned);
+ inline bool processEntity(SegmentedString&);
+
+ inline void parseError();
+ inline void bufferCharacter(UChar);
+ inline void bufferCodePoint(unsigned);
- inline bool processEntity(SegmentedString& source);
+ inline bool emitAndResumeIn(SegmentedString&, State);
+ inline bool emitAndReconsumeIn(SegmentedString&, State);
+ inline bool emitEndOfFile(SegmentedString&);
+ inline bool flushEmitAndResumeIn(SegmentedString&, State);
+ // Return whether we need to emit a character token before dealing with
+ // the buffered end tag.
+ inline bool flushBufferedEndTag(SegmentedString&);
inline bool temporaryBufferIs(const String&);
// Sometimes we speculatively consume input characters and we don't
// know whether they represent end tags or RCDATA, etc. These
// functions help manage these state.
inline void addToPossibleEndTag(UChar cc);
+ inline void saveEndTagNameIfNeeded();
inline bool isAppropriateEndTag();
inline bool shouldEmitBufferedCharacterToken(const SegmentedString&);