feat(HtmlLexer): allow "<" in text tokens

fixes #5550
Victor Berchet 2015-12-06 13:11:00 -08:00
parent 3a438615c3
commit aecf68117a
2 changed files with 85 additions and 36 deletions
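In short (an illustration, not part of the commit; it assumes the tokenizeHtml(source, url) entry point that the spec file below exercises through its helpers): a "<" that does not open a valid tag no longer aborts tokenization with an "Unexpected character" error. The tokenizer backtracks, emits the "<" as a TEXT token, and merges back-to-back TEXT tokens, so text such as an interpolation containing "<" is preserved.

// Hypothetical usage sketch; the import path and the tokenizeHtml() name are
// assumed from the angular2 compiler of this era, not shown in the diff below.
import {tokenizeHtml, HtmlTokenType} from 'angular2/src/compiler/html_lexer';

const result = tokenizeHtml('{{ a < b ? c : d }}', 'demo.html');

// Previously this input produced a TAG_OPEN_START "Unexpected character" error;
// with this change it tokenizes cleanly into a single TEXT token plus EOF.
console.log(result.errors.length);  // 0
const text = result.tokens.filter(t => t.type === HtmlTokenType.TEXT);
console.log(text.length, text[0].parts[0]);  // 1 '{{ a < b ? c : d }}'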

View File

@ -6,6 +6,7 @@ import {
   CONST_EXPR,
   serializeEnum
 } from 'angular2/src/facade/lang';
+import {ListWrapper} from 'angular2/src/facade/collection';
 import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
 import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';
@ -161,7 +162,7 @@ class _HtmlTokenizer {
     }
     this._beginToken(HtmlTokenType.EOF);
     this._endToken([]);
-    return new HtmlTokenizeResult(this.tokens, this.errors);
+    return new HtmlTokenizeResult(mergeTextTokens(this.tokens), this.errors);
   }
   private _getLocation(): ParseLocation {
@ -374,21 +375,37 @@ class _HtmlTokenizer {
   }
   private _consumeTagOpen(start: ParseLocation) {
-    this._attemptUntilFn(isNotWhitespace);
-    var nameStart = this.index;
-    this._consumeTagOpenStart(start);
-    var lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
-    this._attemptUntilFn(isNotWhitespace);
-    while (this.peek !== $SLASH && this.peek !== $GT) {
-      this._consumeAttributeName();
+    let savedPos = this._savePosition();
+    let lowercaseTagName;
+    try {
       this._attemptUntilFn(isNotWhitespace);
-      if (this._attemptChar($EQ)) {
+      var nameStart = this.index;
+      this._consumeTagOpenStart(start);
+      lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
+      this._attemptUntilFn(isNotWhitespace);
+      while (this.peek !== $SLASH && this.peek !== $GT) {
+        this._consumeAttributeName();
+        this._attemptUntilFn(isNotWhitespace);
+        if (this._attemptChar($EQ)) {
+          this._attemptUntilFn(isNotWhitespace);
+          this._consumeAttributeValue();
+        }
         this._attemptUntilFn(isNotWhitespace);
-        this._consumeAttributeValue();
       }
-      this._attemptUntilFn(isNotWhitespace);
+      this._consumeTagOpenEnd();
+    } catch (e) {
+      if (e instanceof ControlFlowError) {
+        // When the start tag is invalid, assume we want a "<"
+        this._restorePosition(savedPos);
+        // Back to back text tokens are merged at the end
+        this._beginToken(HtmlTokenType.TEXT, start);
+        this._endToken(['<']);
+        return;
+      }
+      throw e;
     }
-    this._consumeTagOpenEnd();
     var contentTokenType = getHtmlTagDefinition(lowercaseTagName).contentType;
     if (contentTokenType === HtmlTagContentType.RAW_TEXT) {
       this._consumeRawTextWithTagClose(lowercaseTagName, false);
@ -470,13 +487,20 @@ class _HtmlTokenizer {
     this._endToken([this._processCarriageReturns(parts.join(''))]);
   }
-  private _savePosition(): number[] { return [this.peek, this.index, this.column, this.line]; }
+  private _savePosition(): number[] {
+    return [this.peek, this.index, this.column, this.line, this.tokens.length];
+  }
   private _restorePosition(position: number[]): void {
     this.peek = position[0];
     this.index = position[1];
     this.column = position[2];
     this.line = position[3];
+    let nbTokens = position[4];
+    if (nbTokens < this.tokens.length) {
+      // remove any extra tokens
+      this.tokens = ListWrapper.slice(this.tokens, 0, nbTokens);
+    }
   }
 }
@ -516,3 +540,21 @@ function isAsciiLetter(code: number): boolean {
 function isAsciiHexDigit(code: number): boolean {
   return code >= $a && code <= $f || code >= $0 && code <= $9;
 }
+function mergeTextTokens(srcTokens: HtmlToken[]): HtmlToken[] {
+  let dstTokens = [];
+  let lastDstToken: HtmlToken;
+  for (let i = 0; i < srcTokens.length; i++) {
+    let token = srcTokens[i];
+    if (isPresent(lastDstToken) && lastDstToken.type == HtmlTokenType.TEXT &&
+        token.type == HtmlTokenType.TEXT) {
+      lastDstToken.parts[0] += token.parts[0];
+      lastDstToken.sourceSpan.end = token.sourceSpan.end;
+    } else {
+      lastDstToken = token;
+      dstTokens.push(lastDstToken);
+    }
+  }
+  return dstTokens;
+}
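The comments in the new catch block above ("assume we want a '<'", "back to back text tokens are merged at the end") describe a two-step fallback: a failed tag parse is rolled back, including any tokens it already pushed (which is why _savePosition() now also records this.tokens.length), a one-character "<" TEXT token is emitted instead, and mergeTextTokens() later folds adjacent TEXT tokens into one. The sketch below is a standalone, simplified restatement of that merge step; it is not the commit's code (no HtmlToken or sourceSpan handling).

enum TokenType { TEXT, TAG_OPEN_START, EOF }

interface Token {
  type: TokenType;
  parts: string[];
}

function mergeAdjacentTextTokens(srcTokens: Token[]): Token[] {
  const dstTokens: Token[] = [];
  let lastDstToken: Token | undefined;
  for (const token of srcTokens) {
    if (lastDstToken !== undefined && lastDstToken.type === TokenType.TEXT &&
        token.type === TokenType.TEXT) {
      // Extend the previous TEXT token instead of keeping two adjacent ones.
      lastDstToken.parts[0] += token.parts[0];
    } else {
      lastDstToken = token;
      dstTokens.push(lastDstToken);
    }
  }
  return dstTokens;
}

// "a" + "<" (the fallback token) + "b" collapse into a single "a<b" TEXT token.
const merged = mergeAdjacentTextTokens([
  {type: TokenType.TEXT, parts: ['a']},
  {type: TokenType.TEXT, parts: ['<']},
  {type: TokenType.TEXT, parts: ['b']},
  {type: TokenType.EOF, parts: []},
]);
console.log(merged.length);       // 2
console.log(merged[0].parts[0]);  // 'a<b'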

View File

@ -192,15 +192,6 @@ export function main() {
             ]);
       });
-      it('should report missing name after <', () => {
-        expect(tokenizeAndHumanizeErrors('<'))
-            .toEqual([[HtmlTokenType.TAG_OPEN_START, 'Unexpected character "EOF"', '0:1']]);
-      });
-      it('should report missing >', () => {
-        expect(tokenizeAndHumanizeErrors('<name'))
-            .toEqual([[HtmlTokenType.TAG_OPEN_START, 'Unexpected character "EOF"', '0:5']]);
-      });
     });
     describe('attributes', () => {
@ -335,20 +326,6 @@ export function main() {
             ]);
       });
-      it('should report missing value after =', () => {
-        expect(tokenizeAndHumanizeErrors('<name a='))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:8']]);
-      });
-      it('should report missing end quote for \'', () => {
-        expect(tokenizeAndHumanizeErrors('<name a=\''))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:9']]);
-      });
-      it('should report missing end quote for "', () => {
-        expect(tokenizeAndHumanizeErrors('<name a="'))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:9']]);
-      });
     });
     describe('closing tags', () => {
@ -448,6 +425,36 @@ export function main() {
         expect(tokenizeAndHumanizeSourceSpans('a'))
             .toEqual([[HtmlTokenType.TEXT, 'a'], [HtmlTokenType.EOF, '']]);
       });
+      it('should allow "<" in text nodes', () => {
+        expect(tokenizeAndHumanizeParts('{{ a < b ? c : d }}'))
+            .toEqual([[HtmlTokenType.TEXT, '{{ a < b ? c : d }}'], [HtmlTokenType.EOF]]);
+        expect(tokenizeAndHumanizeSourceSpans('<p>a<b</p>'))
+            .toEqual([
+              [HtmlTokenType.TAG_OPEN_START, '<p'],
+              [HtmlTokenType.TAG_OPEN_END, '>'],
+              [HtmlTokenType.TEXT, 'a<b'],
+              [HtmlTokenType.TAG_CLOSE, '</p>'],
+              [HtmlTokenType.EOF, ''],
+            ]);
+      });
+      // TODO(vicb): make the lexer aware of Angular expressions
+      // see https://github.com/angular/angular/issues/5679
+      it('should parse valid start tag in interpolation', () => {
+        expect(tokenizeAndHumanizeParts('{{ a <b && c > d }}'))
+            .toEqual([
+              [HtmlTokenType.TEXT, '{{ a '],
+              [HtmlTokenType.TAG_OPEN_START, null, 'b'],
+              [HtmlTokenType.ATTR_NAME, null, '&&'],
+              [HtmlTokenType.ATTR_NAME, null, 'c'],
+              [HtmlTokenType.TAG_OPEN_END],
+              [HtmlTokenType.TEXT, ' d }}'],
+              [HtmlTokenType.EOF]
+            ]);
+      });
     });
     describe('raw text', () => {