feat(compiler): record end of expression Token (#33549)
In the past, only the starting index of an expression Token has been recorded, so a parser could demarcate the span of a token only by the start locations of two tokens. This may lead to trailing whitespace being included in the token span:

```html
{{ token1 + token2 }}
   ^^^^^^^^^ recorded span of `token1`
```

It's also not enough for a parser to determine the end of a token by adding the length of the token value to the token's start location, because lexed expression values may not exactly reflect the source code. For example, `"d\\"e"` is lexed as a string token whose value is `d"e`.

Instead, this commit adds an `end` field to expression tokens. `end` is one past the last index of the token source code. This will enable a parser to determine the span of a token just by looking at that token.

This is a breaking change because the constructor interface of `Token` has changed.

Part of #33477.

PR Close #33549
This commit is contained in:
parent
8410278b6d
commit
19944c2424
|
@ -35,7 +35,7 @@ export class Lexer {
|
|||
|
||||
export class Token {
|
||||
constructor(
|
||||
public index: number, public type: TokenType, public numValue: number,
|
||||
public index: number, public end: number, public type: TokenType, public numValue: number,
|
||||
public strValue: string) {}
|
||||
|
||||
isCharacter(code: number): boolean {
|
||||
|
@ -91,35 +91,35 @@ export class Token {
|
|||
}
|
||||
}
|
||||
|
||||
function newCharacterToken(index: number, code: number): Token {
|
||||
return new Token(index, TokenType.Character, code, String.fromCharCode(code));
|
||||
function newCharacterToken(index: number, end: number, code: number): Token {
|
||||
return new Token(index, end, TokenType.Character, code, String.fromCharCode(code));
|
||||
}
|
||||
|
||||
function newIdentifierToken(index: number, text: string): Token {
|
||||
return new Token(index, TokenType.Identifier, 0, text);
|
||||
function newIdentifierToken(index: number, end: number, text: string): Token {
|
||||
return new Token(index, end, TokenType.Identifier, 0, text);
|
||||
}
|
||||
|
||||
function newKeywordToken(index: number, text: string): Token {
|
||||
return new Token(index, TokenType.Keyword, 0, text);
|
||||
function newKeywordToken(index: number, end: number, text: string): Token {
|
||||
return new Token(index, end, TokenType.Keyword, 0, text);
|
||||
}
|
||||
|
||||
function newOperatorToken(index: number, text: string): Token {
|
||||
return new Token(index, TokenType.Operator, 0, text);
|
||||
function newOperatorToken(index: number, end: number, text: string): Token {
|
||||
return new Token(index, end, TokenType.Operator, 0, text);
|
||||
}
|
||||
|
||||
function newStringToken(index: number, text: string): Token {
|
||||
return new Token(index, TokenType.String, 0, text);
|
||||
function newStringToken(index: number, end: number, text: string): Token {
|
||||
return new Token(index, end, TokenType.String, 0, text);
|
||||
}
|
||||
|
||||
function newNumberToken(index: number, n: number): Token {
|
||||
return new Token(index, TokenType.Number, n, '');
|
||||
function newNumberToken(index: number, end: number, n: number): Token {
|
||||
return new Token(index, end, TokenType.Number, n, '');
|
||||
}
|
||||
|
||||
function newErrorToken(index: number, message: string): Token {
|
||||
return new Token(index, TokenType.Error, 0, message);
|
||||
function newErrorToken(index: number, end: number, message: string): Token {
|
||||
return new Token(index, end, TokenType.Error, 0, message);
|
||||
}
|
||||
|
||||
export const EOF: Token = new Token(-1, TokenType.Character, 0, '');
|
||||
export const EOF: Token = new Token(-1, -1, TokenType.Character, 0, '');
|
||||
|
||||
class _Scanner {
|
||||
length: number;
|
||||
|
@ -165,7 +165,7 @@ class _Scanner {
|
|||
case chars.$PERIOD:
|
||||
this.advance();
|
||||
return chars.isDigit(this.peek) ? this.scanNumber(start) :
|
||||
newCharacterToken(start, chars.$PERIOD);
|
||||
newCharacterToken(start, this.index, chars.$PERIOD);
|
||||
case chars.$LPAREN:
|
||||
case chars.$RPAREN:
|
||||
case chars.$LBRACE:
|
||||
|
@ -211,13 +211,13 @@ class _Scanner {
|
|||
|
||||
scanCharacter(start: number, code: number): Token {
|
||||
this.advance();
|
||||
return newCharacterToken(start, code);
|
||||
return newCharacterToken(start, this.index, code);
|
||||
}
|
||||
|
||||
|
||||
scanOperator(start: number, str: string): Token {
|
||||
this.advance();
|
||||
return newOperatorToken(start, str);
|
||||
return newOperatorToken(start, this.index, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -243,7 +243,7 @@ class _Scanner {
|
|||
this.advance();
|
||||
str += three;
|
||||
}
|
||||
return newOperatorToken(start, str);
|
||||
return newOperatorToken(start, this.index, str);
|
||||
}
|
||||
|
||||
scanIdentifier(): Token {
|
||||
|
@ -251,8 +251,8 @@ class _Scanner {
|
|||
this.advance();
|
||||
while (isIdentifierPart(this.peek)) this.advance();
|
||||
const str: string = this.input.substring(start, this.index);
|
||||
return KEYWORDS.indexOf(str) > -1 ? newKeywordToken(start, str) :
|
||||
newIdentifierToken(start, str);
|
||||
return KEYWORDS.indexOf(str) > -1 ? newKeywordToken(start, this.index, str) :
|
||||
newIdentifierToken(start, this.index, str);
|
||||
}
|
||||
|
||||
scanNumber(start: number): Token {
|
||||
|
@ -275,7 +275,7 @@ class _Scanner {
|
|||
}
|
||||
const str: string = this.input.substring(start, this.index);
|
||||
const value: number = simple ? parseIntAutoRadix(str) : parseFloat(str);
|
||||
return newNumberToken(start, value);
|
||||
return newNumberToken(start, this.index, value);
|
||||
}
|
||||
|
||||
scanString(): Token {
|
||||
|
@ -321,13 +321,14 @@ class _Scanner {
|
|||
const last: string = input.substring(marker, this.index);
|
||||
this.advance(); // Skip terminating quote.
|
||||
|
||||
return newStringToken(start, buffer + last);
|
||||
return newStringToken(start, this.index, buffer + last);
|
||||
}
|
||||
|
||||
error(message: string, offset: number): Token {
|
||||
const position: number = this.index + offset;
|
||||
return newErrorToken(
|
||||
position, `Lexer Error: ${message} at column ${position} in expression [${this.input}]`);
|
||||
position, this.index,
|
||||
`Lexer Error: ${message} at column ${position} in expression [${this.input}]`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12,48 +12,49 @@ function lex(text: string): any[] {
|
|||
return new Lexer().tokenize(text);
|
||||
}
|
||||
|
||||
function expectToken(token: any, index: number) {
|
||||
function expectToken(token: any, index: number, end: number) {
|
||||
expect(token instanceof Token).toBe(true);
|
||||
expect(token.index).toEqual(index);
|
||||
expect(token.end).toEqual(end);
|
||||
}
|
||||
|
||||
function expectCharacterToken(token: any, index: number, character: string) {
|
||||
function expectCharacterToken(token: any, index: number, end: number, character: string) {
|
||||
expect(character.length).toBe(1);
|
||||
expectToken(token, index);
|
||||
expectToken(token, index, end);
|
||||
expect(token.isCharacter(character.charCodeAt(0))).toBe(true);
|
||||
}
|
||||
|
||||
function expectOperatorToken(token: any, index: number, operator: string) {
|
||||
expectToken(token, index);
|
||||
function expectOperatorToken(token: any, index: number, end: number, operator: string) {
|
||||
expectToken(token, index, end);
|
||||
expect(token.isOperator(operator)).toBe(true);
|
||||
}
|
||||
|
||||
function expectNumberToken(token: any, index: number, n: number) {
|
||||
expectToken(token, index);
|
||||
function expectNumberToken(token: any, index: number, end: number, n: number) {
|
||||
expectToken(token, index, end);
|
||||
expect(token.isNumber()).toBe(true);
|
||||
expect(token.toNumber()).toEqual(n);
|
||||
}
|
||||
|
||||
function expectStringToken(token: any, index: number, str: string) {
|
||||
expectToken(token, index);
|
||||
function expectStringToken(token: any, index: number, end: number, str: string) {
|
||||
expectToken(token, index, end);
|
||||
expect(token.isString()).toBe(true);
|
||||
expect(token.toString()).toEqual(str);
|
||||
}
|
||||
|
||||
function expectIdentifierToken(token: any, index: number, identifier: string) {
|
||||
expectToken(token, index);
|
||||
function expectIdentifierToken(token: any, index: number, end: number, identifier: string) {
|
||||
expectToken(token, index, end);
|
||||
expect(token.isIdentifier()).toBe(true);
|
||||
expect(token.toString()).toEqual(identifier);
|
||||
}
|
||||
|
||||
function expectKeywordToken(token: any, index: number, keyword: string) {
|
||||
expectToken(token, index);
|
||||
function expectKeywordToken(token: any, index: number, end: number, keyword: string) {
|
||||
expectToken(token, index, end);
|
||||
expect(token.isKeyword()).toBe(true);
|
||||
expect(token.toString()).toEqual(keyword);
|
||||
}
|
||||
|
||||
function expectErrorToken(token: Token, index: any, message: string) {
|
||||
expectToken(token, index);
|
||||
function expectErrorToken(token: Token, index: any, end: number, message: string) {
|
||||
expectToken(token, index, end);
|
||||
expect(token.isError()).toBe(true);
|
||||
expect(token.toString()).toEqual(message);
|
||||
}
|
||||
|
@ -64,88 +65,88 @@ function expectErrorToken(token: Token, index: any, message: string) {
|
|||
it('should tokenize a simple identifier', () => {
|
||||
const tokens: number[] = lex('j');
|
||||
expect(tokens.length).toEqual(1);
|
||||
expectIdentifierToken(tokens[0], 0, 'j');
|
||||
expectIdentifierToken(tokens[0], 0, 1, 'j');
|
||||
});
|
||||
|
||||
it('should tokenize "this"', () => {
|
||||
const tokens: number[] = lex('this');
|
||||
expect(tokens.length).toEqual(1);
|
||||
expectKeywordToken(tokens[0], 0, 'this');
|
||||
expectKeywordToken(tokens[0], 0, 4, 'this');
|
||||
});
|
||||
|
||||
it('should tokenize a dotted identifier', () => {
|
||||
const tokens: number[] = lex('j.k');
|
||||
expect(tokens.length).toEqual(3);
|
||||
expectIdentifierToken(tokens[0], 0, 'j');
|
||||
expectCharacterToken(tokens[1], 1, '.');
|
||||
expectIdentifierToken(tokens[2], 2, 'k');
|
||||
expectIdentifierToken(tokens[0], 0, 1, 'j');
|
||||
expectCharacterToken(tokens[1], 1, 2, '.');
|
||||
expectIdentifierToken(tokens[2], 2, 3, 'k');
|
||||
});
|
||||
|
||||
it('should tokenize an operator', () => {
|
||||
const tokens: number[] = lex('j-k');
|
||||
expect(tokens.length).toEqual(3);
|
||||
expectOperatorToken(tokens[1], 1, '-');
|
||||
expectOperatorToken(tokens[1], 1, 2, '-');
|
||||
});
|
||||
|
||||
it('should tokenize an indexed operator', () => {
|
||||
const tokens: number[] = lex('j[k]');
|
||||
expect(tokens.length).toEqual(4);
|
||||
expectCharacterToken(tokens[1], 1, '[');
|
||||
expectCharacterToken(tokens[3], 3, ']');
|
||||
expectCharacterToken(tokens[1], 1, 2, '[');
|
||||
expectCharacterToken(tokens[3], 3, 4, ']');
|
||||
});
|
||||
|
||||
it('should tokenize numbers', () => {
|
||||
const tokens: number[] = lex('88');
|
||||
expect(tokens.length).toEqual(1);
|
||||
expectNumberToken(tokens[0], 0, 88);
|
||||
expectNumberToken(tokens[0], 0, 2, 88);
|
||||
});
|
||||
|
||||
it('should tokenize numbers within index ops',
|
||||
() => { expectNumberToken(lex('a[22]')[2], 2, 22); });
|
||||
() => { expectNumberToken(lex('a[22]')[2], 2, 4, 22); });
|
||||
|
||||
it('should tokenize simple quoted strings',
|
||||
() => { expectStringToken(lex('"a"')[0], 0, 'a'); });
|
||||
() => { expectStringToken(lex('"a"')[0], 0, 3, 'a'); });
|
||||
|
||||
it('should tokenize quoted strings with escaped quotes',
|
||||
() => { expectStringToken(lex('"a\\""')[0], 0, 'a"'); });
|
||||
() => { expectStringToken(lex('"a\\""')[0], 0, 5, 'a"'); });
|
||||
|
||||
it('should tokenize a string', () => {
|
||||
const tokens: Token[] = lex('j-a.bc[22]+1.3|f:\'a\\\'c\':"d\\"e"');
|
||||
expectIdentifierToken(tokens[0], 0, 'j');
|
||||
expectOperatorToken(tokens[1], 1, '-');
|
||||
expectIdentifierToken(tokens[2], 2, 'a');
|
||||
expectCharacterToken(tokens[3], 3, '.');
|
||||
expectIdentifierToken(tokens[4], 4, 'bc');
|
||||
expectCharacterToken(tokens[5], 6, '[');
|
||||
expectNumberToken(tokens[6], 7, 22);
|
||||
expectCharacterToken(tokens[7], 9, ']');
|
||||
expectOperatorToken(tokens[8], 10, '+');
|
||||
expectNumberToken(tokens[9], 11, 1.3);
|
||||
expectOperatorToken(tokens[10], 14, '|');
|
||||
expectIdentifierToken(tokens[11], 15, 'f');
|
||||
expectCharacterToken(tokens[12], 16, ':');
|
||||
expectStringToken(tokens[13], 17, 'a\'c');
|
||||
expectCharacterToken(tokens[14], 23, ':');
|
||||
expectStringToken(tokens[15], 24, 'd"e');
|
||||
expectIdentifierToken(tokens[0], 0, 1, 'j');
|
||||
expectOperatorToken(tokens[1], 1, 2, '-');
|
||||
expectIdentifierToken(tokens[2], 2, 3, 'a');
|
||||
expectCharacterToken(tokens[3], 3, 4, '.');
|
||||
expectIdentifierToken(tokens[4], 4, 6, 'bc');
|
||||
expectCharacterToken(tokens[5], 6, 7, '[');
|
||||
expectNumberToken(tokens[6], 7, 9, 22);
|
||||
expectCharacterToken(tokens[7], 9, 10, ']');
|
||||
expectOperatorToken(tokens[8], 10, 11, '+');
|
||||
expectNumberToken(tokens[9], 11, 14, 1.3);
|
||||
expectOperatorToken(tokens[10], 14, 15, '|');
|
||||
expectIdentifierToken(tokens[11], 15, 16, 'f');
|
||||
expectCharacterToken(tokens[12], 16, 17, ':');
|
||||
expectStringToken(tokens[13], 17, 23, 'a\'c');
|
||||
expectCharacterToken(tokens[14], 23, 24, ':');
|
||||
expectStringToken(tokens[15], 24, 30, 'd"e');
|
||||
});
|
||||
|
||||
it('should tokenize undefined', () => {
|
||||
const tokens: Token[] = lex('undefined');
|
||||
expectKeywordToken(tokens[0], 0, 'undefined');
|
||||
expectKeywordToken(tokens[0], 0, 9, 'undefined');
|
||||
expect(tokens[0].isKeywordUndefined()).toBe(true);
|
||||
});
|
||||
|
||||
it('should ignore whitespace', () => {
|
||||
const tokens: Token[] = lex('a \t \n \r b');
|
||||
expectIdentifierToken(tokens[0], 0, 'a');
|
||||
expectIdentifierToken(tokens[1], 8, 'b');
|
||||
expectIdentifierToken(tokens[0], 0, 1, 'a');
|
||||
expectIdentifierToken(tokens[1], 8, 9, 'b');
|
||||
});
|
||||
|
||||
it('should tokenize quoted string', () => {
|
||||
const str = '[\'\\\'\', "\\""]';
|
||||
const tokens: Token[] = lex(str);
|
||||
expectStringToken(tokens[1], 1, '\'');
|
||||
expectStringToken(tokens[3], 7, '"');
|
||||
expectStringToken(tokens[1], 1, 5, '\'');
|
||||
expectStringToken(tokens[3], 7, 11, '"');
|
||||
});
|
||||
|
||||
it('should tokenize escaped quoted string', () => {
|
||||
|
@ -163,86 +164,89 @@ function expectErrorToken(token: Token, index: any, message: string) {
|
|||
|
||||
it('should tokenize relation', () => {
|
||||
const tokens: Token[] = lex('! == != < > <= >= === !==');
|
||||
expectOperatorToken(tokens[0], 0, '!');
|
||||
expectOperatorToken(tokens[1], 2, '==');
|
||||
expectOperatorToken(tokens[2], 5, '!=');
|
||||
expectOperatorToken(tokens[3], 8, '<');
|
||||
expectOperatorToken(tokens[4], 10, '>');
|
||||
expectOperatorToken(tokens[5], 12, '<=');
|
||||
expectOperatorToken(tokens[6], 15, '>=');
|
||||
expectOperatorToken(tokens[7], 18, '===');
|
||||
expectOperatorToken(tokens[8], 22, '!==');
|
||||
expectOperatorToken(tokens[0], 0, 1, '!');
|
||||
expectOperatorToken(tokens[1], 2, 4, '==');
|
||||
expectOperatorToken(tokens[2], 5, 7, '!=');
|
||||
expectOperatorToken(tokens[3], 8, 9, '<');
|
||||
expectOperatorToken(tokens[4], 10, 11, '>');
|
||||
expectOperatorToken(tokens[5], 12, 14, '<=');
|
||||
expectOperatorToken(tokens[6], 15, 17, '>=');
|
||||
expectOperatorToken(tokens[7], 18, 21, '===');
|
||||
expectOperatorToken(tokens[8], 22, 25, '!==');
|
||||
});
|
||||
|
||||
it('should tokenize statements', () => {
|
||||
const tokens: Token[] = lex('a;b;');
|
||||
expectIdentifierToken(tokens[0], 0, 'a');
|
||||
expectCharacterToken(tokens[1], 1, ';');
|
||||
expectIdentifierToken(tokens[2], 2, 'b');
|
||||
expectCharacterToken(tokens[3], 3, ';');
|
||||
expectIdentifierToken(tokens[0], 0, 1, 'a');
|
||||
expectCharacterToken(tokens[1], 1, 2, ';');
|
||||
expectIdentifierToken(tokens[2], 2, 3, 'b');
|
||||
expectCharacterToken(tokens[3], 3, 4, ';');
|
||||
});
|
||||
|
||||
it('should tokenize function invocation', () => {
|
||||
const tokens: Token[] = lex('a()');
|
||||
expectIdentifierToken(tokens[0], 0, 'a');
|
||||
expectCharacterToken(tokens[1], 1, '(');
|
||||
expectCharacterToken(tokens[2], 2, ')');
|
||||
expectIdentifierToken(tokens[0], 0, 1, 'a');
|
||||
expectCharacterToken(tokens[1], 1, 2, '(');
|
||||
expectCharacterToken(tokens[2], 2, 3, ')');
|
||||
});
|
||||
|
||||
it('should tokenize simple method invocations', () => {
|
||||
const tokens: Token[] = lex('a.method()');
|
||||
expectIdentifierToken(tokens[2], 2, 'method');
|
||||
expectIdentifierToken(tokens[2], 2, 8, 'method');
|
||||
});
|
||||
|
||||
it('should tokenize method invocation', () => {
|
||||
const tokens: Token[] = lex('a.b.c (d) - e.f()');
|
||||
expectIdentifierToken(tokens[0], 0, 'a');
|
||||
expectCharacterToken(tokens[1], 1, '.');
|
||||
expectIdentifierToken(tokens[2], 2, 'b');
|
||||
expectCharacterToken(tokens[3], 3, '.');
|
||||
expectIdentifierToken(tokens[4], 4, 'c');
|
||||
expectCharacterToken(tokens[5], 6, '(');
|
||||
expectIdentifierToken(tokens[6], 7, 'd');
|
||||
expectCharacterToken(tokens[7], 8, ')');
|
||||
expectOperatorToken(tokens[8], 10, '-');
|
||||
expectIdentifierToken(tokens[9], 12, 'e');
|
||||
expectCharacterToken(tokens[10], 13, '.');
|
||||
expectIdentifierToken(tokens[11], 14, 'f');
|
||||
expectCharacterToken(tokens[12], 15, '(');
|
||||
expectCharacterToken(tokens[13], 16, ')');
|
||||
expectIdentifierToken(tokens[0], 0, 1, 'a');
|
||||
expectCharacterToken(tokens[1], 1, 2, '.');
|
||||
expectIdentifierToken(tokens[2], 2, 3, 'b');
|
||||
expectCharacterToken(tokens[3], 3, 4, '.');
|
||||
expectIdentifierToken(tokens[4], 4, 5, 'c');
|
||||
expectCharacterToken(tokens[5], 6, 7, '(');
|
||||
expectIdentifierToken(tokens[6], 7, 8, 'd');
|
||||
expectCharacterToken(tokens[7], 8, 9, ')');
|
||||
expectOperatorToken(tokens[8], 10, 11, '-');
|
||||
expectIdentifierToken(tokens[9], 12, 13, 'e');
|
||||
expectCharacterToken(tokens[10], 13, 14, '.');
|
||||
expectIdentifierToken(tokens[11], 14, 15, 'f');
|
||||
expectCharacterToken(tokens[12], 15, 16, '(');
|
||||
expectCharacterToken(tokens[13], 16, 17, ')');
|
||||
});
|
||||
|
||||
it('should tokenize number', () => { expectNumberToken(lex('0.5')[0], 0, 0.5); });
|
||||
it('should tokenize number', () => { expectNumberToken(lex('0.5')[0], 0, 3, 0.5); });
|
||||
|
||||
it('should tokenize number with exponent', () => {
|
||||
let tokens: Token[] = lex('0.5E-10');
|
||||
expect(tokens.length).toEqual(1);
|
||||
expectNumberToken(tokens[0], 0, 0.5E-10);
|
||||
expectNumberToken(tokens[0], 0, 7, 0.5E-10);
|
||||
tokens = lex('0.5E+10');
|
||||
expectNumberToken(tokens[0], 0, 0.5E+10);
|
||||
expectNumberToken(tokens[0], 0, 7, 0.5E+10);
|
||||
});
|
||||
|
||||
it('should return exception for invalid exponent', () => {
|
||||
expectErrorToken(
|
||||
lex('0.5E-')[0], 4, 'Lexer Error: Invalid exponent at column 4 in expression [0.5E-]');
|
||||
lex('0.5E-')[0], 4, 5,
|
||||
'Lexer Error: Invalid exponent at column 4 in expression [0.5E-]');
|
||||
|
||||
expectErrorToken(
|
||||
lex('0.5E-A')[0], 4,
|
||||
lex('0.5E-A')[0], 4, 5,
|
||||
'Lexer Error: Invalid exponent at column 4 in expression [0.5E-A]');
|
||||
});
|
||||
|
||||
it('should tokenize number starting with a dot',
|
||||
() => { expectNumberToken(lex('.5')[0], 0, 0.5); });
|
||||
() => { expectNumberToken(lex('.5')[0], 0, 2, 0.5); });
|
||||
|
||||
it('should throw error on invalid unicode', () => {
|
||||
expectErrorToken(
|
||||
lex('\'\\u1\'\'bla\'')[0], 2,
|
||||
lex('\'\\u1\'\'bla\'')[0], 2, 2,
|
||||
'Lexer Error: Invalid unicode escape [\\u1\'\'b] at column 2 in expression [\'\\u1\'\'bla\']');
|
||||
});
|
||||
|
||||
it('should tokenize hash as operator', () => { expectOperatorToken(lex('#')[0], 0, '#'); });
|
||||
it('should tokenize hash as operator',
|
||||
() => { expectOperatorToken(lex('#')[0], 0, 1, '#'); });
|
||||
|
||||
it('should tokenize ?. as operator', () => { expectOperatorToken(lex('?.')[0], 0, '?.'); });
|
||||
it('should tokenize ?. as operator',
|
||||
() => { expectOperatorToken(lex('?.')[0], 0, 2, '?.'); });
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue