feat(compiler): support tokenizing escaped strings (#28055)
In order to support source mapping of templates, we need to be able to tokenize the template in its original context. When the template is defined inline as a JavaScript string in a TS/JS source file, the tokenizer must be able to handle string escape sequences, such as `\n` and `\"`, as they appear in the original source file.

This commit teaches the lexer how to unescape these sequences, but only when the `escapedString` option is set to true. Otherwise there is no change to the tokenizing behaviour.

PR Close #28055
commit 2424184d42
parent eeb560ac88
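To make the new behaviour concrete, here is a minimal sketch of driving the lexer with the new option. The `tokenize` call shape (source, url, tag-definition getter, options object) is assumed from the diff below, so treat the exact signature and deep import paths as illustrative:

```ts
import {tokenize} from '@angular/compiler/src/ml_parser/lexer';
import {getHtmlTagDefinition} from '@angular/compiler/src/ml_parser/html_tags';

// The template exactly as it appears inside the TS source file,
// i.e. still carrying its JavaScript escape sequences:
const escapedSource = 'line 1\\n\\"line 2\\"';

// With `escapedString: true` the lexer unescapes while it tokenizes:
// the TEXT token contains a real newline and real quotes, while the
// source spans still cover the original escaped characters.
const escaped = tokenize(
    escapedSource, 'app.component.ts', getHtmlTagDefinition, {escapedString: true});

// Without the option, tokenizing is unchanged and the input is read verbatim.
const verbatim = tokenize(escapedSource, 'app.component.ts', getHtmlTagDefinition);
```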
@@ -7,6 +7,7 @@
  */

 export const $EOF = 0;
+export const $BSPACE = 8;
 export const $TAB = 9;
 export const $LF = 10;
 export const $VTAB = 11;
@@ -36,6 +37,7 @@ export const $GT = 62;
 export const $QUESTION = 63;

 export const $0 = 48;
+export const $7 = 55;
 export const $9 = 57;

 export const $A = 65;
@@ -51,6 +53,7 @@ export const $CARET = 94;
 export const $_ = 95;

 export const $a = 97;
+export const $b = 98;
 export const $e = 101;
 export const $f = 102;
 export const $n = 110;
@@ -87,3 +90,11 @@ export function isAsciiLetter(code: number): boolean {
 export function isAsciiHexDigit(code: number): boolean {
   return code >= $a && code <= $f || code >= $A && code <= $F || isDigit(code);
 }
+
+export function isNewLine(code: number): boolean {
+  return code === $LF || code === $CR;
+}
+
+export function isOctalDigit(code: number): boolean {
+  return $0 <= code && code <= $7;
+}
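A quick illustration of the two new character-class helpers (a standalone sketch; the deep import path is an assumption):

```ts
import * as chars from '@angular/compiler/src/chars';

// isOctalDigit: used to recognise sequences like `\012` ($0..$7 only).
console.log(chars.isOctalDigit('0'.charCodeAt(0)));  // true
console.log(chars.isOctalDigit('7'.charCodeAt(0)));  // true
console.log(chars.isOctalDigit('9'.charCodeAt(0)));  // false

// isNewLine: used to track "real" line breaks ($LF or $CR).
console.log(chars.isNewLine(chars.$LF));  // true
console.log(chars.isNewLine(chars.$CR));  // true
```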
@@ -70,6 +70,30 @@ export interface TokenizeOptions {
   * The entire `source` string is parsed if this is not provided.
   * */
  range?: LexerRange;
+  /**
+   * If this text is stored in a JavaScript string, then we have to deal with escape sequences.
+   *
+   * **Example 1:**
+   *
+   * ```
+   * "abc\"def\nghi"
+   * ```
+   *
+   * - The `\"` must be converted to `"`.
+   * - The `\n` must be converted to a new line character in a token,
+   *   but it should not increment the current line for source mapping.
+   *
+   * **Example 2:**
+   *
+   * ```
+   * "abc\
+   *  def"
+   * ```
+   *
+   * The line continuation (`\` followed by a newline) should be removed from a token
+   * but the new line should increment the current line for source mapping.
+   */
+  escapedString?: boolean;
 }

 export function tokenize(
@@ -99,6 +123,7 @@ class _Tokenizer {
   private _end: number;
   private _tokenizeIcu: boolean;
   private _interpolationConfig: InterpolationConfig;
+  private _escapedString: boolean;
   private _peek: number = -1;
   private _nextPeek: number = -1;
   private _index: number;
@@ -123,6 +148,7 @@
       options: TokenizeOptions) {
     this._tokenizeIcu = options.tokenizeExpansionForms || false;
     this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
+    this._escapedString = options.escapedString || false;
     this._input = _file.content;
     if (options.range) {
       this._end = options.range.endPos;
@@ -266,10 +292,13 @@
     if (this._index >= this._end) {
       throw this._createError(_unexpectedCharacterErrorMsg(chars.$EOF), this._getSpan());
     }
-    if (this._peek === chars.$LF) {
+    // The actual character in the input might be different to the _peek if we are processing
+    // escape characters. We only want to track "real" new lines.
+    const actualChar = this._input.charCodeAt(this._index);
+    if (actualChar === chars.$LF) {
       this._line++;
       this._column = 0;
-    } else if (this._peek !== chars.$LF && this._peek !== chars.$CR) {
+    } else if (!chars.isNewLine(actualChar)) {
       this._column++;
     }
     this._index++;
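The key idea in this hunk is that `_peek` may hold an unescaped character while the raw input still holds the escape sequence, so line/column bookkeeping must follow the raw input. A worked example (illustrative comments only, not commit code):

```ts
// Raw input as it appears in the .ts file (4 chars): 'a' '\' 'n' 'b'
const input = 'a\\nb';

// After unescaping, the token text is 3 chars: 'a', newline, 'b'.
// But input.charCodeAt(_index) sees '\\' and then 'n', neither of which
// is a real line break, so the escaped "\n" only advances the column.
// A real newline in the input would instead increment _line and reset
// _column, which is what a source map of the original file expects.
```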
@@ -284,6 +313,22 @@
     this._peek = this._index >= this._end ? chars.$EOF : this._input.charCodeAt(this._index);
     this._nextPeek =
         this._index + 1 >= this._end ? chars.$EOF : this._input.charCodeAt(this._index + 1);
+    if (this._peek === chars.$BACKSLASH && processingEscapeSequence !== true &&
+        this._escapedString) {
+      this._processEscapeSequence();
+    }
   }

+  /**
+   * Advance the specified number of characters.
+   * @param count The number of characters to advance.
+   * @param processingEscapeSequence Whether we want `advance()` to process escape sequences.
+   */
+  private _advanceN(count: number, processingEscapeSequence?: boolean) {
+    while (count) {
+      this._advance(processingEscapeSequence);
+      count--;
+    }
+  }
+
   private _attemptCharCode(charCode: number): boolean {
@@ -368,9 +413,11 @@
     if (decodeEntities && this._peek === chars.$AMPERSAND) {
       return this._decodeEntity();
     } else {
-      const index = this._index;
+      // Don't rely upon reading directly from `_input` as the actual char value
+      // may have been generated from an escape sequence.
+      const char = String.fromCodePoint(this._peek);
       this._advance();
-      return this._input[index];
+      return char;
     }
   }

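The `_readChar` change exists because the raw input characters and the decoded code point can disagree. A standalone illustration (not commit code):

```ts
// Nine raw characters in the source file...
const raw = '\\u{1F600}';
// ...but a single decoded code point once the lexer has processed the escape.
const decoded = String.fromCodePoint(0x1F600);

console.log(raw[0]);   // '\' -- indexing the raw input yields the wrong token text
console.log(decoded);  // the character the token should actually contain
```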
@@ -410,6 +457,122 @@
     }
   }

+  /**
+   * Process the escape sequence that starts at the current position in the text.
+   *
+   * This method is called from `_advance()` to ensure that escape sequences are
+   * always processed correctly, no matter how tokens are being consumed.
+   *
+   * But note that this method also calls `_advance()` (re-entering) to move through
+   * the characters within an escape sequence. In that case it tells `_advance()` not
+   * to attempt to process further escape sequences by passing `true` as its first
+   * argument.
+   */
+  private _processEscapeSequence(): void {
+    this._advance(true);  // advance past the backslash
+
+    // First check for standard control char sequences
+    if (this._peekChar() === chars.$n) {
+      this._peek = chars.$LF;
+    } else if (this._peekChar() === chars.$r) {
+      this._peek = chars.$CR;
+    } else if (this._peekChar() === chars.$v) {
+      this._peek = chars.$VTAB;
+    } else if (this._peekChar() === chars.$t) {
+      this._peek = chars.$TAB;
+    } else if (this._peekChar() === chars.$b) {
+      this._peek = chars.$BSPACE;
+    } else if (this._peekChar() === chars.$f) {
+      this._peek = chars.$FF;
+    }
+
+    // Now consider more complex sequences
+
+    else if (this._peekChar() === chars.$u) {
+      // Unicode code-point sequence
+      this._advance(true);  // advance past the `u` char
+      if (this._peekChar() === chars.$LBRACE) {
+        // Variable length Unicode, e.g. `\u{123}`
+        this._advance(true);  // advance past the `{` char
+        // Advance past the variable number of hex digits until we hit a `}` char
+        const start = this._getLocation();
+        while (this._peekChar() !== chars.$RBRACE) {
+          this._advance(true);
+        }
+        this._decodeHexDigits(start, this._index - start.offset);
+      } else {
+        // Fixed length Unicode, e.g. `\u1234`
+        this._parseFixedHexSequence(4);
+      }
+    }
+
+    else if (this._peekChar() === chars.$x) {
+      // Hex char code, e.g. `\x2F`
+      this._advance(true);  // advance past the `x` char
+      this._parseFixedHexSequence(2);
+    }
+
+    else if (chars.isOctalDigit(this._peekChar())) {
+      // Octal char code, e.g. `\012`
+      const start = this._index;
+      let length = 1;
+      // Note that we work with `_nextPeek` because, although we check the next character
+      // after the sequence to find the end of the sequence,
+      // we do not want to advance that far to check the character, otherwise we will
+      // have to back up.
+      while (chars.isOctalDigit(this._nextPeek) && length < 3) {
+        this._advance(true);
+        length++;
+      }
+      const octal = this._input.substr(start, length);
+      this._peek = parseInt(octal, 8);
+    }
+
+    else if (chars.isNewLine(this._peekChar())) {
+      // Line continuation `\` followed by a new line
+      this._advance(true);  // advance over the newline
+    }
+
+    // If none of the `if` blocks were executed then we just have an escaped normal character.
+    // In that case we simply skip over the backslash, leaving the character itself in place.
+  }
+
+  private _parseFixedHexSequence(length: number) {
+    const start = this._getLocation();
+    this._advanceN(length - 1, true);
+    this._decodeHexDigits(start, length);
+  }
+
+  private _decodeHexDigits(start: ParseLocation, length: number) {
+    const hex = this._input.substr(start.offset, length);
+    const charCode = parseInt(hex, 16);
+    if (!isNaN(charCode)) {
+      this._peek = charCode;
+    } else {
+      throw this._createError(
+          'Invalid hexadecimal escape sequence', this._getSpan(start, this._getLocation()));
+    }
+  }
+
+  /**
+   * This little helper is to solve a problem where the TS compiler will narrow
+   * the type of `_peek` after an `if` statement, even if there is a call to a
+   * method that might mutate `_peek`.
+   *
+   * For example:
+   *
+   * ```
+   * if (this._peek === 10) {
+   *   this._advance();  // mutates _peek
+   *   if (this._peek === 20) {
+   *     ...
+   * ```
+   *
+   * The second if statement fails TS compilation because the compiler has determined
+   * that `_peek` is `10` and so can never be equal to `20`.
+   */
+  private _peekChar(): number { return this._peek; }
+
   private _consumeRawText(
       decodeEntities: boolean, firstCharOfEnd: number, attemptEndRest: () => boolean): Token {
     let tagCloseStart: ParseLocation;
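Taken together, `_processEscapeSequence` implements the standard JavaScript escape grammar. As a summary, here is a hypothetical standalone function (`unescapeJsString` is my name, not the commit's) that applies the same rules to a whole string. The real lexer instead rewrites `_peek` in place so that source spans keep pointing at the escaped text, and it throws 'Invalid hexadecimal escape sequence' on bad hex, which this sketch omits:

```ts
// Hypothetical helper mirroring _processEscapeSequence's rules; assumes well-formed input.
function unescapeJsString(input: string): string {
  const control: {[key: string]: string} = {
    'n': '\n', 'r': '\r', 'v': '\v', 't': '\t', 'b': '\b', 'f': '\f',
  };
  let out = '';
  for (let i = 0; i < input.length; i++) {
    if (input[i] !== '\\') {
      out += input[i];
      continue;
    }
    const next = input[++i];
    if (next in control) {
      out += control[next];                       // standard control char sequences
    } else if (next === 'u' && input[i + 1] === '{') {
      const end = input.indexOf('}', i + 2);      // variable length Unicode, e.g. \u{123AB}
      out += String.fromCodePoint(parseInt(input.slice(i + 2, end), 16));
      i = end;
    } else if (next === 'u') {
      out += String.fromCodePoint(parseInt(input.substr(i + 1, 4), 16));  // \u1234
      i += 4;
    } else if (next === 'x') {
      out += String.fromCodePoint(parseInt(input.substr(i + 1, 2), 16));  // \x2F
      i += 2;
    } else if (next >= '0' && next <= '7') {
      let oct = next;                             // octal, at most 3 digits, e.g. \012
      while (oct.length < 3 && input[i + 1] >= '0' && input[i + 1] <= '7') {
        oct += input[++i];
      }
      out += String.fromCharCode(parseInt(oct, 8));
      // (the lexer itself treats a resulting NUL as end-of-input)
    } else if (next === '\n') {
      // Line continuation: drop both the backslash and the newline.
    } else {
      out += next;  // "non-escape" sequence: just drop the backslash
    }
  }
  return out;
}

console.log(unescapeJsString('abc\\ndef'));  // 'abc\ndef' (with a real newline)
```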
@@ -1580,6 +1580,30 @@ export interface ParseTemplateOptions {
   * The entire `source` string is parsed if this is not provided.
   * */
  range?: LexerRange;
+  /**
+   * If this text is stored in a JavaScript string, then we have to deal with escape sequences.
+   *
+   * **Example 1:**
+   *
+   * ```
+   * "abc\"def\nghi"
+   * ```
+   *
+   * - The `\"` must be converted to `"`.
+   * - The `\n` must be converted to a new line character in a token,
+   *   but it should not increment the current line for source mapping.
+   *
+   * **Example 2:**
+   *
+   * ```
+   * "abc\
+   *  def"
+   * ```
+   *
+   * The line continuation (`\` followed by a newline) should be removed from a token
+   * but the new line should increment the current line for source mapping.
+   */
+  escapedString?: boolean;
 }

 /**
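Since `ParseTemplateOptions` gains the same flag, callers of `parseTemplate` can opt in when the template text comes straight out of a TS file. A minimal sketch; the `parseTemplate(template, templateUrl, options)` shape and the import path are assumptions, not quoted from the commit:

```ts
import {parseTemplate} from '@angular/compiler';

// Template text exactly as written inside a component decorator in a .ts file:
const inlineTemplate = '<div title=\\"hi\\">line 1\\nline 2</div>';

const {nodes, errors} = parseTemplate(inlineTemplate, '/app/app.component.ts', {
  escapedString: true,  // unescape while tokenizing; spans still map to the original file
});
```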
@@ -824,6 +824,267 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_util';
       });
     });

+    describe('(processing escaped strings)', () => {
+      it('should unescape standard escape sequences', () => {
+        expect(tokenizeAndHumanizeParts('\\\' \\\' \\\'', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\' \' \''],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\" \\" \\"', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\" \" \"'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\` \\` \\`', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\` \` \`'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\\\ \\\\ \\\\', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\\ \\ \\'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\n \\n \\n', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\n \n \n'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\r \\r \\r', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\n \n \n'],  // post-processing converts `\r` to `\n`
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\v \\v \\v', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\v \v \v'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\t \\t \\t', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\t \t \t'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\b \\b \\b', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\b \b \b'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\f \\f \\f', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\f \f \f'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts(
+                   '\\\' \\" \\` \\\\ \\n \\r \\v \\t \\b \\f', {escapedString: true}))
+            .toEqual([
+              [lex.TokenType.TEXT, '\' \" \` \\ \n \n \v \t \b \f'],
+              [lex.TokenType.EOF],
+            ]);
+      });
+
+      it('should unescape null sequences', () => {
+        expect(tokenizeAndHumanizeParts('\\0', {escapedString: true})).toEqual([
+          [lex.TokenType.EOF],
+        ]);
+        // \09 is not an octal number so the \0 is taken as EOF
+        expect(tokenizeAndHumanizeParts('\\09', {escapedString: true})).toEqual([
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should unescape octal sequences', () => {
+        // \19 is read as an octal `\1` followed by a normal char `9`
+        // \1234 is read as an octal `\123` followed by a normal char `4`
+        // \999 is not an octal number so its backslash just gets removed.
+        expect(tokenizeAndHumanizeParts(
+                   '\\001 \\01 \\1 \\12 \\223 \\19 \\2234 \\999', {escapedString: true}))
+            .toEqual([
+              [lex.TokenType.TEXT, '\x01 \x01 \x01 \x0A \x93 \x019 \x934 999'],
+              [lex.TokenType.EOF],
+            ]);
+      });
+
+      it('should unescape hex sequences', () => {
+        expect(tokenizeAndHumanizeParts('\\x12 \\x4F \\xDC', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\x12 \x4F \xDC'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should report an error on an invalid hex sequence', () => {
+        expect(tokenizeAndHumanizeErrors('\\xGG', {escapedString: true})).toEqual([
+          [null, 'Invalid hexadecimal escape sequence', '0:2']
+        ]);
+
+        expect(tokenizeAndHumanizeErrors('abc \\x xyz', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, 'Invalid hexadecimal escape sequence', '0:6']
+        ]);
+
+        expect(tokenizeAndHumanizeErrors('abc\\x', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, 'Unexpected character "EOF"', '0:5']
+        ]);
+      });
+
+      it('should unescape fixed length Unicode sequences', () => {
+        expect(tokenizeAndHumanizeParts('\\u0123 \\uABCD', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, '\u0123 \uABCD'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should error on an invalid fixed length Unicode sequence', () => {
+        expect(tokenizeAndHumanizeErrors('\\uGGGG', {escapedString: true})).toEqual([
+          [null, 'Invalid hexadecimal escape sequence', '0:2']
+        ]);
+      });
+
+      it('should unescape variable length Unicode sequences', () => {
+        expect(tokenizeAndHumanizeParts(
+                   '\\u{01} \\u{ABC} \\u{1234} \\u{123AB}', {escapedString: true}))
+            .toEqual([
+              [lex.TokenType.TEXT, '\u{01} \u{ABC} \u{1234} \u{123AB}'],
+              [lex.TokenType.EOF],
+            ]);
+      });
+
+      it('should error on an invalid variable length Unicode sequence', () => {
+        expect(tokenizeAndHumanizeErrors('\\u{GG}', {escapedString: true})).toEqual([
+          [null, 'Invalid hexadecimal escape sequence', '0:3']
+        ]);
+      });
+
+      it('should unescape line continuations', () => {
+        expect(tokenizeAndHumanizeParts('abc\\\ndef', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, 'abcdef'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeParts('\\\nx\\\ny\\\n', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, 'xy'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should remove backslash from "non-escape" sequences', () => {
+        expect(tokenizeAndHumanizeParts('\\a \\g \\~', {escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, 'a g ~'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should unescape sequences in plain text', () => {
+        expect(tokenizeAndHumanizeParts('abc\ndef\\nghi\\tjkl\\`\\\'\\"mno', {escapedString: true}))
+            .toEqual([
+              [lex.TokenType.TEXT, 'abc\ndef\nghi\tjkl`\'"mno'],
+              [lex.TokenType.EOF],
+            ]);
+      });
+
+      it('should unescape sequences in raw text', () => {
+        expect(tokenizeAndHumanizeParts(
+                   '<script>abc\ndef\\nghi\\tjkl\\`\\\'\\"mno</script>', {escapedString: true}))
+            .toEqual([
+              [lex.TokenType.TAG_OPEN_START, null, 'script'],
+              [lex.TokenType.TAG_OPEN_END],
+              [lex.TokenType.RAW_TEXT, 'abc\ndef\nghi\tjkl`\'"mno'],
+              [lex.TokenType.TAG_CLOSE, null, 'script'],
+              [lex.TokenType.EOF],
+            ]);
+      });
+
+      it('should unescape sequences in escapable raw text', () => {
+        expect(tokenizeAndHumanizeParts(
+                   '<title>abc\ndef\\nghi\\tjkl\\`\\\'\\"mno</title>', {escapedString: true}))
+            .toEqual([
+              [lex.TokenType.TAG_OPEN_START, null, 'title'],
+              [lex.TokenType.TAG_OPEN_END],
+              [lex.TokenType.ESCAPABLE_RAW_TEXT, 'abc\ndef\nghi\tjkl`\'"mno'],
+              [lex.TokenType.TAG_CLOSE, null, 'title'],
+              [lex.TokenType.EOF],
+            ]);
+      });
+
+      it('should parse over escape sequences in tag definitions', () => {
+        expect(tokenizeAndHumanizeParts('<t a=\\"b\\" \\n c=\\\'d\\\'>', {escapedString: true}))
+            .toEqual([
+              [lex.TokenType.TAG_OPEN_START, null, 't'],
+              [lex.TokenType.ATTR_NAME, null, 'a'],
+              [lex.TokenType.ATTR_VALUE, 'b'],
+              [lex.TokenType.ATTR_NAME, null, 'c'],
+              [lex.TokenType.ATTR_VALUE, 'd'],
+              [lex.TokenType.TAG_OPEN_END],
+              [lex.TokenType.EOF],
+            ]);
+      });
+
+      it('should tokenize the correct span when there are escape sequences', () => {
+        const text =
+            'selector: "app-root",\ntemplate: "line 1\\n\\"line 2\\"\\nline 3",\ninputs: []';
+        const range = {
+          startPos: 33,
+          startLine: 1,
+          startCol: 10,
+          endPos: 59,
+        };
+        expect(tokenizeAndHumanizeParts(text, {range, escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, 'line 1\n"line 2"\nline 3'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeSourceSpans(text, {range, escapedString: true})).toEqual([
+          [lex.TokenType.TEXT, 'line 1\\n\\"line 2\\"\\nline 3'],
+          [lex.TokenType.EOF, ''],
+        ]);
+      });
+
+      it('should account for escape sequences when computing source spans', () => {
+        const text = '<t>line 1</t>\n' +  // <- unescaped line break
+            '<t>line 2</t>\\n' +          // <- escaped line break
+            '<t>line 3\\\n' +             // <- line continuation
+            '</t>';
+
+        expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, null, 't'], [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TEXT, 'line 1'], [lex.TokenType.TAG_CLOSE, null, 't'],
+          [lex.TokenType.TEXT, '\n'],
+
+          [lex.TokenType.TAG_OPEN_START, null, 't'], [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TEXT, 'line 2'], [lex.TokenType.TAG_CLOSE, null, 't'],
+          [lex.TokenType.TEXT, '\n'],
+
+          [lex.TokenType.TAG_OPEN_START, null, 't'], [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TEXT, 'line 3'],  // <- line continuation does not appear in token
+          [lex.TokenType.TAG_CLOSE, null, 't'],
+
+          [lex.TokenType.EOF]
+        ]);
+        expect(tokenizeAndHumanizeLineColumn(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '0:0'],
+          [lex.TokenType.TAG_OPEN_END, '0:2'],
+          [lex.TokenType.TEXT, '0:3'],
+          [lex.TokenType.TAG_CLOSE, '0:9'],
+          [lex.TokenType.TEXT, '0:13'],  // <- real newline increments the row
+
+          [lex.TokenType.TAG_OPEN_START, '1:0'],
+          [lex.TokenType.TAG_OPEN_END, '1:2'],
+          [lex.TokenType.TEXT, '1:3'],
+          [lex.TokenType.TAG_CLOSE, '1:9'],
+          [lex.TokenType.TEXT, '1:14'],  // <- escaped newline does not increment the row
+
+          [lex.TokenType.TAG_OPEN_START, '1:15'],
+          [lex.TokenType.TAG_OPEN_END, '1:17'],
+          [lex.TokenType.TEXT, '1:18'],  // <- the line continuation increments the row
+          [lex.TokenType.TAG_CLOSE, '2:0'],
+
+          [lex.TokenType.EOF, '2:4'],
+        ]);
+        expect(tokenizeAndHumanizeSourceSpans(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '<t'], [lex.TokenType.TAG_OPEN_END, '>'],
+          [lex.TokenType.TEXT, 'line 1'], [lex.TokenType.TAG_CLOSE, '</t>'],
+          [lex.TokenType.TEXT, '\n'],
+
+          [lex.TokenType.TAG_OPEN_START, '<t'], [lex.TokenType.TAG_OPEN_END, '>'],
+          [lex.TokenType.TEXT, 'line 2'], [lex.TokenType.TAG_CLOSE, '</t>\\'],
+          [lex.TokenType.TEXT, 'n'],
+
+          [lex.TokenType.TAG_OPEN_START, '<t'], [lex.TokenType.TAG_OPEN_END, '>'],
+          [lex.TokenType.TEXT, 'line 3\\\n'], [lex.TokenType.TAG_CLOSE, '</t>'],
+
+          [lex.TokenType.EOF, '']
+        ]);
+      });
+    });
   });
 }

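As a sanity check on the `range` used in the span test above, the offsets can be derived by hand (plain arithmetic over the escaped string; this is not code from the commit):

```ts
const text =
    'selector: "app-root",\ntemplate: "line 1\\n\\"line 2\\"\\nline 3",\ninputs: []';

// 'selector: "app-root",' is 21 chars, plus 1 for '\n'  -> line 1 starts at offset 22
// 'template: "' is 11 more chars                        -> template text starts at 33
// the escaped body 'line 1\n\"line 2\"\nline 3' is 26 chars -> it ends at 59
console.log(text.slice(33, 59));  // line 1\n\"line 2\"\nline 3 (still escaped)
```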