feat(compiler): support tokenizing a sub-section of an input string (#28055)
The lexer that does the tokenizing can now process only a part of the source string, by passing a `range` property in the `options` argument. The locations of the nodes that are tokenized will now take into account the position of the span in the context of the original source string. This `range` option is, in turn, exposed from the template parser as well. Being able to process parts of files helps to enable SourceMap support when compiling inline component templates. PR Close #28055
This commit is contained in:
parent
1b0580a9ec
commit
eeb560ac88
|
@ -50,6 +50,13 @@ export class TokenizeResult {
|
||||||
constructor(public tokens: Token[], public errors: TokenError[]) {}
|
constructor(public tokens: Token[], public errors: TokenError[]) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface LexerRange {
|
||||||
|
startPos: number;
|
||||||
|
startLine: number;
|
||||||
|
startCol: number;
|
||||||
|
endPos: number;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Options that modify how the text is tokenized.
|
* Options that modify how the text is tokenized.
|
||||||
*/
|
*/
|
||||||
|
@ -58,6 +65,11 @@ export interface TokenizeOptions {
|
||||||
tokenizeExpansionForms?: boolean;
|
tokenizeExpansionForms?: boolean;
|
||||||
/** How to tokenize interpolation markers. */
|
/** How to tokenize interpolation markers. */
|
||||||
interpolationConfig?: InterpolationConfig;
|
interpolationConfig?: InterpolationConfig;
|
||||||
|
/**
|
||||||
|
* The start and end point of the text to parse within the `source` string.
|
||||||
|
* The entire `source` string is parsed if this is not provided.
|
||||||
|
* */
|
||||||
|
range?: LexerRange;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function tokenize(
|
export function tokenize(
|
||||||
|
@ -84,14 +96,14 @@ class _ControlFlowError {
|
||||||
// See http://www.w3.org/TR/html51/syntax.html#writing
|
// See http://www.w3.org/TR/html51/syntax.html#writing
|
||||||
class _Tokenizer {
|
class _Tokenizer {
|
||||||
private _input: string;
|
private _input: string;
|
||||||
private _length: number;
|
private _end: number;
|
||||||
private _tokenizeIcu: boolean;
|
private _tokenizeIcu: boolean;
|
||||||
private _interpolationConfig: InterpolationConfig;
|
private _interpolationConfig: InterpolationConfig;
|
||||||
private _peek: number = -1;
|
private _peek: number = -1;
|
||||||
private _nextPeek: number = -1;
|
private _nextPeek: number = -1;
|
||||||
private _index: number = -1;
|
private _index: number;
|
||||||
private _line: number = 0;
|
private _line: number;
|
||||||
private _column: number = -1;
|
private _column: number;
|
||||||
private _currentTokenStart: ParseLocation|null = null;
|
private _currentTokenStart: ParseLocation|null = null;
|
||||||
private _currentTokenType: TokenType|null = null;
|
private _currentTokenType: TokenType|null = null;
|
||||||
private _expansionCaseStack: TokenType[] = [];
|
private _expansionCaseStack: TokenType[] = [];
|
||||||
|
@ -112,8 +124,26 @@ class _Tokenizer {
|
||||||
this._tokenizeIcu = options.tokenizeExpansionForms || false;
|
this._tokenizeIcu = options.tokenizeExpansionForms || false;
|
||||||
this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
|
this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
|
||||||
this._input = _file.content;
|
this._input = _file.content;
|
||||||
this._length = _file.content.length;
|
if (options.range) {
|
||||||
this._advance();
|
this._end = options.range.endPos;
|
||||||
|
this._index = options.range.startPos;
|
||||||
|
this._line = options.range.startLine;
|
||||||
|
this._column = options.range.startCol;
|
||||||
|
} else {
|
||||||
|
this._end = this._input.length;
|
||||||
|
this._index = 0;
|
||||||
|
this._line = 0;
|
||||||
|
this._column = 0;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
this._initPeek();
|
||||||
|
} catch (e) {
|
||||||
|
if (e instanceof _ControlFlowError) {
|
||||||
|
this.errors.push(e.error);
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private _processCarriageReturns(content: string): string {
|
private _processCarriageReturns(content: string): string {
|
||||||
|
@ -232,8 +262,8 @@ class _Tokenizer {
|
||||||
return new _ControlFlowError(error);
|
return new _ControlFlowError(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
private _advance() {
|
private _advance(processingEscapeSequence?: boolean) {
|
||||||
if (this._index >= this._length) {
|
if (this._index >= this._end) {
|
||||||
throw this._createError(_unexpectedCharacterErrorMsg(chars.$EOF), this._getSpan());
|
throw this._createError(_unexpectedCharacterErrorMsg(chars.$EOF), this._getSpan());
|
||||||
}
|
}
|
||||||
if (this._peek === chars.$LF) {
|
if (this._peek === chars.$LF) {
|
||||||
|
@ -243,9 +273,17 @@ class _Tokenizer {
|
||||||
this._column++;
|
this._column++;
|
||||||
}
|
}
|
||||||
this._index++;
|
this._index++;
|
||||||
this._peek = this._index >= this._length ? chars.$EOF : this._input.charCodeAt(this._index);
|
this._initPeek(processingEscapeSequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the _peek and _nextPeek properties based on the current _index.
|
||||||
|
* @param processingEscapeSequence whether we are in the middle of processing an escape sequence.
|
||||||
|
*/
|
||||||
|
private _initPeek(processingEscapeSequence?: boolean) {
|
||||||
|
this._peek = this._index >= this._end ? chars.$EOF : this._input.charCodeAt(this._index);
|
||||||
this._nextPeek =
|
this._nextPeek =
|
||||||
this._index + 1 >= this._length ? chars.$EOF : this._input.charCodeAt(this._index + 1);
|
this._index + 1 >= this._end ? chars.$EOF : this._input.charCodeAt(this._index + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private _attemptCharCode(charCode: number): boolean {
|
private _attemptCharCode(charCode: number): boolean {
|
||||||
|
@ -274,7 +312,7 @@ class _Tokenizer {
|
||||||
|
|
||||||
private _attemptStr(chars: string): boolean {
|
private _attemptStr(chars: string): boolean {
|
||||||
const len = chars.length;
|
const len = chars.length;
|
||||||
if (this._index + len > this._length) {
|
if (this._index + len > this._end) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const initialPosition = this._savePosition();
|
const initialPosition = this._savePosition();
|
||||||
|
|
|
@ -18,6 +18,7 @@ import * as html from '../../ml_parser/ast';
|
||||||
import {HtmlParser} from '../../ml_parser/html_parser';
|
import {HtmlParser} from '../../ml_parser/html_parser';
|
||||||
import {WhitespaceVisitor} from '../../ml_parser/html_whitespaces';
|
import {WhitespaceVisitor} from '../../ml_parser/html_whitespaces';
|
||||||
import {DEFAULT_INTERPOLATION_CONFIG, InterpolationConfig} from '../../ml_parser/interpolation_config';
|
import {DEFAULT_INTERPOLATION_CONFIG, InterpolationConfig} from '../../ml_parser/interpolation_config';
|
||||||
|
import {LexerRange} from '../../ml_parser/lexer';
|
||||||
import {isNgContainer as checkIsNgContainer, splitNsName} from '../../ml_parser/tags';
|
import {isNgContainer as checkIsNgContainer, splitNsName} from '../../ml_parser/tags';
|
||||||
import {mapLiteral} from '../../output/map_util';
|
import {mapLiteral} from '../../output/map_util';
|
||||||
import * as o from '../../output/output_ast';
|
import * as o from '../../output/output_ast';
|
||||||
|
@ -1574,6 +1575,11 @@ export interface ParseTemplateOptions {
|
||||||
* How to parse interpolation markers.
|
* How to parse interpolation markers.
|
||||||
*/
|
*/
|
||||||
interpolationConfig?: InterpolationConfig;
|
interpolationConfig?: InterpolationConfig;
|
||||||
|
/**
|
||||||
|
* The start and end point of the text to parse within the `source` string.
|
||||||
|
* The entire `source` string is parsed if this is not provided.
|
||||||
|
* */
|
||||||
|
range?: LexerRange;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -55,6 +55,28 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('content ranges', () => {
|
||||||
|
it('should only process the text within the range', () => {
|
||||||
|
expect(tokenizeAndHumanizeSourceSpans(
|
||||||
|
'pre 1\npre 2\npre 3 `line 1\nline 2\nline 3` post 1\n post 2\n post 3',
|
||||||
|
{range: {startPos: 19, startLine: 2, startCol: 7, endPos: 39}}))
|
||||||
|
.toEqual([
|
||||||
|
[lex.TokenType.TEXT, 'line 1\nline 2\nline 3'],
|
||||||
|
[lex.TokenType.EOF, ''],
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should take into account preceding (non-processed) lines and columns', () => {
|
||||||
|
expect(tokenizeAndHumanizeLineColumn(
|
||||||
|
'pre 1\npre 2\npre 3 `line 1\nline 2\nline 3` post 1\n post 2\n post 3',
|
||||||
|
{range: {startPos: 19, startLine: 2, startCol: 7, endPos: 39}}))
|
||||||
|
.toEqual([
|
||||||
|
[lex.TokenType.TEXT, '2:7'],
|
||||||
|
[lex.TokenType.EOF, '4:6'],
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('comments', () => {
|
describe('comments', () => {
|
||||||
it('should parse comments', () => {
|
it('should parse comments', () => {
|
||||||
expect(tokenizeAndHumanizeParts('<!--t\ne\rs\r\nt-->')).toEqual([
|
expect(tokenizeAndHumanizeParts('<!--t\ne\rs\r\nt-->')).toEqual([
|
||||||
|
|
Loading…
Reference in New Issue