From ff9f4de4f1147ba9ea6d17c31442a2eedcf4e0d2 Mon Sep 17 00:00:00 2001 From: Joey Perrott Date: Fri, 31 Jul 2020 12:26:39 -0700 Subject: [PATCH] fix(compiler): update unparsable character reference entity error messages (#38319) Within an angular template, when a character entity is unable to be parsed, previously a generic unexpected character error was thrown. This does not properly express the issue that was discovered as the issue is actually caused by the discovered character making the whole of the entity unparsable. The compiler will now instead inform via the error message what string was attempted to be parsed and what it was attempted to be parsed as. Example, for this template: ```

&#x123p

``` Before this change: `Unexpected character "p"` After this change: `Unable to parse entity "&#x123p" - hexadecimal character reference entities must end with ";"` Fixes #26067 PR Close #38319 --- packages/compiler/src/ml_parser/lexer.ts | 17 ++++++++++++++++- packages/compiler/test/ml_parser/lexer_spec.ts | 16 ++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index ffe386537e..38a82a7d23 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -138,6 +138,16 @@ function _unknownEntityErrorMsg(entitySrc: string): string { return `Unknown entity "${entitySrc}" - use the "&#;" or "&#x;" syntax`; } +function _unparsableEntityErrorMsg(type: CharacterReferenceType, entityStr: string): string { + return `Unable to parse entity "${entityStr}" - ${ + type} character reference entities must end with ";"`; +} + +enum CharacterReferenceType { + HEX = 'hexadecimal', + DEC = 'decimal', +} + class _ControlFlowError { constructor(public error: TokenError) {} } @@ -400,8 +410,13 @@ class _Tokenizer { const codeStart = this._cursor.clone(); this._attemptCharCodeUntilFn(isDigitEntityEnd); if (this._cursor.peek() != chars.$SEMICOLON) { + // Advance cursor to include the peeked character in the string provided to the error + // message. + this._cursor.advance(); + const entityType = isHex ? 
CharacterReferenceType.HEX : CharacterReferenceType.DEC; throw this._createError( - _unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan()); + _unparsableEntityErrorMsg(entityType, this._cursor.getChars(start)), + this._cursor.getSpan()); } const strNum = this._cursor.getChars(codeStart); this._cursor.advance(); diff --git a/packages/compiler/test/ml_parser/lexer_spec.ts b/packages/compiler/test/ml_parser/lexer_spec.ts index 79b387e86f..32895b12ea 100644 --- a/packages/compiler/test/ml_parser/lexer_spec.ts +++ b/packages/compiler/test/ml_parser/lexer_spec.ts @@ -477,12 +477,16 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u lex.TokenType.TEXT, 'Unknown entity "tbo" - use the "&#;" or "&#x;" syntax', '0:0' ]]); - expect(tokenizeAndHumanizeErrors('&#3sdf;')).toEqual([ - [lex.TokenType.TEXT, 'Unexpected character "s"', '0:3'] - ]); - expect(tokenizeAndHumanizeErrors('&#xasdf;')).toEqual([ - [lex.TokenType.TEXT, 'Unexpected character "s"', '0:4'] - ]); + expect(tokenizeAndHumanizeErrors('&#3sdf;')).toEqual([[ + lex.TokenType.TEXT, + 'Unable to parse entity "&#3s" - decimal character reference entities must end with ";"', + '0:4' + ]]); + expect(tokenizeAndHumanizeErrors('&#xasdf;')).toEqual([[ + lex.TokenType.TEXT, + 'Unable to parse entity "&#xas" - hexadecimal character reference entities must end with ";"', + '0:5' + ]]); expect(tokenizeAndHumanizeErrors('&#xABC')).toEqual([ [lex.TokenType.TEXT, 'Unexpected character "EOF"', '0:6']