fix(ivy): correctly tokenize escaped characters in templates (#28978)

Previously, the lexer incorrectly computed the start of a character indicated by an escape sequence, which caused the preceding token to include the start of that escape sequence. In particular, this affected the name extracted from an opening tag when the name was terminated by an escape sequence. For example, `<t\n>` would have the name `t\` rather than `t`. This fix refactors the lexer to use a "cursor" object to iterate over the characters in the template source. There are two cursor implementations: one expects a simple string; the other expects a string containing JavaScript escape sequences that need to be unescaped. PR Close #28978
2019-02-26 13:06:26 +00:00 · 2019-02-26 13:06:26 +00:00 · f7c867ebc2
commit f7c867ebc2
parent 76979e12c9
2 changed files with 476 additions and 347 deletions
--- a/packages/compiler/src/ml_parser/lexer.ts
+++ b/packages/compiler/src/ml_parser/lexer.ts
--- a/packages/compiler/test/ml_parser/lexer_spec.ts
+++ b/packages/compiler/test/ml_parser/lexer_spec.ts
@ -456,7 +456,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
          lex.TokenType.TEXT,
          'Unknown entity "tbo" - use the "&#<decimal>;" or  "&#x<hex>;" syntax', '0:0'
        ]]);
-        expect(tokenizeAndHumanizeErrors('&#asdf;')).toEqual([
+        expect(tokenizeAndHumanizeErrors('&#3sdf;')).toEqual([
          [lex.TokenType.TEXT, 'Unexpected character "s"', '0:3']
        ]);
        expect(tokenizeAndHumanizeErrors('&#xasdf;')).toEqual([
@ -891,7 +891,6 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
            .toEqual([
              [lex.TokenType.TEXT, '\' \" \` \\ \n \n \v \t \b \f'],
              [lex.TokenType.EOF],
-
            ]);
      });

@ -1033,6 +1032,68 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
            ]);
      });

+      it('should parse over escaped new line in tag definitions', () => {
+        const text = '<t\\n></t>';
+        expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '', 't'],
+          [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TAG_CLOSE, '', 't'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should parse over escaped characters in tag definitions', () => {
+        const text = '<t\u{000013}></t>';
+        expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '', 't'],
+          [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TAG_CLOSE, '', 't'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should unescape characters in tag names', () => {
+        const text = '<t\\x64></t\\x64>';
+        expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '', 'td'],
+          [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TAG_CLOSE, '', 'td'],
+          [lex.TokenType.EOF],
+        ]);
+        expect(tokenizeAndHumanizeSourceSpans(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '<t\\x64'],
+          [lex.TokenType.TAG_OPEN_END, '>'],
+          [lex.TokenType.TAG_CLOSE, '</t\\x64>'],
+          [lex.TokenType.EOF, ''],
+        ]);
+      });
+
+      it('should unescape characters in attributes', () => {
+        const text = '<t \\x64="\\x65"></t>';
+        expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '', 't'],
+          [lex.TokenType.ATTR_NAME, '', 'd'],
+          [lex.TokenType.ATTR_QUOTE, '"'],
+          [lex.TokenType.ATTR_VALUE, 'e'],
+          [lex.TokenType.ATTR_QUOTE, '"'],
+          [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TAG_CLOSE, '', 't'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
+      it('should parse over escaped new line in attribute values', () => {
+        const text = '<t a=b\\n></t>';
+        expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
+          [lex.TokenType.TAG_OPEN_START, '', 't'],
+          [lex.TokenType.ATTR_NAME, '', 'a'],
+          [lex.TokenType.ATTR_VALUE, 'b'],
+          [lex.TokenType.TAG_OPEN_END],
+          [lex.TokenType.TAG_CLOSE, '', 't'],
+          [lex.TokenType.EOF],
+        ]);
+      });
+
      it('should tokenize the correct span when there are escape sequences', () => {
        const text =
            'selector: "app-root",\ntemplate: "line 1\\n\\"line 2\\"\\nline 3",\ninputs: []';
@ -1057,7 +1118,6 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
            '<t>line 2</t>\\n' +          // <- escaped line break
            '<t>line 3\\\n' +             // <- line continuation
            '</t>';
-
        expect(tokenizeAndHumanizeParts(text, {escapedString: true})).toEqual([
          [lex.TokenType.TAG_OPEN_START, '', 't'], [lex.TokenType.TAG_OPEN_END],
          [lex.TokenType.TEXT, 'line 1'], [lex.TokenType.TAG_CLOSE, '', 't'],
@ -1084,7 +1144,7 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
          [lex.TokenType.TAG_OPEN_END, '1:2'],
          [lex.TokenType.TEXT, '1:3'],
          [lex.TokenType.TAG_CLOSE, '1:9'],
-          [lex.TokenType.TEXT, '1:14'],  // <- escaped newline does not increment the row
+          [lex.TokenType.TEXT, '1:13'],  // <- escaped newline does not increment the row

          [lex.TokenType.TAG_OPEN_START, '1:15'],
          [lex.TokenType.TAG_OPEN_END, '1:17'],
@ -1099,8 +1159,8 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_u
          [lex.TokenType.TEXT, '\n'],

          [lex.TokenType.TAG_OPEN_START, '<t'], [lex.TokenType.TAG_OPEN_END, '>'],
-          [lex.TokenType.TEXT, 'line 2'], [lex.TokenType.TAG_CLOSE, '</t>\\'],
-          [lex.TokenType.TEXT, 'n'],
+          [lex.TokenType.TEXT, 'line 2'], [lex.TokenType.TAG_CLOSE, '</t>'],
+          [lex.TokenType.TEXT, '\\n'],

          [lex.TokenType.TAG_OPEN_START, '<t'], [lex.TokenType.TAG_OPEN_END, '>'],
          [lex.TokenType.TEXT, 'line 3\\\n'], [lex.TokenType.TAG_CLOSE, '</t>'],