From 0e2a577b42e71ca62e72bdac5f8a2aad720d5922 Mon Sep 17 00:00:00 2001 From: Pete Bacon Darwin Date: Mon, 2 Mar 2020 10:57:47 +0000 Subject: [PATCH] fix(localize): improve matching and parsing of XTB translation files (#35793) This commit improves the `canParse()` method to check that the file is valid XML and has the expected root node. Previously it was relying upon a regular expression to do this. PR Close #35793 --- .../xtb_translation_parser.ts | 131 +++--- .../xtb_translation_parser_spec.ts | 387 ++++++++++++++++-- 2 files changed, 433 insertions(+), 85 deletions(-) diff --git a/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xtb_translation_parser.ts b/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xtb_translation_parser.ts index a2f122f812..c3a6388d18 100644 --- a/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xtb_translation_parser.ts +++ b/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xtb_translation_parser.ts @@ -5,7 +5,7 @@ * Use of this source code is governed by an MIT-style license that can be * found in the LICENSE file at https://angular.io/license */ -import {Element, Node, XmlParser, visitAll} from '@angular/compiler'; +import {Element, ParseErrorLevel, visitAll} from '@angular/compiler'; import {ɵParsedTranslation} from '@angular/localize'; import {extname} from 'path'; @@ -14,87 +14,100 @@ import {BaseVisitor} from '../base_visitor'; import {MessageSerializer} from '../message_serialization/message_serializer'; import {TargetMessageRenderer} from '../message_serialization/target_message_renderer'; -import {TranslationParseError} from './translation_parse_error'; import {ParsedTranslationBundle, TranslationParser} from './translation_parser'; -import {getAttrOrThrow, parseInnerRange} from './translation_utils'; - +import {XmlTranslationParserHint, addParseDiagnostic, addParseError, canParseXml, getAttribute, parseInnerRange} from './translation_utils'; /** * A translation parser that can load XB files. */ -export class XtbTranslationParser implements TranslationParser { - constructor(private diagnostics: Diagnostics = new Diagnostics()) {} - - canParse(filePath: string, contents: string): boolean { +export class XtbTranslationParser implements TranslationParser { + canParse(filePath: string, contents: string): XmlTranslationParserHint|false { const extension = extname(filePath); - return (extension === '.xtb' || extension === '.xmb') && - contents.includes(' attr.name === 'lang'); + const bundle: ParsedTranslationBundle = { + locale: langAttr && langAttr.value, + translations: {}, + diagnostics: new Diagnostics() + }; + errors.forEach(e => addParseError(bundle.diagnostics, e)); + + const bundleVisitor = new XtbVisitor(); + visitAll(bundleVisitor, element.children, bundle); + return bundle; + } + + private extractBundleDeprecated(filePath: string, contents: string) { + const hint = this.canParse(filePath, contents); + if (!hint) { + throw new Error(`Unable to parse "${filePath}" as XMB/XTB format.`); + } + const bundle = this.extractBundle(hint); + if (bundle.diagnostics.hasErrors) { + const message = + bundle.diagnostics.formatDiagnostics(`Failed to parse "${filePath}" as XMB/XTB format`); + throw new Error(message); } return bundle; } } class XtbVisitor extends BaseVisitor { - static extractBundle(diagnostics: Diagnostics, messageBundles: Node[]): ParsedTranslationBundle - |undefined { - const visitor = new this(diagnostics); - const bundles: ParsedTranslationBundle[] = visitAll(visitor, messageBundles, undefined); - return bundles[0]; - } - - constructor(private diagnostics: Diagnostics) { super(); } - - visitElement(element: Element, bundle: ParsedTranslationBundle|undefined): any { + visitElement(element: Element, bundle: ParsedTranslationBundle): any { switch (element.name) { - case 'translationbundle': - if (bundle) { - throw new TranslationParseError( - element.sourceSpan, ' elements can not be nested'); - } - const langAttr = element.attrs.find((attr) => attr.name === 'lang'); - bundle = { - locale: langAttr && langAttr.value, - translations: {}, - diagnostics: this.diagnostics - }; - visitAll(this, element.children, bundle); - return bundle; - case 'translation': - if (!bundle) { - throw new TranslationParseError( - element.sourceSpan, ' must be inside a '); + // Error if no `id` attribute + const id = getAttribute(element, 'id'); + if (id === undefined) { + addParseDiagnostic( + bundle.diagnostics, element.sourceSpan, + `Missing required "id" attribute on element.`, ParseErrorLevel.ERROR); + return; } - const id = getAttrOrThrow(element, 'id'); - if (bundle.translations.hasOwnProperty(id)) { - throw new TranslationParseError( - element.sourceSpan, `Duplicated translations for message "${id}"`); - } else { - try { - bundle.translations[id] = serializeTargetMessage(element); - } catch (error) { - if (typeof error === 'string') { - this.diagnostics.warn( - `Could not parse message with id "${id}" - perhaps it has an unrecognised ICU format?\n` + - error); - } else { - throw error; - } + + // Error if there is already a translation with the same id + if (bundle.translations[id] !== undefined) { + addParseDiagnostic( + bundle.diagnostics, element.sourceSpan, `Duplicated translations for message "${id}"`, + ParseErrorLevel.ERROR); + return; + } + + try { + bundle.translations[id] = serializeTargetMessage(element); + } catch (error) { + if (typeof error === 'string') { + bundle.diagnostics.warn( + `Could not parse message with id "${id}" - perhaps it has an unrecognised ICU format?\n` + + error); + } else if (error.span && error.msg && error.level) { + addParseDiagnostic(bundle.diagnostics, error.span, error.msg, error.level); + } else { + throw error; } } break; default: - throw new TranslationParseError(element.sourceSpan, 'Unexpected tag'); + addParseDiagnostic( + bundle.diagnostics, element.sourceSpan, `Unexpected <${element.name}> tag.`, + ParseErrorLevel.ERROR); } } } diff --git a/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xtb_translation_parser_spec.ts b/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xtb_translation_parser_spec.ts index d07babbb9b..dbb44a9a94 100644 --- a/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xtb_translation_parser_spec.ts +++ b/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xtb_translation_parser_spec.ts @@ -13,24 +13,24 @@ describe('XtbTranslationParser', () => { describe('canParse()', () => { it('should return true if the file extension is `.xtb` or `.xmb` and it contains the `` tag', () => { - const parser = new XtbTranslationParser(new Diagnostics()); - expect(parser.canParse('/some/file.xtb', '')).toBe(true); - expect(parser.canParse('/some/file.xmb', '')).toBe(true); - expect(parser.canParse('/some/file.xtb', '')).toBe(true); - expect(parser.canParse('/some/file.xmb', '')).toBe(true); + const parser = new XtbTranslationParser(); + expect(parser.canParse('/some/file.xtb', '')).toBeTruthy(); + expect(parser.canParse('/some/file.xmb', '')).toBeTruthy(); + expect(parser.canParse('/some/file.xtb', '')).toBeTruthy(); + expect(parser.canParse('/some/file.xmb', '')).toBeTruthy(); expect(parser.canParse('/some/file.json', '')).toBe(false); expect(parser.canParse('/some/file.xmb', '')).toBe(false); expect(parser.canParse('/some/file.xtb', '')).toBe(false); }); }); - describe('parse()', () => { + describe('parse() [without hint]', () => { it('should extract the locale from the file contents', () => { const XTB = ` rab `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.locale).toEqual('fr'); }); @@ -39,17 +39,17 @@ describe('XtbTranslationParser', () => { const XTB = ` - + - + ]> rab `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations['8841459487341224498']).toEqual(ɵmakeParsedTranslation(['rab'])); @@ -60,7 +60,7 @@ describe('XtbTranslationParser', () => { rab `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations['8877975308926375834']) @@ -73,7 +73,7 @@ describe('XtbTranslationParser', () => { ** {VAR_PLURAL, plural, =1 {rab}} `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations['7717087045075616176']) @@ -90,7 +90,7 @@ describe('XtbTranslationParser', () => { toto tata `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations[ɵcomputeMsgId('foo')]).toEqual(ɵmakeParsedTranslation(['oof'])); @@ -103,7 +103,7 @@ describe('XtbTranslationParser', () => { `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations[ɵcomputeMsgId('{$LINE_BREAK}{$TAG_IMG}{$TAG_IMG_1}')]) @@ -123,7 +123,7 @@ describe('XtbTranslationParser', () => { `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations[ɵcomputeMsgId('hello {$START_TAG_SPAN}{$CLOSE_TAG_SPAN}')]) @@ -150,7 +150,7 @@ describe('XtbTranslationParser', () => { Le test: {VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {profondément imbriqué}}} =other {beaucoup}} `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations[ɵcomputeMsgId('Test: {$ICU}')]) @@ -177,7 +177,7 @@ describe('XtbTranslationParser', () => { multi\nlignes `; - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); expect(result.translations[ɵcomputeMsgId('multi\nlines')]) @@ -194,8 +194,7 @@ describe('XtbTranslationParser', () => { `; // Parsing the file should not fail - const diagnostics = new Diagnostics(); - const parser = new XtbTranslationParser(diagnostics); + const parser = new XtbTranslationParser(); const result = parser.parse('/some/file.xtb', XTB); // We should be able to read the valid message @@ -204,7 +203,7 @@ describe('XtbTranslationParser', () => { // Trying to access the invalid message should fail expect(result.translations['invalid']).toBeUndefined(); - expect(diagnostics.messages).toContain({ + expect(result.diagnostics.messages).toContain({ type: 'warning', message: `Could not parse message with id "invalid" - perhaps it has an unrecognised ICU format?\n` + @@ -219,9 +218,11 @@ describe('XtbTranslationParser', () => { ''; expect(() => { - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); parser.parse('/some/file.xtb', XTB); - }).toThrowError(/ elements can not be nested/); + }).toThrowError(`Failed to parse "/some/file.xtb" as XMB/XTB format +ERRORS: + - Unexpected tag. ("[ERROR ->]"): /some/file.xtb@0:19`); }); it('should throw when a translation has no id attribute', () => { @@ -231,7 +232,7 @@ describe('XtbTranslationParser', () => { `; expect(() => { - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); parser.parse('/some/file.xtb', XTB); }).toThrowError(/Missing required "id" attribute/); }); @@ -244,7 +245,7 @@ describe('XtbTranslationParser', () => { `; expect(() => { - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); parser.parse('/some/file.xtb', XTB); }).toThrowError(/Duplicated translations for message "deadbeef"/); }); @@ -260,7 +261,7 @@ describe('XtbTranslationParser', () => { `; expect(() => { - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); parser.parse('/some/file.xtb', XTB); }).toThrowError(/Invalid element found in message/); }); @@ -272,10 +273,344 @@ describe('XtbTranslationParser', () => { `; expect(() => { - const parser = new XtbTranslationParser(new Diagnostics()); + const parser = new XtbTranslationParser(); parser.parse('/some/file.xtb', XTB); }).toThrowError(/required "name" attribute/gi); }); }); }); + + describe('parse() [with hint]', () => { + it('should extract the locale from the file contents', () => { + const XTB = ` + + rab + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + expect(result.locale).toEqual('fr'); + }); + + it('should extract basic messages', () => { + const XTB = ` + + + + + + + + + ]> + + rab + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations['8841459487341224498']).toEqual(ɵmakeParsedTranslation(['rab'])); + }); + + it('should extract translations with simple placeholders', () => { + const XTB = ` + + rab + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations['8877975308926375834']) + .toEqual(ɵmakeParsedTranslation(['', 'rab', ''], ['START_PARAGRAPH', 'CLOSE_PARAGRAPH'])); + }); + + it('should extract translations with simple ICU expressions', () => { + const XTB = ` + + ** + {VAR_PLURAL, plural, =1 {rab}} + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations['7717087045075616176']) + .toEqual(ɵmakeParsedTranslation(['*', '*'], ['ICU'])); + expect(result.translations['5115002811911870583']) + .toEqual(ɵmakeParsedTranslation( + ['{VAR_PLURAL, plural, =1 {{START_PARAGRAPH}rab{CLOSE_PARAGRAPH}}}'], [])); + }); + + it('should extract translations with duplicate source messages', () => { + const XTB = ` + + oof + toto + tata + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations[ɵcomputeMsgId('foo')]).toEqual(ɵmakeParsedTranslation(['oof'])); + expect(result.translations['i']).toEqual(ɵmakeParsedTranslation(['toto'])); + expect(result.translations['bar']).toEqual(ɵmakeParsedTranslation(['tata'])); + }); + + it('should extract translations with only placeholders, which are re-ordered', () => { + const XTB = ` + + + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations[ɵcomputeMsgId('{$LINE_BREAK}{$TAG_IMG}{$TAG_IMG_1}')]) + .toEqual( + ɵmakeParsedTranslation(['', '', '', ''], ['TAG_IMG_1', 'TAG_IMG', 'LINE_BREAK'])); + }); + + it('should extract translations with empty target', () => { + /** + * Source HTML: + * + * ``` + *
hello
+ * ``` + */ + const XTB = ` + + + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations[ɵcomputeMsgId('hello {$START_TAG_SPAN}{$CLOSE_TAG_SPAN}')]) + .toEqual(ɵmakeParsedTranslation([''])); + }); + + it('should extract translations with deeply nested ICUs', () => { + /** + * Source HTML: + * + * ``` + * Test: { count, plural, =0 { { sex, select, other {

deeply nested

}} } =other {a lot}} + * ``` + * + * Note that the message gets split into two translation units: + * * The first one contains the outer message with an `ICU` placeholder + * * The second one is the ICU expansion itself + * + * Note that special markers `VAR_PLURAL` and `VAR_SELECT` are added, which are then + replaced by IVY at runtime with the actual values being rendered by the ICU expansion. + */ + const XTB = ` + + Le test: + {VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {profondément imbriqué}}} =other {beaucoup}} + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations[ɵcomputeMsgId('Test: {$ICU}')]) + .toEqual(ɵmakeParsedTranslation(['Le test: ', ''], ['ICU'])); + + expect( + result.translations[ɵcomputeMsgId( + '{VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {{START_PARAGRAPH}deeply nested{CLOSE_PARAGRAPH}}}} =other {beaucoup}}')]) + .toEqual(ɵmakeParsedTranslation([ + '{VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {{START_PARAGRAPH}profondément imbriqué{CLOSE_PARAGRAPH}}}} =other {beaucoup}}' + ])); + }); + + it('should extract translations containing multiple lines', () => { + /** + * Source HTML: + * + * ``` + *
multi + * lines
+ * ``` + */ + const XTB = ` + + multi\nlignes + `; + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + expect(result.translations[ɵcomputeMsgId('multi\nlines')]) + .toEqual(ɵmakeParsedTranslation(['multi\nlignes'])); + }); + + it('should warn on unrecognised ICU messages', () => { + // See https://github.com/angular/angular/issues/14046 + + const XTB = ` + + This is a valid message + {REGION_COUNT_1, plural, =0 {unused plural form} =1 {1 region} other {{REGION_COUNT_2} regions}} + `; + + // Parsing the file should not fail + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + + // We should be able to read the valid message + expect(result.translations['valid']) + .toEqual(ɵmakeParsedTranslation(['This is a valid message'])); + + // Trying to access the invalid message should fail + expect(result.translations['invalid']).toBeUndefined(); + expect(result.diagnostics.messages).toContain({ + type: 'warning', + message: + `Could not parse message with id "invalid" - perhaps it has an unrecognised ICU format?\n` + + `Error: Unexpected character "EOF" (Do you have an unescaped "{" in your template? Use "{{ '{' }}") to escape it.)\n` + + `Error: Invalid ICU message. Missing '}'.` + }); + }); + + describe('[structure errors]', () => { + it('should throw when there are nested translationbundle tags', () => { + const XTB = + ''; + + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message) + .toEqual( + `Unexpected tag. ("[ERROR ->]"): /some/file.xtb@0:19`); + }); + + it('should throw when a translation has no id attribute', () => { + const XTB = ` + + + `; + + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message) + .toEqual(`Missing required "id" attribute on element. (" + + [ERROR ->] + "): /some/file.xtb@2:12`); + }); + + it('should throw on duplicate translation id', () => { + const XTB = ` + + + + `; + + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message) + .toEqual(`Duplicated translations for message "deadbeef" (" + + + [ERROR ->] + "): /some/file.xtb@3:12`); + }); + }); + + describe('[message errors]', () => { + it('should throw on unknown message tags', () => { + const XTB = ` + + + + + `; + + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message).toEqual(`Invalid element found in message. (" + + + [ERROR ->] + + "): /some/file.xtb@3:14`); + }); + + it('should throw when a placeholder misses a name attribute', () => { + const XTB = ` + + + `; + + const parser = new XtbTranslationParser(); + const hint = parser.canParse('/some/file.xtb', XTB); + if (!hint) { + return fail('expected XTB to be valid'); + } + const result = parser.parse('/some/file.xtb', XTB, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message) + .toEqual(`Missing required "name" attribute: (" + + [ERROR ->] + "): /some/file.xtb@2:39`); + }); + }); + }); });