diff --git a/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xliff2_translation_parser.ts b/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xliff2_translation_parser.ts index 12f35626a8..0dbd7f54ec 100644 --- a/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xliff2_translation_parser.ts +++ b/packages/localize/src/tools/src/translate/translation_files/translation_parsers/xliff2_translation_parser.ts @@ -5,20 +5,16 @@ * Use of this source code is governed by an MIT-style license that can be * found in the LICENSE file at https://angular.io/license */ -import {Element, Node, XmlParser, visitAll} from '@angular/compiler'; -import {ɵMessageId, ɵParsedTranslation} from '@angular/localize'; -import {extname} from 'path'; +import {Element, Node, ParseErrorLevel, visitAll} from '@angular/compiler'; +import {ɵParsedTranslation} from '@angular/localize'; import {Diagnostics} from '../../../diagnostics'; import {BaseVisitor} from '../base_visitor'; import {MessageSerializer} from '../message_serialization/message_serializer'; import {TargetMessageRenderer} from '../message_serialization/target_message_renderer'; -import {TranslationParseError} from './translation_parse_error'; import {ParsedTranslationBundle, TranslationParser} from './translation_parser'; -import {getAttrOrThrow, getAttribute, parseInnerRange} from './translation_utils'; - -const XLIFF_2_0_NS_REGEX = /xmlns="urn:oasis:names:tc:xliff:document:2.0"/; +import {XmlTranslationParserHint, addParseDiagnostic, addParseError, canParseXml, getAttribute, isNamedElement, parseInnerRange} from './translation_utils'; /** * A translation parser that can load translations from XLIFF 2 files. @@ -26,85 +22,132 @@ const XLIFF_2_0_NS_REGEX = /xmlns="urn:oasis:names:tc:xliff:document:2.0"/; * http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html * */ -export class Xliff2TranslationParser implements TranslationParser { - canParse(filePath: string, contents: string): boolean { - return (extname(filePath) === '.xlf') && XLIFF_2_0_NS_REGEX.test(contents); +export class Xliff2TranslationParser implements TranslationParser { + canParse(filePath: string, contents: string): XmlTranslationParserHint|false { + return canParseXml(filePath, contents, 'xliff', {version: '2.0'}); } - parse(filePath: string, contents: string): ParsedTranslationBundle { - const xmlParser = new XmlParser(); - const xml = xmlParser.parse(contents, filePath); - const bundle = Xliff2TranslationBundleVisitor.extractBundle(xml.rootNodes); - if (bundle === undefined) { + parse(filePath: string, contents: string, hint?: XmlTranslationParserHint): + ParsedTranslationBundle { + if (hint) { + return this.extractBundle(hint); + } else { + return this.extractBundleDeprecated(filePath, contents); + } + } + + private extractBundle({element, errors}: XmlTranslationParserHint): ParsedTranslationBundle { + const diagnostics = new Diagnostics(); + errors.forEach(e => addParseError(diagnostics, e)); + + if (element.children.length === 0) { + addParseDiagnostic( + diagnostics, element.sourceSpan, 'Missing expected element', + ParseErrorLevel.WARNING); + return {locale: undefined, translations: {}, diagnostics}; + } + + const locale = getAttribute(element, 'trgLang'); + const files = element.children.filter(isFileElement); + if (files.length === 0) { + addParseDiagnostic( + diagnostics, element.sourceSpan, 'No elements found in ', + ParseErrorLevel.WARNING); + } else if (files.length > 1) { + addParseDiagnostic( + diagnostics, files[1].sourceSpan, 'More than one element found in ', + ParseErrorLevel.WARNING); + } + + const bundle = {locale, translations: {}, diagnostics}; + const translationVisitor = new Xliff2TranslationVisitor(); + visitAll(translationVisitor, files[0].children, {bundle}); + + return bundle; + } + + private extractBundleDeprecated(filePath: string, contents: string) { + const hint = this.canParse(filePath, contents); + if (!hint) { throw new Error(`Unable to parse "${filePath}" as XLIFF 2.0 format.`); } + const bundle = this.extractBundle(hint); + if (bundle.diagnostics.hasErrors) { + const message = + bundle.diagnostics.formatDiagnostics(`Failed to parse "${filePath}" as XLIFF 2.0 format`); + throw new Error(message); + } return bundle; } } -interface BundleVisitorContext { - parsedLocale?: string; -} -class Xliff2TranslationBundleVisitor extends BaseVisitor { - private bundle: ParsedTranslationBundle|undefined; - - static extractBundle(xliff: Node[]): ParsedTranslationBundle|undefined { - const visitor = new this(); - visitAll(visitor, xliff, {}); - return visitor.bundle; - } - - visitElement(element: Element, {parsedLocale}: BundleVisitorContext): any { - if (element.name === 'xliff') { - parsedLocale = getAttribute(element, 'trgLang'); - return visitAll(this, element.children, {parsedLocale}); - } else if (element.name === 'file') { - this.bundle = { - locale: parsedLocale, - translations: Xliff2TranslationVisitor.extractTranslations(element), - diagnostics: new Diagnostics(), - }; - } else { - return visitAll(this, element.children, {parsedLocale}); - } - } +interface TranslationVisitorContext { + unit?: string; + bundle: ParsedTranslationBundle; } class Xliff2TranslationVisitor extends BaseVisitor { - private translations: Record<ɵMessageId, ɵParsedTranslation> = {}; - - static extractTranslations(file: Element): Record { - const visitor = new this(); - visitAll(visitor, file.children); - return visitor.translations; - } - - visitElement(element: Element, context: any): any { + visitElement(element: Element, {bundle, unit}: TranslationVisitorContext): any { if (element.name === 'unit') { - const externalId = getAttrOrThrow(element, 'id'); - if (this.translations[externalId] !== undefined) { - throw new TranslationParseError( - element.sourceSpan, `Duplicated translations for message "${externalId}"`); - } - visitAll(this, element.children, {unit: externalId}); + this.visitUnitElement(element, bundle); } else if (element.name === 'segment') { - assertTranslationUnit(element, context); - const targetMessage = element.children.find(isTargetElement); - if (targetMessage === undefined) { - throw new TranslationParseError(element.sourceSpan, 'Missing required element'); - } - this.translations[context.unit] = serializeTargetMessage(targetMessage); + this.visitSegmentElement(element, bundle, unit); } else { - return visitAll(this, element.children); + visitAll(this, element.children, {bundle, unit}); } } -} -function assertTranslationUnit(segment: Element, context: any) { - if (context === undefined || context.unit === undefined) { - throw new TranslationParseError( - segment.sourceSpan, 'Invalid element: should be a child of a element.'); + private visitUnitElement(element: Element, bundle: ParsedTranslationBundle): void { + // Error if no `id` attribute + const externalId = getAttribute(element, 'id'); + if (externalId === undefined) { + addParseDiagnostic( + bundle.diagnostics, element.sourceSpan, + `Missing required "id" attribute on element.`, ParseErrorLevel.ERROR); + return; + } + + // Error if there is already a translation with the same id + if (bundle.translations[externalId] !== undefined) { + addParseDiagnostic( + bundle.diagnostics, element.sourceSpan, + `Duplicated translations for message "${externalId}"`, ParseErrorLevel.ERROR); + return; + } + + visitAll(this, element.children, {bundle, unit: externalId}); + } + + private visitSegmentElement( + element: Element, bundle: ParsedTranslationBundle, unit: string|undefined): void { + // A `` element must be below a `` element + if (unit === undefined) { + addParseDiagnostic( + bundle.diagnostics, element.sourceSpan, + 'Invalid element: should be a child of a element.', + ParseErrorLevel.ERROR); + return; + } + + const targetMessage = element.children.find(isNamedElement('target')); + if (targetMessage === undefined) { + addParseDiagnostic( + bundle.diagnostics, element.sourceSpan, 'Missing required element', + ParseErrorLevel.ERROR); + return; + } + + try { + bundle.translations[unit] = serializeTargetMessage(targetMessage); + } catch (e) { + // Capture any errors from serialize the target message + if (e.span && e.msg && e.level) { + addParseDiagnostic(bundle.diagnostics, e.span, e.msg, e.level); + } else { + throw e; + } + } } } @@ -118,6 +161,6 @@ function serializeTargetMessage(source: Element): ɵParsedTranslation { return serializer.serialize(parseInnerRange(source)); } -function isTargetElement(node: Node): node is Element { - return node instanceof Element && node.name === 'target'; +function isFileElement(node: Node): node is Element { + return node instanceof Element && node.name === 'file'; } diff --git a/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xliff2_translation_parser_spec.ts b/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xliff2_translation_parser_spec.ts index 10381a3b2b..1ee37f708c 100644 --- a/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xliff2_translation_parser_spec.ts +++ b/packages/localize/src/tools/test/translate/translation_files/translation_parsers/xliff2_translation_parser_spec.ts @@ -10,24 +10,27 @@ import {Xliff2TranslationParser} from '../../../../src/translate/translation_fil describe('Xliff2TranslationParser', () => { describe('canParse()', () => { - it('should return true if the file extension is `.xlf` and it contains the XLIFF namespace', () => { - const parser = new Xliff2TranslationParser(); - expect( - parser.canParse( - '/some/file.xlf', - '')) - .toBe(true); - expect( - parser.canParse( - '/some/file.json', - '')) - .toBe(false); - expect(parser.canParse('/some/file.xlf', '')).toBe(false); - expect(parser.canParse('/some/file.json', '')).toBe(false); - }); + it('should return true if the file contains an element with version="2.0" attribute', + () => { + const parser = new Xliff2TranslationParser(); + expect(parser.canParse( + '/some/file.xlf', + '')) + .toBeTruthy(); + expect(parser.canParse( + '/some/file.json', + '')) + .toBeTruthy(); + expect(parser.canParse('/some/file.xliff', '')).toBeTruthy(); + expect(parser.canParse('/some/file.json', '')).toBeTruthy(); + expect(parser.canParse('/some/file.xlf', '')).toBe(false); + expect(parser.canParse('/some/file.xlf', '')).toBe(false); + expect(parser.canParse('/some/file.xlf', '')).toBe(false); + expect(parser.canParse('/some/file.json', '')).toBe(false); + }); }); - describe('parse()', () => { + describe('parse() [without hint]', () => { it('should extract the locale from the file contents', () => { const XLIFF = ` @@ -475,4 +478,550 @@ describe('Xliff2TranslationParser', () => { }); }); }); + + describe('parse() [with hint]', () => { + it('should extract the locale from the file contents', () => { + const XLIFF = ` + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + expect(result.locale).toEqual('fr'); + }); + + it('should return undefined locale if there is no locale in the file', () => { + const XLIFF = ` + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + expect(result.locale).toBeUndefined(); + }); + + it('should extract basic messages', () => { + /** + * Source HTML: + * + * ``` + *
translatable attribute
+ * ``` + */ + const XLIFF = ` + + + + + file.ts:2 + + + translatable attribute + etubirtta elbatalsnart + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations[ɵcomputeMsgId('translatable attribute', '')]) + .toEqual(ɵmakeParsedTranslation(['etubirtta elbatalsnart'])); + }); + + it('should extract translations with simple placeholders', () => { + /** + * Source HTML: + * + * ``` + *
translatable element >with placeholders {{ interpolation}}
+ * ``` + */ + const XLIFF = ` + + + + + file.ts:3 + + + translatable element with placeholders + tnemele elbatalsnart sredlohecalp htiw + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect( + result.translations[ɵcomputeMsgId( + 'translatable element {$START_BOLD_TEXT}with placeholders{$LOSE_BOLD_TEXT} {$INTERPOLATION}')]) + .toEqual(ɵmakeParsedTranslation( + ['', ' tnemele elbatalsnart ', 'sredlohecalp htiw', ''], + ['INTERPOLATION', 'START_BOLD_TEXT', 'CLOSE_BOLD_TEXT'])); + }); + + it('should extract translations with simple ICU expressions', () => { + /** + * Source HTML: + * + * ``` + *
{VAR_PLURAL, plural, =0 {

test

} }
+ * ``` + */ + const XLIFF = ` + + + + + file.ts:4 + + + {VAR_PLURAL, plural, =0 {test} } + {VAR_PLURAL, plural, =0 {TEST} } + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations[ɵcomputeMsgId( + '{VAR_PLURAL, plural, =0 {{START_PARAGRAPH}test{CLOSE_PARAGRAPH}}}')]) + .toEqual(ɵmakeParsedTranslation( + ['{VAR_PLURAL, plural, =0 {{START_PARAGRAPH}TEST{CLOSE_PARAGRAPH}}}'], [])); + }); + + it('should extract translations with duplicate source messages', () => { + /** + * Source HTML: + * + * ``` + *
foo
+ *
foo
+ *
foo
+ * ``` + */ + const XLIFF = ` + + + + + d + m + file.ts:5 + + + foo + oof + + + + + d + m + file.ts:5 + + + foo + toto + + + + + d + m + file.ts:5 + + + foo + tata + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations[ɵcomputeMsgId('foo')]).toEqual(ɵmakeParsedTranslation(['oof'])); + expect(result.translations['i']).toEqual(ɵmakeParsedTranslation(['toto'])); + expect(result.translations['bar']).toEqual(ɵmakeParsedTranslation(['tata'])); + }); + + it('should extract translations with only placeholders, which are re-ordered', () => { + /** + * Source HTML: + * + * ``` + *

+ * ``` + */ + const XLIFF = ` + + + + + ph names + file.ts:7 + + + + + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations[ɵcomputeMsgId('{$LINE_BREAK}{$TAG_IMG}{$TAG_IMG_1}')]) + .toEqual( + ɵmakeParsedTranslation(['', '', '', ''], ['TAG_IMG_1', 'TAG_IMG', 'LINE_BREAK'])); + }); + + it('should extract translations with empty target', () => { + /** + * Source HTML: + * + * ``` + *
hello
+ * ``` + */ + const XLIFF = ` + + + + + empty element + file.ts:8 + + + hello + + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations[ɵcomputeMsgId('hello {$START_TAG_SPAN}{$CLOSE_TAG_SPAN}')]) + .toEqual(ɵmakeParsedTranslation([''])); + }); + + it('should extract translations with deeply nested ICUs', () => { + /** + * Source HTML: + * + * ``` + * Test: { count, plural, =0 { { sex, select, other {

deeply nested

}} } =other {a lot}} + * ``` + * + * Note that the message gets split into two translation units: + * * The first one contains the outer message with an `ICU` placeholder + * * The second one is the ICU expansion itself + * + * Note that special markers `VAR_PLURAL` and `VAR_SELECT` are added, which are then replaced + * by IVY at runtime with the actual values being rendered by the ICU expansion. + */ + const XLIFF = ` + + + + + file.ts:10 + + + Test: + Le test: + + + + + file.ts:10 + + + {VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {deeply nested}}} =other {a lot}} + {VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {profondément imbriqué}}} =other {beaucoup}} + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations[ɵcomputeMsgId('Test: {$ICU}')]) + .toEqual(ɵmakeParsedTranslation(['Le test: ', ''], ['ICU'])); + + expect( + result.translations[ɵcomputeMsgId( + '{VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {{START_PARAGRAPH}deeply nested{CLOSE_PARAGRAPH}}}} =other {beaucoup}}')]) + .toEqual(ɵmakeParsedTranslation([ + '{VAR_PLURAL, plural, =0 {{VAR_SELECT, select, other {{START_PARAGRAPH}profondément imbriqué{CLOSE_PARAGRAPH}}}} =other {beaucoup}}' + ])); + }); + + it('should extract translations containing multiple lines', () => { + /** + * Source HTML: + * + * ``` + *
multi + * lines
+ * ``` + */ + const XLIFF = ` + + + + + file.ts:11,12 + + + multi\nlines + multi\nlignes + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations[ɵcomputeMsgId('multi\nlines')]) + .toEqual(ɵmakeParsedTranslation(['multi\nlignes'])); + }); + + it('should extract translations with elements', () => { + const XLIFF = ` + + + + + First sentence. + Translated first sentence. + + + + + First sentence. Second sentence. + Translated first sentence. + + + + `; + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + + expect(result.translations['mrk-test']) + .toEqual(ɵmakeParsedTranslation(['Translated first sentence.'])); + + expect(result.translations['mrk-test2']) + .toEqual(ɵmakeParsedTranslation(['Translated first sentence.'])); + }); + + describe('[structure errors]', () => { + it('should provide a diagnostic error when a trans-unit has no translation', () => { + const XLIFF = ` + + + + + + + + + `; + + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message).toEqual(`Missing required element (" + + + [ERROR ->] + + +"): /some/file.xlf@4:12`); + }); + + + it('should provide a diagnostic error when a trans-unit has no id attribute', () => { + const XLIFF = ` + + + + + + + + + + `; + + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message) + .toEqual( + `Missing required "id" attribute on element. ("ocument:2.0" srcLang="en" trgLang="fr"> + + [ERROR ->] + + +"): /some/file.xlf@3:10`); + }); + + it('should provide a diagnostic error on duplicate trans-unit id', () => { + const XLIFF = ` + + + + + + + + + + + + + + + + `; + + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message) + .toEqual(`Duplicated translations for message "deadbeef" (" + + + [ERROR ->] + + +"): /some/file.xlf@9:10`); + }); + }); + + describe('[message errors]', () => { + it('should provide a diagnostic error on unknown message tags', () => { + const XLIFF = ` + + + + + + msg should contain only ph and pc tags + + + + `; + + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message).toEqual(`Invalid element found in message. (" + + + [ERROR ->]msg should contain only ph and pc tags + + +"): /some/file.xlf@6:22`); + }); + + it('should provide a diagnostic error when a placeholder misses an id attribute', () => { + const XLIFF = ` + + + + + + + + + + `; + + const parser = new Xliff2TranslationParser(); + const hint = parser.canParse('/some/file.xlf', XLIFF); + if (!hint) { + return fail('expected XLIFF to be valid'); + } + const result = parser.parse('/some/file.xlf', XLIFF, hint); + expect(result.diagnostics.messages.length).toEqual(1); + expect(result.diagnostics.messages[0].message) + .toEqual(`Missing required "equiv" attribute: (" + + + [ERROR ->] + + +"): /some/file.xlf@6:22`); + }); + }); + }); });