fix(localize): render ICU placeholders in extracted translation files (#38484)

Previously placeholders were only rendered for dynamic interpolation
expressions in `$localize` tagged strings. But there are also potentially
dynamic values in ICU expressions too, so we need to render these as
placeholders when extracting i18n messages into translation files.

PR Close #38484
This commit is contained in:
Pete Bacon Darwin 2020-08-16 12:43:41 +01:00 committed by Andrew Scott
parent be96510ce9
commit 81c3e809aa
10 changed files with 437 additions and 32 deletions

View File

@ -0,0 +1,216 @@
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
/**
 * Break `text` into alternating "static string" and ICU "placeholder name" pieces.
 *
 * ICU expressions inside `$localize` tagged messages can carry "dynamic" pieces (for example
 * interpolations or element markers). Translation files need those rendered as placeholders, so
 * the ICU text is scanned here and the placeholder names are separated from the surrounding
 * static text, ready for the translation serializers to render each kind appropriately.
 *
 * An ICU with interpolations looks like this:
 *
 * ```
 * {VAR_PLURAL, plural, one {{INTERPOLATION}} other {{INTERPOLATION_1} post}}
 * ```
 *
 * Here `INTERPOLATION` and `INTERPOLATION_1` are placeholders whose content is supplied at
 * runtime. A placeholder is recognized as text wrapped in curly braces that sits inside an ICU
 * case expression.
 *
 * ICUs may also be nested inside one another to any depth. A nested ICU is wrapped in curly
 * braces exactly like a placeholder, but it contains a comma `,` (separating the ICU value from
 * the ICU type), which is how the two are told apart. Nested ICUs may hold placeholders of their
 * own, and those are extracted too.
 *
 * A nested ICU with its own placeholders:
 *
 * ```
 * {VAR_SELECT_1, select,
 *   invoice {Invoice for {INTERPOLATION}}
 *   payment {{VAR_SELECT, select,
 *     processor {Payment gateway}
 *     other {{INTERPOLATION_1}}
 *   }}
 * }
 * ```
 *
 * @param text Text to be broken.
 * @returns an array of strings, where
 *  - entries at even indices (0, 2, 4, ...) are static strings
 *  - entries at odd indices (1, 3, 5, ...) are placeholder names
 */
export function extractIcuPlaceholders(text: string): string[] {
  const stateStack = new StateStack();
  const icuPieces = new IcuPieces();
  const bracePattern = /[{}]/g;

  // Index in `text` up to which everything has already been stored in `icuPieces`.
  let consumed = 0;

  for (let found = bracePattern.exec(text); found !== null; found = bracePattern.exec(text)) {
    // Every brace either opens a new block or closes the current one.
    if (found[0] === '{') {
      stateStack.enterBlock();
    } else {
      stateStack.leaveBlock();
    }

    if (stateStack.getCurrent() !== 'placeholder') {
      // Ordinary structural brace: it is just part of the static text.
      icuPieces.addText(text.substring(consumed, bracePattern.lastIndex));
      consumed = bracePattern.lastIndex;
      continue;
    }

    const placeholderName = tryParsePlaceholder(text, bracePattern.lastIndex);
    if (!placeholderName) {
      // Not a placeholder, so this block must be a nested ICU;
      // keep the opening curly brace as part of the static text.
      icuPieces.addText(text.substring(consumed, bracePattern.lastIndex));
      stateStack.nestedIcu();
    } else {
      // A real placeholder: store the static text without the opening curly brace, record the
      // name, then jump the scanner past the name and its closing brace and leave the block.
      icuPieces.addText(text.substring(consumed, bracePattern.lastIndex - 1));
      icuPieces.addPlaceholder(placeholderName);
      bracePattern.lastIndex += placeholderName.length + 1;
      stateStack.leaveBlock();
    }
    consumed = bracePattern.lastIndex;
  }

  // Whatever follows the last brace (if anything) is trailing static text.
  icuPieces.addText(text.substring(consumed));
  return icuPieces.toArray();
}
/**
 * Accumulates the pieces of an ICU: "static text" entries interleaved with "placeholder name"
 * entries.
 *
 * The list always starts and ends with a static text entry (possibly empty), so static text sits
 * at even indices and placeholder names at odd indices.
 */
class IcuPieces {
  private segments: string[] = [''];

  /**
   * Append `text` to the static text entry at the end of the list.
   *
   * Consecutive calls keep extending that same entry rather than creating new ones.
   */
  addText(text: string): void {
    const tail = this.segments.length - 1;
    this.segments[tail] = this.segments[tail] + text;
  }

  /**
   * Record a placeholder `name`, followed by a new empty static text entry for later text.
   */
  addPlaceholder(name: string): void {
    this.segments.push(name, '');
  }

  /**
   * Get the collected pieces.
   *
   * Entries at even indices (0, 2, 4, ...) are static strings.
   * Entries at odd indices (1, 3, 5, ...) are placeholder names.
   */
  toArray(): string[] {
    return this.segments;
  }
}
/**
 * Keeps track of where the parser is while scanning a string for ICU placeholders.
 *
 * The state changes each time a curly brace block is entered or left. ICUs can be nested, so the
 * states are kept on a stack.
 */
class StateStack {
  private states: ParserState[] = [];

  /**
   * Push the state for a newly entered block.
   *
   * The new state is derived from the current one:
   * no state -> 'icu', 'icu' -> 'case', 'case' -> 'placeholder', 'placeholder' -> 'case'.
   */
  enterBlock(): void {
    const parent = this.getCurrent();
    if (parent === 'case') {
      this.states.push('placeholder');
    } else if (parent === 'icu' || parent === 'placeholder') {
      this.states.push('case');
    } else {
      this.states.push('icu');
    }
  }

  /**
   * Pop the state of the block that is being left.
   *
   * @returns the state that was removed, or `undefined` if the stack was already empty.
   */
  leaveBlock(): ParserState {
    const left = this.states.pop();
    return left;
  }

  /**
   * Correct the state after discovering that the current block is a nested ICU.
   *
   * The block was provisionally entered as a "placeholder"; that state is removed and "icu" is
   * stored in its place. An error is thrown if the removed state was not "placeholder".
   */
  nestedIcu(): void {
    const replaced = this.states.pop();
    assert(
        replaced === 'placeholder', 'A nested ICU must replace a placeholder but got ' + replaced);
    this.states.push('icu');
  }

  /**
   * Peek at the most recently pushed state (`undefined` when the stack is empty).
   */
  getCurrent() {
    const top = this.states.length - 1;
    return this.states[top];
  }
}

/**
 * The kind of curly brace block the parser is inside; `undefined` means it is inside no block.
 */
type ParserState = 'icu'|'case'|'placeholder'|undefined;
/**
 * Try to read a simple placeholder name out of a curly braced block.
 *
 * The name is the text between `start` and the next closing brace `}`. If a comma `,` appears
 * before that closing brace then the block cannot be a placeholder - it is probably a nested ICU
 * instead.
 *
 * @param text the whole string that is being parsed.
 * @param start the index of the character in `text` where the placeholder name may start.
 * @returns the placeholder name (which may be an empty string), or `null` if there is no closing
 *     brace after `start` or a comma comes before it.
 */
function tryParsePlaceholder(text: string, start: number): string|null {
  const closeIndex = text.indexOf('}', start);
  if (closeIndex === -1) {
    // The block is never closed, so there is no name to extract.
    return null;
  }
  const candidate = text.substring(start, closeIndex);
  return candidate.indexOf(',') === -1 ? candidate : null;
}
/**
 * Throw an error carrying `message` (prefixed with "Assertion failure: ") unless `test` holds.
 *
 * @param test the condition that is expected to be true.
 * @param message description of the failed expectation.
 */
function assert(test: boolean, message: string): void {
  if (test) {
    return;
  }
  throw new Error('Assertion failure: ' + message);
}

View File

@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';
import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
@ -63,11 +64,22 @@ export class Xliff1TranslationSerializer implements TranslationSerializer {
}
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
xml.text(message.messageParts[0]);
for (let i = 1; i < message.messageParts.length; i++) {
xml.startTag('x', {id: message.placeholderNames[i - 1]}, {selfClosing: true});
xml.text(message.messageParts[i]);
const length = message.messageParts.length - 1;
for (let i = 0; i < length; i++) {
this.serializeTextPart(xml, message.messageParts[i]);
xml.startTag('x', {id: message.placeholderNames[i]}, {selfClosing: true});
}
this.serializeTextPart(xml, message.messageParts[length]);
}
/**
 * Write one static text part of a message to `xml`.
 *
 * The text is first split with `extractIcuPlaceholders()`; each placeholder name found is
 * written as a self-closing `<x id="...">` tag between the surrounding pieces of text.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  const pieces = extractIcuPlaceholders(text);
  const lastIndex = pieces.length - 1;
  let index = 0;
  // Pieces come in (text, placeholder name) pairs, followed by one final piece of text.
  while (index < lastIndex) {
    xml.text(pieces[index]);
    xml.startTag('x', {id: pieces[index + 1]}, {selfClosing: true});
    index += 2;
  }
  xml.text(pieces[lastIndex]);
}
private serializeNote(xml: XmlFile, name: string, value: string): void {

View File

@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage} from '@angular/localize';
import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
@ -22,6 +23,7 @@ const MAX_LEGACY_XLIFF_2_MESSAGE_LENGTH = 20;
* @see Xliff2TranslationParser
*/
export class Xliff2TranslationSerializer implements TranslationSerializer {
private currentPlaceholderId = 0;
constructor(
private sourceLocale: string, private basePath: AbsoluteFsPath,
private useLegacyIds: boolean) {}
@ -74,21 +76,38 @@ export class Xliff2TranslationSerializer implements TranslationSerializer {
}
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
xml.text(message.messageParts[0]);
for (let i = 1; i < message.messageParts.length; i++) {
const placeholderName = message.placeholderNames[i - 1];
if (placeholderName.startsWith('START_')) {
xml.startTag('pc', {
id: `${i}`,
equivStart: placeholderName,
equivEnd: placeholderName.replace(/^START/, 'CLOSE')
});
} else if (placeholderName.startsWith('CLOSE_')) {
xml.endTag('pc');
} else {
xml.startTag('ph', {id: `${i}`, equiv: placeholderName}, {selfClosing: true});
}
xml.text(message.messageParts[i]);
this.currentPlaceholderId = 0;
const length = message.messageParts.length - 1;
for (let i = 0; i < length; i++) {
this.serializeTextPart(xml, message.messageParts[i]);
this.serializePlaceholder(xml, message.placeholderNames[i]);
}
this.serializeTextPart(xml, message.messageParts[length]);
}
/**
 * Write one static text part of a message to `xml`.
 *
 * The text is first split with `extractIcuPlaceholders()`; each placeholder name found is
 * handed to `serializePlaceholder()` between the surrounding pieces of text.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  const pieces = extractIcuPlaceholders(text);
  const lastIndex = pieces.length - 1;
  let index = 0;
  // Pieces come in (text, placeholder name) pairs, followed by one final piece of text.
  while (index < lastIndex) {
    xml.text(pieces[index]);
    this.serializePlaceholder(xml, pieces[index + 1]);
    index += 2;
  }
  xml.text(pieces[lastIndex]);
}
/**
 * Write the markup for a single placeholder to `xml`.
 *
 * - Names starting with `START_` open a `<pc>` element whose `equivEnd` is the matching
 *   `CLOSE...` name.
 * - Names starting with `CLOSE_` close the current `<pc>` element.
 * - Any other name becomes a self-closing `<ph>` element.
 *
 * Each opened `<pc>` and each `<ph>` takes the next value of `currentPlaceholderId` as its `id`;
 * closing a `<pc>` does not consume an id.
 */
private serializePlaceholder(xml: XmlFile, placeholderName: string): void {
  if (placeholderName.startsWith('CLOSE_')) {
    xml.endTag('pc');
    return;
  }

  const id = `${this.currentPlaceholderId++}`;
  if (placeholderName.startsWith('START_')) {
    const equivEnd = placeholderName.replace(/^START/, 'CLOSE');
    xml.startTag('pc', {id, equivStart: placeholderName, equivEnd});
  } else {
    xml.startTag('ph', {id, equiv: placeholderName}, {selfClosing: true});
  }
}

View File

@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';
import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
@ -77,11 +78,22 @@ export class XmbTranslationSerializer implements TranslationSerializer {
}
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
xml.text(message.messageParts[0]);
for (let i = 1; i < message.messageParts.length; i++) {
xml.startTag('ph', {name: message.placeholderNames[i - 1]}, {selfClosing: true});
xml.text(message.messageParts[i]);
const length = message.messageParts.length - 1;
for (let i = 0; i < length; i++) {
this.serializeTextPart(xml, message.messageParts[i]);
xml.startTag('ph', {name: message.placeholderNames[i]}, {selfClosing: true});
}
this.serializeTextPart(xml, message.messageParts[length]);
}
/**
 * Write one static text part of a message to `xml`.
 *
 * The text is first split with `extractIcuPlaceholders()`; each placeholder name found is
 * written as a self-closing `<ph name="...">` tag between the surrounding pieces of text.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  const pieces = extractIcuPlaceholders(text);
  const lastIndex = pieces.length - 1;
  let index = 0;
  // Pieces come in (text, placeholder name) pairs, followed by one final piece of text.
  while (index < lastIndex) {
    xml.text(pieces[index]);
    xml.startTag('ph', {name: pieces[index + 1]}, {selfClosing: true});
    index += 2;
  }
  xml.text(pieces[lastIndex]);
}
/**

View File

@ -175,12 +175,12 @@ runInEachFileSystem(() => {
` <file>`,
` <unit id="3291030485717846467">`,
` <segment>`,
` <source>Hello, <ph id="1" equiv="PH"/>!</source>`,
` <source>Hello, <ph id="0" equiv="PH"/>!</source>`,
` </segment>`,
` </unit>`,
` <unit id="8669027859022295761">`,
` <segment>`,
` <source>try<ph id="1" equiv="PH"/>me</source>`,
` <source>try<ph id="0" equiv="PH"/>me</source>`,
` </segment>`,
` </unit>`,
` </file>`,

View File

@ -0,0 +1,76 @@
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
import {extractIcuPlaceholders} from '../../../src/extract/translation_files/icu_parsing';
// Specs for `extractIcuPlaceholders()`: in every expected array the even entries are static text
// and the odd entries are placeholder names.
describe('extractIcuPlaceholders()', () => {
// Text without an ICU (including stray or non-ICU braces) comes back as one static string.
it('should return a single string if there is no ICU', () => {
expect(extractIcuPlaceholders('')).toEqual(['']);
expect(extractIcuPlaceholders('some text')).toEqual(['some text']);
expect(extractIcuPlaceholders('some } text')).toEqual(['some } text']);
expect(extractIcuPlaceholders('this is {not an ICU}')).toEqual(['this is {not an ICU}']);
});
// ICUs whose cases hold only plain text are returned unchanged as one static string.
it('should return a single string if there are no ICU placeholders', () => {
expect(extractIcuPlaceholders('{VAR_PLURAL, plural, one {SOME} few {FEW} other {OTHER}}'))
.toEqual(['{VAR_PLURAL, plural, one {SOME} few {FEW} other {OTHER}}']);
expect(extractIcuPlaceholders('{VAR_SELECT, select, male {HE} female {SHE} other {XE}}'))
.toEqual(['{VAR_SELECT, select, male {HE} female {SHE} other {XE}}']);
});
// Braced names inside a case are split out; their own braces are dropped from the static text.
it('should split out simple interpolation placeholders', () => {
expect(
extractIcuPlaceholders(
'{VAR_PLURAL, plural, one {{INTERPOLATION}} few {pre {INTERPOLATION_1}} other {{INTERPOLATION_2} post}}'))
.toEqual([
'{VAR_PLURAL, plural, one {',
'INTERPOLATION',
'} few {pre ',
'INTERPOLATION_1',
'} other {',
'INTERPOLATION_2',
' post}}',
]);
});
// Element markers (START_*/CLOSE_*) are extracted the same way as interpolations.
it('should split out element placeholders', () => {
expect(
extractIcuPlaceholders(
'{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'))
.toEqual([
'{VAR_PLURAL, plural, one {',
'START_BOLD_TEXT',
'something bold',
'CLOSE_BOLD_TEXT',
'} other {pre ',
'START_TAG_SPAN',
'middle',
'CLOSE_TAG_SPAN',
' post}}',
]);
});
// A nested ICU stays in the static text, while placeholders at either nesting level are
// extracted.
it('should handle nested ICUs', () => {
expect(extractIcuPlaceholders([
'{VAR_SELECT_1, select,',
' invoice {Invoice for {INTERPOLATION}}',
' payment {{VAR_SELECT, select,',
' processor {Payment gateway}',
' other {{INTERPOLATION_1}}',
' }}',
'}',
].join('\n')))
.toEqual([
'{VAR_SELECT_1, select,\n invoice {Invoice for ',
'INTERPOLATION',
'}\n payment {{VAR_SELECT, select,\n processor {Payment gateway}\n other {',
'INTERPOLATION_1',
'}\n }}\n}',
]);
});
});

View File

@ -22,6 +22,19 @@ describe('JsonTranslationSerializer', () => {
mockMessage('13579', ['', 'b', ''], ['START_BOLD_TEXT', 'CLOSE_BOLD_TEXT'], {}),
mockMessage('24680', ['a'], [], {meaning: 'meaning', description: 'and description'}),
mockMessage('80808', ['multi\nlines'], [], {}),
mockMessage('90000', ['<escape', 'me>'], ['double-quotes-"'], {}),
mockMessage(
'100000',
[
'pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU'
],
[], {}),
mockMessage(
'100001',
[
'{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'
],
[], {}),
];
const serializer = new SimpleJsonTranslationSerializer('xx');
const output = serializer.serialize(messages);
@ -33,7 +46,10 @@ describe('JsonTranslationSerializer', () => {
` "13579": "{$START_BOLD_TEXT}b{$CLOSE_BOLD_TEXT}",`,
` "24680": "a",`,
` "67890": "a{$START_TAG_SPAN}{$CLOSE_TAG_SPAN}c",`,
` "80808": "multi\\nlines"`,
` "80808": "multi\\nlines",`,
` "90000": "<escape{$double-quotes-\\"}me>",`,
` "100000": "pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU",`,
` "100001": "{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}"`,
` }`,
`}`,
].join('\n'));

View File

@ -34,7 +34,19 @@ runInEachFileSystem(() => {
mockMessage('13579', ['', 'b', ''], ['START_BOLD_TEXT', 'CLOSE_BOLD_TEXT'], {}),
mockMessage('24680', ['a'], [], {meaning: 'meaning', description: 'and description'}),
mockMessage('80808', ['multi\nlines'], [], {}),
mockMessage('90000', ['<escape', 'me>'], ['double-quotes-"'], {})
mockMessage('90000', ['<escape', 'me>'], ['double-quotes-"'], {}),
mockMessage(
'100000',
[
'pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU'
],
[], {}),
mockMessage(
'100001',
[
'{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'
],
[], {}),
];
const serializer =
new Xliff1TranslationSerializer('xx', absoluteFrom('/project'), useLegacyIds);
@ -73,6 +85,12 @@ runInEachFileSystem(() => {
` <trans-unit id="90000" datatype="html">`,
` <source>&lt;escape<x id="double-quotes-&quot;"/>me&gt;</source>`,
` </trans-unit>`,
` <trans-unit id="100000" datatype="html">`,
` <source>pre-ICU {VAR_SELECT, select, a {a} b {<x id="INTERPOLATION"/>} c {pre <x id="INTERPOLATION_1"/> post}} post-ICU</source>`,
` </trans-unit>`,
` <trans-unit id="100001" datatype="html">`,
` <source>{VAR_PLURAL, plural, one {<x id="START_BOLD_TEXT"/>something bold<x id="CLOSE_BOLD_TEXT"/>} other {pre <x id="START_TAG_SPAN"/>middle<x id="CLOSE_TAG_SPAN"/> post}}</source>`,
` </trans-unit>`,
` </body>`,
` </file>`,
`</xliff>\n`,

View File

@ -40,7 +40,19 @@ runInEachFileSystem(() => {
mockMessage('13579', ['', 'b', ''], ['START_BOLD_TEXT', 'CLOSE_BOLD_TEXT'], {}),
mockMessage('24680', ['a'], [], {meaning: 'meaning', description: 'and description'}),
mockMessage('80808', ['multi\nlines'], [], {}),
mockMessage('90000', ['<escape', 'me>'], ['double-quotes-"'], {})
mockMessage('90000', ['<escape', 'me>'], ['double-quotes-"'], {}),
mockMessage(
'100000',
[
'pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU'
],
[], {}),
mockMessage(
'100001',
[
'{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'
],
[], {}),
];
const serializer =
new Xliff2TranslationSerializer('xx', absoluteFrom('/project'), useLegacyIds);
@ -55,7 +67,7 @@ runInEachFileSystem(() => {
` <note category="meaning">some meaning</note>`,
` </notes>`,
` <segment>`,
` <source>a<ph id="1" equiv="PH"/>b<ph id="2" equiv="PH_1"/>c</source>`,
` <source>a<ph id="0" equiv="PH"/>b<ph id="1" equiv="PH_1"/>c</source>`,
` </segment>`,
` </unit>`,
` <unit id="67890">`,
@ -64,12 +76,12 @@ runInEachFileSystem(() => {
` <note category="description">some description</note>`,
` </notes>`,
` <segment>`,
` <source>a<pc id="1" equivStart="START_TAG_SPAN" equivEnd="CLOSE_TAG_SPAN"></pc>c</source>`,
` <source>a<pc id="0" equivStart="START_TAG_SPAN" equivEnd="CLOSE_TAG_SPAN"></pc>c</source>`,
` </segment>`,
` </unit>`,
` <unit id="13579">`,
` <segment>`,
` <source><pc id="1" equivStart="START_BOLD_TEXT" equivEnd="CLOSE_BOLD_TEXT">b</pc></source>`,
` <source><pc id="0" equivStart="START_BOLD_TEXT" equivEnd="CLOSE_BOLD_TEXT">b</pc></source>`,
` </segment>`,
` </unit>`,
` <unit id="24680">`,
@ -89,7 +101,17 @@ runInEachFileSystem(() => {
` </unit>`,
` <unit id="90000">`,
` <segment>`,
` <source>&lt;escape<ph id="1" equiv="double-quotes-&quot;"/>me&gt;</source>`,
` <source>&lt;escape<ph id="0" equiv="double-quotes-&quot;"/>me&gt;</source>`,
` </segment>`,
` </unit>`,
` <unit id="100000">`,
` <segment>`,
` <source>pre-ICU {VAR_SELECT, select, a {a} b {<ph id="0" equiv="INTERPOLATION"/>} c {pre <ph id="1" equiv="INTERPOLATION_1"/> post}} post-ICU</source>`,
` </segment>`,
` </unit>`,
` <unit id="100001">`,
` <segment>`,
` <source>{VAR_PLURAL, plural, one {<pc id="0" equivStart="START_BOLD_TEXT" equivEnd="CLOSE_BOLD_TEXT">something bold</pc>} other {pre <pc id="1" equivStart="START_TAG_SPAN" equivEnd="CLOSE_TAG_SPAN">middle</pc> post}}</source>`,
` </segment>`,
` </unit>`,
` </file>`,

View File

@ -30,6 +30,18 @@ runInEachFileSystem(() => {
mockMessage('24680', ['a'], [], {meaning: 'meaning', description: 'and description'}),
mockMessage('80808', ['multi\nlines'], [], {}),
mockMessage('90000', ['<escape', 'me>'], ['double-quotes-"'], {}),
mockMessage(
'100000',
[
'pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU'
],
[], {}),
mockMessage(
'100001',
[
'{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'
],
[], {}),
];
const serializer = new XmbTranslationSerializer(absoluteFrom('/project'), useLegacyIds);
const output = serializer.serialize(messages);
@ -44,6 +56,8 @@ runInEachFileSystem(() => {
` <msg id="24680" desc="and description" meaning="meaning">a</msg>`,
` <msg id="80808">multi`, `lines</msg>`,
` <msg id="90000">&lt;escape<ph name="double-quotes-&quot;"/>me&gt;</msg>`,
` <msg id="100000">pre-ICU {VAR_SELECT, select, a {a} b {<ph name="INTERPOLATION"/>} c {pre <ph name="INTERPOLATION_1"/> post}} post-ICU</msg>`,
` <msg id="100001">{VAR_PLURAL, plural, one {<ph name="START_BOLD_TEXT"/>something bold<ph name="CLOSE_BOLD_TEXT"/>} other {pre <ph name="START_TAG_SPAN"/>middle<ph name="CLOSE_TAG_SPAN"/> post}}</msg>`,
`</messagebundle>\n`
].join('\n'));
});