diff --git a/packages/localize/src/tools/src/extract/translation_files/icu_parsing.ts b/packages/localize/src/tools/src/extract/translation_files/icu_parsing.ts
new file mode 100644
index 0000000000..b2e00f85f5
--- /dev/null
+++ b/packages/localize/src/tools/src/extract/translation_files/icu_parsing.ts
@@ -0,0 +1,216 @@
+/**
+ * @license
+ * Copyright Google LLC All Rights Reserved.
+ *
+ * Use of this source code is governed by an MIT-style license that can be
+ * found in the LICENSE file at https://angular.io/license
+ */
+
+/**
+ * Split the given `text` into an array of "static strings" and ICU "placeholder names".
+ *
+ * This is required because ICU expressions in `$localize` tagged messages may contain "dynamic"
+ * pieces (e.g. interpolations or element markers). These markers need to be translated to
+ * placeholders in extracted translation files. So we must parse ICUs to identify them and separate
+ * them out so that the translation serializers can render them appropriately.
+ *
+ * An example of an ICU with interpolations:
+ *
+ * ```
+ * {VAR_PLURAL, plural, one {{INTERPOLATION}} other {{INTERPOLATION_1} post}}
+ * ```
+ *
+ * In this ICU, `INTERPOLATION` and `INTERPOLATION_1` are actually placeholders that will be
+ * replaced with dynamic content at runtime.
+ *
+ * Such placeholders are identifiable as text wrapped in curly braces, within an ICU case
+ * expression.
+ *
+ * To complicate matters, it is possible for ICUs to be nested indefinitely within each other. In
+ * such cases, the nested ICU expression appears enclosed in a set of curly braces in the same way
+ * as a placeholder. The nested ICU expressions can be differentiated from placeholders as they
+ * contain a comma `,`, which separates the ICU value from the ICU type.
+ *
+ * Furthermore, nested ICUs can have placeholders of their own, which need to be extracted.
+ *
+ * An example of a nested ICU containing its own placeholders:
+ *
+ * ```
+ * {VAR_SELECT_1, select,
+ *   invoice {Invoice for {INTERPOLATION}}
+ *   payment {{VAR_SELECT, select,
+ *     processor {Payment gateway}
+ *     other {{INTERPOLATION_1}}
+ *   }}}
+ * ```
+ *
+ * @param text the message text to split; it may contain zero or more ICU expressions.
+ * @returns an array of strings (always of odd length), where
+ * - values at even indexes are static strings (e.g. 0, 2, 4, etc)
+ * - values at odd indexes are placeholder names (e.g. 1, 3, 5, etc)
+ */
+export function extractIcuPlaceholders(text: string): string[] {
+  const state = new StateStack();
+  const pieces = new IcuPieces();
+  const braces = /[{}]/g;
+
+  let lastPos = 0;
+  let match: RegExpExecArray|null;
+  while ((match = braces.exec(text)) !== null) {
+    if (match[0] === '{') {
+      state.enterBlock();
+    } else {
+      // We must have hit a `}`
+      state.leaveBlock();
+    }
+
+    if (state.getCurrent() === 'placeholder') {
+      const name = tryParsePlaceholder(text, braces.lastIndex);
+      if (name) {
+        // We found a placeholder so store the static text that precedes it (minus the opening
+        // curly brace) followed by the placeholder name; then move the regex past the name and
+        // its closing brace (so that brace is never matched) and leave the placeholder block.
+        pieces.addText(text.substring(lastPos, braces.lastIndex - 1));
+        pieces.addPlaceholder(name);
+        braces.lastIndex += name.length + 1;
+        state.leaveBlock();
+      } else {
+        // Not a placeholder (a comma or the end of the text came first, or the name was empty),
+        // so treat it as a nested ICU; store the static text (including the opening curly brace).
+        pieces.addText(text.substring(lastPos, braces.lastIndex));
+        state.nestedIcu();
+      }
+    } else {
+      pieces.addText(text.substring(lastPos, braces.lastIndex));
+    }
+    lastPos = braces.lastIndex;
+  }
+
+  // Capture the last piece of text after the ICUs (if any).
+  pieces.addText(text.substring(lastPos));
+  return pieces.toArray();
+}
+
+/**
+ * A helper class to store the alternating pieces ("static text", "placeholder name") of an ICU.
+ */
+class IcuPieces {
+ private pieces: string[] = [''];  // always starts and ends with a "static text" piece
+
+ /**
+ * Add the given `text` to the current (i.e. last) "static text" piece.
+ *
+ * Sequential calls to `addText()` will append to the current text piece.
+ */
+ addText(text: string): void {
+ this.pieces[this.pieces.length - 1] += text;
+ }
+
+ /**
+ * Add the given placeholder `name` to the stored pieces and start a new, empty text piece.
+ */
+ addPlaceholder(name: string): void {
+ this.pieces.push(name);
+ this.pieces.push('');
+ }
+
+ /**
+ * Return the stored pieces as an array of strings (the internal array itself, not a copy).
+ *
+ * Values at even indexes are static strings (e.g. 0, 2, 4, etc).
+ * Values at odd indexes are placeholder names (e.g. 1, 3, 5, etc).
+ */
+ toArray(): string[] {
+ return this.pieces;
+ }
+}
+
+/**
+ * A helper class to track the current state of parsing the strings for ICU placeholders.
+ *
+ * State changes happen when we enter or leave a curly brace block.
+ * Since ICUs can be nested the state is stored as a stack; an empty stack means "outside any block".
+ */
+class StateStack {
+ private stack: ParserState[] = [];
+
+ /**
+ * Update the state upon entering a block (i.e. on an opening curly brace).
+ *
+ * The new state is computed from the current state and added to the stack.
+ */
+ enterBlock(): void {
+ const current = this.getCurrent();
+ switch (current) {
+ case 'icu':  // a block directly inside an ICU is a case body
+ this.stack.push('case');
+ break;
+ case 'case':  // provisionally a placeholder; `nestedIcu()` corrects this if it is an ICU
+ this.stack.push('placeholder');
+ break;
+ case 'placeholder':  // NOTE(review): looks unreachable via extractIcuPlaceholders() - confirm
+ this.stack.push('case');
+ break;
+ default:  // empty stack (`undefined`): this block is treated as a top-level ICU
+ this.stack.push('icu');
+ break;
+ }
+ }
+
+ /**
+ * Update the state upon leaving a block (i.e. on a closing curly brace).
+ *
+ * The current state is popped off the stack and returned (`undefined` if the stack was empty).
+ */
+ leaveBlock(): ParserState {
+ return this.stack.pop();
+ }
+
+ /**
+ * Update the state upon arriving at a nested ICU.
+ *
+ * In this case, the current state of "placeholder" is incorrect, so this is popped off and the
+ * correct "icu" state is stored. Throws if the popped state was not "placeholder".
+ */
+ nestedIcu(): void {
+ const current = this.stack.pop();
+ assert(current === 'placeholder', 'A nested ICU must replace a placeholder but got ' + current);
+ this.stack.push('icu');
+ }
+
+ /**
+ * Get the current (most recent) state from the stack, or `undefined` if the stack is empty.
+ */
+ getCurrent() {
+ return this.stack[this.stack.length - 1];
+ }
+}
+type ParserState = 'icu'|'case'|'placeholder'|undefined;  // `undefined`: not inside any block
+
+/**
+ * Attempt to parse a simple placeholder name from a curly braced block.
+ *
+ * If a comma `,` appears before the closing brace then it cannot be a placeholder - and is
+ * probably a nested ICU instead.
+ *
+ * @param text the whole string that is being parsed.
+ * @param start the index of the character in the `text` string where this placeholder may start.
+ * @returns the name (`''` for `{}`), or `null` if a `,` or the end of `text` comes before a `}`.
+ */
+function tryParsePlaceholder(text: string, start: number): string|null {
+ for (let i = start; i < text.length; i++) {
+ if (text[i] === ',') {
+ break;
+ }
+ if (text[i] === '}') {
+ return text.substring(start, i);
+ }
+ }
+ return null;
+}
+
+function assert(test: boolean, message: string): void {  // local helper: throws if `test` is false
+ if (!test) {
+ throw new Error('Assertion failure: ' + message);
+ }
+}
diff --git a/packages/localize/src/tools/src/extract/translation_files/xliff1_translation_serializer.ts b/packages/localize/src/tools/src/extract/translation_files/xliff1_translation_serializer.ts
index 8c1afb69f2..cd0b13ba5d 100644
--- a/packages/localize/src/tools/src/extract/translation_files/xliff1_translation_serializer.ts
+++ b/packages/localize/src/tools/src/extract/translation_files/xliff1_translation_serializer.ts
@@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';
+import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
@@ -63,11 +64,22 @@ export class Xliff1TranslationSerializer implements TranslationSerializer {
}
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
- xml.text(message.messageParts[0]);
- for (let i = 1; i < message.messageParts.length; i++) {
- xml.startTag('x', {id: message.placeholderNames[i - 1]}, {selfClosing: true});
- xml.text(message.messageParts[i]);
+ const length = message.messageParts.length - 1;  // index of the final text part
+ for (let i = 0; i < length; i++) {
+ this.serializeTextPart(xml, message.messageParts[i]);  // splits out any ICU placeholders
+ xml.startTag('x', {id: message.placeholderNames[i]}, {selfClosing: true});
}
+ this.serializeTextPart(xml, message.messageParts[length]);
+ }
+
+ private serializeTextPart(xml: XmlFile, text: string): void {
+ const pieces = extractIcuPlaceholders(text);  // [text, placeholder, text, ..., text]
+ const length = pieces.length - 1;  // index of the trailing static text piece
+ for (let i = 0; i < length; i += 2) {
+ xml.text(pieces[i]);
+ xml.startTag('x', {id: pieces[i + 1]}, {selfClosing: true});
+ }
+ xml.text(pieces[length]);
}
private serializeNote(xml: XmlFile, name: string, value: string): void {
diff --git a/packages/localize/src/tools/src/extract/translation_files/xliff2_translation_serializer.ts b/packages/localize/src/tools/src/extract/translation_files/xliff2_translation_serializer.ts
index 31f2a42eeb..d3aa29d5fc 100644
--- a/packages/localize/src/tools/src/extract/translation_files/xliff2_translation_serializer.ts
+++ b/packages/localize/src/tools/src/extract/translation_files/xliff2_translation_serializer.ts
@@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage} from '@angular/localize';
+import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
@@ -22,6 +23,7 @@ const MAX_LEGACY_XLIFF_2_MESSAGE_LENGTH = 20;
* @see Xliff2TranslationParser
*/
export class Xliff2TranslationSerializer implements TranslationSerializer {
+ private currentPlaceholderId = 0;  // next `id` to emit for a `pc`/`ph` element
constructor(
private sourceLocale: string, private basePath: AbsoluteFsPath,
private useLegacyIds: boolean) {}
@@ -74,21 +76,38 @@ export class Xliff2TranslationSerializer implements TranslationSerializer {
}
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
- xml.text(message.messageParts[0]);
- for (let i = 1; i < message.messageParts.length; i++) {
- const placeholderName = message.placeholderNames[i - 1];
- if (placeholderName.startsWith('START_')) {
- xml.startTag('pc', {
- id: `${i}`,
- equivStart: placeholderName,
- equivEnd: placeholderName.replace(/^START/, 'CLOSE')
- });
- } else if (placeholderName.startsWith('CLOSE_')) {
- xml.endTag('pc');
- } else {
- xml.startTag('ph', {id: `${i}`, equiv: placeholderName}, {selfClosing: true});
- }
- xml.text(message.messageParts[i]);
+ this.currentPlaceholderId = 0;  // placeholder ids restart at 0 for every message
+ const length = message.messageParts.length - 1;  // index of the final text part
+ for (let i = 0; i < length; i++) {
+ this.serializeTextPart(xml, message.messageParts[i]);  // splits out any ICU placeholders
+ this.serializePlaceholder(xml, message.placeholderNames[i]);
+ }
+ this.serializeTextPart(xml, message.messageParts[length]);
+ }
+
+ private serializeTextPart(xml: XmlFile, text: string): void {
+ const pieces = extractIcuPlaceholders(text);  // [text, placeholder, text, ..., text]
+ const length = pieces.length - 1;  // index of the trailing static text piece
+ for (let i = 0; i < length; i += 2) {
+ xml.text(pieces[i]);
+ this.serializePlaceholder(xml, pieces[i + 1]);
+ }
+ xml.text(pieces[length]);
+ }
+
+ private serializePlaceholder(xml: XmlFile, placeholderName: string): void {
+ if (placeholderName.startsWith('START_')) {  // opening marker: start a `pc` element
+ xml.startTag('pc', {
+ id: `${this.currentPlaceholderId++}`,
+ equivStart: placeholderName,
+ equivEnd: placeholderName.replace(/^START/, 'CLOSE')
+ });
+ } else if (placeholderName.startsWith('CLOSE_')) {  // closing marker: no new id is consumed
+ xml.endTag('pc');
+ } else {  // any other placeholder is written as a self-closing `ph` element
+ xml.startTag(
+ 'ph', {id: `${this.currentPlaceholderId++}`, equiv: placeholderName},
+ {selfClosing: true});
}
}
diff --git a/packages/localize/src/tools/src/extract/translation_files/xmb_translation_serializer.ts b/packages/localize/src/tools/src/extract/translation_files/xmb_translation_serializer.ts
index ffb4383409..574173ed56 100644
--- a/packages/localize/src/tools/src/extract/translation_files/xmb_translation_serializer.ts
+++ b/packages/localize/src/tools/src/extract/translation_files/xmb_translation_serializer.ts
@@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';
+import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
@@ -77,11 +78,22 @@ export class XmbTranslationSerializer implements TranslationSerializer {
}
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
- xml.text(message.messageParts[0]);
- for (let i = 1; i < message.messageParts.length; i++) {
- xml.startTag('ph', {name: message.placeholderNames[i - 1]}, {selfClosing: true});
- xml.text(message.messageParts[i]);
+ const length = message.messageParts.length - 1;  // index of the final text part
+ for (let i = 0; i < length; i++) {
+ this.serializeTextPart(xml, message.messageParts[i]);  // splits out any ICU placeholders
+ xml.startTag('ph', {name: message.placeholderNames[i]}, {selfClosing: true});
}
+ this.serializeTextPart(xml, message.messageParts[length]);
+ }
+
+ private serializeTextPart(xml: XmlFile, text: string): void {
+ const pieces = extractIcuPlaceholders(text);  // [text, placeholder, text, ..., text]
+ const length = pieces.length - 1;  // index of the trailing static text piece
+ for (let i = 0; i < length; i += 2) {
+ xml.text(pieces[i]);
+ xml.startTag('ph', {name: pieces[i + 1]}, {selfClosing: true});
+ }
+ xml.text(pieces[length]);
}
/**
diff --git a/packages/localize/src/tools/test/extract/integration/main_spec.ts b/packages/localize/src/tools/test/extract/integration/main_spec.ts
index 7e5fd37fc6..ae2d27dd16 100644
--- a/packages/localize/src/tools/test/extract/integration/main_spec.ts
+++ b/packages/localize/src/tools/test/extract/integration/main_spec.ts
@@ -175,12 +175,12 @@ runInEachFileSystem(() => {
` `,
` `,
` `,
- ` `,
+ ` `,
` `,
` `,
` `,
` `,
- ` `,
+ ` `,
` `,
` `,
` `,
diff --git a/packages/localize/src/tools/test/extract/translation_files/icu_parsing_spec.ts b/packages/localize/src/tools/test/extract/translation_files/icu_parsing_spec.ts
new file mode 100644
index 0000000000..092cdf6bc5
--- /dev/null
+++ b/packages/localize/src/tools/test/extract/translation_files/icu_parsing_spec.ts
@@ -0,0 +1,76 @@
+/**
+ * @license
+ * Copyright Google LLC All Rights Reserved.
+ *
+ * Use of this source code is governed by an MIT-style license that can be
+ * found in the LICENSE file at https://angular.io/license
+ */
+
+import {extractIcuPlaceholders} from '../../../src/extract/translation_files/icu_parsing';
+
+describe('extractIcuPlaceholders()', () => {
+ it('should return a single string if there is no ICU', () => {
+ expect(extractIcuPlaceholders('')).toEqual(['']);
+ expect(extractIcuPlaceholders('some text')).toEqual(['some text']);
+ expect(extractIcuPlaceholders('some } text')).toEqual(['some } text']);  // unbalanced brace
+ expect(extractIcuPlaceholders('this is {not an ICU}')).toEqual(['this is {not an ICU}']);
+ });
+
+ it('should return a single string if there are no ICU placeholders', () => {
+ expect(extractIcuPlaceholders('{VAR_PLURAL, plural, one {SOME} few {FEW} other {OTHER}}'))
+ .toEqual(['{VAR_PLURAL, plural, one {SOME} few {FEW} other {OTHER}}']);
+ expect(extractIcuPlaceholders('{VAR_SELECT, select, male {HE} female {SHE} other {XE}}'))
+ .toEqual(['{VAR_SELECT, select, male {HE} female {SHE} other {XE}}']);
+ });
+
+ it('should split out simple interpolation placeholders', () => {
+ expect(
+ extractIcuPlaceholders(
+ '{VAR_PLURAL, plural, one {{INTERPOLATION}} few {pre {INTERPOLATION_1}} other {{INTERPOLATION_2} post}}'))
+ .toEqual([
+ '{VAR_PLURAL, plural, one {',
+ 'INTERPOLATION',
+ '} few {pre ',
+ 'INTERPOLATION_1',
+ '} other {',
+ 'INTERPOLATION_2',
+ ' post}}',
+ ]);
+ });
+
+ it('should split out element placeholders', () => {
+ expect(
+ extractIcuPlaceholders(
+ '{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'))
+ .toEqual([
+ '{VAR_PLURAL, plural, one {',
+ 'START_BOLD_TEXT',
+ 'something bold',
+ 'CLOSE_BOLD_TEXT',
+ '} other {pre ',
+ 'START_TAG_SPAN',
+ 'middle',
+ 'CLOSE_TAG_SPAN',
+ ' post}}',
+ ]);
+ });
+
+ it('should handle nested ICUs', () => {  // the nested ICU itself stays in the static text
+ expect(extractIcuPlaceholders([
+ '{VAR_SELECT_1, select,',
+ ' invoice {Invoice for {INTERPOLATION}}',
+ ' payment {{VAR_SELECT, select,',
+ ' processor {Payment gateway}',
+ ' other {{INTERPOLATION_1}}',
+ ' }}',
+ '}',
+ ].join('\n')))
+ .toEqual([
+ '{VAR_SELECT_1, select,\n invoice {Invoice for ',
+ 'INTERPOLATION',
+ '}\n payment {{VAR_SELECT, select,\n processor {Payment gateway}\n other {',
+ 'INTERPOLATION_1',
+ '}\n }}\n}',
+ ]);
+ });
+});
\ No newline at end of file
diff --git a/packages/localize/src/tools/test/extract/translation_files/json_translation_serializer_spec.ts b/packages/localize/src/tools/test/extract/translation_files/json_translation_serializer_spec.ts
index bcfa7bd36e..bd4bb50e88 100644
--- a/packages/localize/src/tools/test/extract/translation_files/json_translation_serializer_spec.ts
+++ b/packages/localize/src/tools/test/extract/translation_files/json_translation_serializer_spec.ts
@@ -22,6 +22,19 @@ describe('JsonTranslationSerializer', () => {
mockMessage('13579', ['', 'b', ''], ['START_BOLD_TEXT', 'CLOSE_BOLD_TEXT'], {}),
mockMessage('24680', ['a'], [], {meaning: 'meaning', description: 'and description'}),
mockMessage('80808', ['multi\nlines'], [], {}),
+ mockMessage('90000', [''], ['double-quotes-"'], {}),
+ mockMessage(
+ '100000',
+ [
+ 'pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU'
+ ],
+ [], {}),
+ mockMessage(
+ '100001',
+ [
+ '{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'
+ ],
+ [], {}),
];
const serializer = new SimpleJsonTranslationSerializer('xx');
const output = serializer.serialize(messages);
@@ -33,7 +46,10 @@ describe('JsonTranslationSerializer', () => {
` "13579": "{$START_BOLD_TEXT}b{$CLOSE_BOLD_TEXT}",`,
` "24680": "a",`,
` "67890": "a{$START_TAG_SPAN}{$CLOSE_TAG_SPAN}c",`,
- ` "80808": "multi\\nlines"`,
+ ` "80808": "multi\\nlines",`,
+ ` "90000": "",`,
+ ` "100000": "pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU",`,
+ ` "100001": "{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}"`,
` }`,
`}`,
].join('\n'));
diff --git a/packages/localize/src/tools/test/extract/translation_files/xliff1_translation_serializer_spec.ts b/packages/localize/src/tools/test/extract/translation_files/xliff1_translation_serializer_spec.ts
index 2b89404fe2..a0e832ffb0 100644
--- a/packages/localize/src/tools/test/extract/translation_files/xliff1_translation_serializer_spec.ts
+++ b/packages/localize/src/tools/test/extract/translation_files/xliff1_translation_serializer_spec.ts
@@ -34,7 +34,19 @@ runInEachFileSystem(() => {
mockMessage('13579', ['', 'b', ''], ['START_BOLD_TEXT', 'CLOSE_BOLD_TEXT'], {}),
mockMessage('24680', ['a'], [], {meaning: 'meaning', description: 'and description'}),
mockMessage('80808', ['multi\nlines'], [], {}),
- mockMessage('90000', [''], ['double-quotes-"'], {})
+ mockMessage('90000', [''], ['double-quotes-"'], {}),
+ mockMessage(
+ '100000',
+ [
+ 'pre-ICU {VAR_SELECT, select, a {a} b {{INTERPOLATION}} c {pre {INTERPOLATION_1} post}} post-ICU'
+ ],
+ [], {}),
+ mockMessage(
+ '100001',
+ [
+ '{VAR_PLURAL, plural, one {{START_BOLD_TEXT}something bold{CLOSE_BOLD_TEXT}} other {pre {START_TAG_SPAN}middle{CLOSE_TAG_SPAN} post}}'
+ ],
+ [], {}),
];
const serializer =
new Xliff1TranslationSerializer('xx', absoluteFrom('/project'), useLegacyIds);
@@ -73,6 +85,12 @@ runInEachFileSystem(() => {
` `,
` `,
` `,
+ ` `,
+ ` `,
+ ` `,
+ ` `,
+ ` `,
+ ` `,
`