// TODO(vicb): move this to some shared place, as we also need it
// for SourceMaps.
/**
 * Encodes a JavaScript (UTF-16) string as UTF-8.
 *
 * The result is a "byte string": each character of the returned string has a char code in the
 * range 0x00-0xff and stands for one byte of the UTF-8 encoding.
 *
 * A well-formed surrogate pair (high surrogate immediately followed by a low surrogate) is
 * combined into one code point and emitted as a single 4-byte sequence. An unpaired surrogate
 * is emitted as the 3-byte encoding of its own code unit.
 *
 * @param str The string to encode.
 * @returns The UTF-8 bytes of `str`, one byte per character.
 */
export function utf8Encode(str: string): string {
  let encoded = '';

  for (let index = 0; index < str.length; index++) {
    const codePoint = decodeSurrogatePairs(str, index);

    if (codePoint > 0xffff) {
      // Only a combined surrogate pair can exceed 0xffff. It consumed two code units, so skip
      // the low surrogate; otherwise it would be encoded a second time on the next iteration.
      index++;
    }

    if (codePoint <= 0x7f) {
      encoded += String.fromCharCode(codePoint);
    } else if (codePoint <= 0x7ff) {
      encoded += String.fromCharCode(0xc0 | (codePoint >>> 6), 0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0xffff) {
      encoded += String.fromCharCode(
          0xe0 | (codePoint >>> 12), 0x80 | ((codePoint >>> 6) & 0x3f), 0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0x1fffff) {
      encoded += String.fromCharCode(
          0xf0 | (codePoint >>> 18), 0x80 | ((codePoint >>> 12) & 0x3f),
          0x80 | ((codePoint >>> 6) & 0x3f), 0x80 | (codePoint & 0x3f));
    }
  }

  return encoded;
}

/**
 * Returns the code point that starts at `index` in `str`.
 *
 * When the code unit at `index` is a high surrogate (0xd800-0xdbff) and the next code unit is a
 * low surrogate (0xdc00-0xdfff), the two are combined into a single code point above 0xffff.
 * In every other case the code unit at `index` is returned unchanged.
 *
 * see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
 *
 * @param str The string to read from.
 * @param index Position of the code unit to decode.
 * @returns The decoded code point.
 * @throws Error when `index` is outside of `str`.
 */
function decodeSurrogatePairs(str: string, index: number): number {
  if (index < 0 || index >= str.length) {
    throw new Error(`index=${index} is out of range in "${str}"`);
  }

  const high = str.charCodeAt(index);

  // Only 0xd800-0xdbff can start a pair; a low surrogate in first position is unpaired.
  if (high >= 0xd800 && high <= 0xdbff && str.length > index + 1) {
    // Read the full 16-bit code unit, the same way `high` is read.
    const low = str.charCodeAt(index + 1);
    if (low >= 0xdc00 && low <= 0xdfff) {
      return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
    }
  }

  return high;
}
/**
 * Converts a JavaScript (UTF-16) string into its UTF-8 representation.
 *
 * The returned value is a "byte string": every character has a char code between 0x00 and 0xff
 * and represents one byte of the encoded output.
 *
 * A high surrogate directly followed by a low surrogate is merged into one code point and
 * written as four bytes. A surrogate without a matching partner is written as the three-byte
 * encoding of its own code unit.
 *
 * @param str The text to encode.
 * @returns The UTF-8 bytes of `str`, one byte per character.
 */
export function utf8Encode(str: string): string {
  // Builds a continuation byte (10xxxxxx) from the low six bits of `bits`.
  const continuation = (bits: number): number => (bits & 0x3f) | 0x80;

  const pieces: string[] = [];
  const total = str.length;
  let pos = 0;

  while (pos < total) {
    let cp = str.charCodeAt(pos++);

    // decode surrogate
    // see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
    const isHighSurrogate = cp >= 0xd800 && cp <= 0xdbff;
    if (isHighSurrogate && pos < total) {
      const next = str.charCodeAt(pos);
      const isLowSurrogate = next >= 0xdc00 && next <= 0xdfff;
      if (isLowSurrogate) {
        // The pair occupies two code units; consume the second one as well.
        pos++;
        cp = ((cp - 0xd800) << 10) + next - 0xdc00 + 0x10000;
      }
    }

    if (cp <= 0x7f) {
      pieces.push(String.fromCharCode(cp));
      continue;
    }

    if (cp <= 0x7ff) {
      pieces.push(String.fromCharCode(((cp >> 6) & 0x1f) | 0xc0, continuation(cp)));
      continue;
    }

    if (cp <= 0xffff) {
      pieces.push(
          String.fromCharCode((cp >> 12) | 0xe0, continuation(cp >> 6), continuation(cp)));
      continue;
    }

    if (cp <= 0x1fffff) {
      pieces.push(String.fromCharCode(
          ((cp >> 18) & 0x07) | 0xf0, continuation(cp >> 12), continuation(cp >> 6),
          continuation(cp)));
    }
  }

  return pieces.join('');
}
a/packages/compiler/test/util_spec.ts b/packages/compiler/test/util_spec.ts index 9c9d883228..24c959d795 100644 --- a/packages/compiler/test/util_spec.ts +++ b/packages/compiler/test/util_spec.ts @@ -7,9 +7,7 @@ */ import {fakeAsync} from '@angular/core/testing/src/fake_async'; -import {describe, expect, it} from '@angular/core/testing/src/testing_internal'; - -import {SyncAsyncResult, escapeRegExp, splitAtColon} from '../src/util'; +import {SyncAsyncResult, escapeRegExp, splitAtColon, utf8Encode} from '../src/util'; export function main() { describe('util', () => { @@ -45,5 +43,45 @@ export function main() { expect(new RegExp(escapeRegExp('a.b')).exec('axb')).toBeFalsy(); }); }); + + describe('utf8encode', () => { + // tests from https://github.com/mathiasbynens/wtf-8 + it('should encode to utf8', () => { + const tests = [ + ['abc', 'abc'], + // // 1-byte + ['\0', '\0'], + // // 2-byte + ['\u0080', '\xc2\x80'], + ['\u05ca', '\xd7\x8a'], + ['\u07ff', '\xdf\xbf'], + // // 3-byte + ['\u0800', '\xe0\xa0\x80'], + ['\u2c3c', '\xe2\xb0\xbc'], + ['\uffff', '\xef\xbf\xbf'], + // //4-byte + ['\uD800\uDC00', '\xF0\x90\x80\x80'], + ['\uD834\uDF06', '\xF0\x9D\x8C\x86'], + ['\uDBFF\uDFFF', '\xF4\x8F\xBF\xBF'], + // unmatched surrogate halves + // high surrogates: 0xD800 to 0xDBFF + ['\uD800', '\xED\xA0\x80'], + ['\uD800\uD800', '\xED\xA0\x80\xED\xA0\x80'], + ['\uD800A', '\xED\xA0\x80A'], + ['\uD800\uD834\uDF06\uD800', '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80'], + ['\uD9AF', '\xED\xA6\xAF'], + ['\uDBFF', '\xED\xAF\xBF'], + // low surrogates: 0xDC00 to 0xDFFF + ['\uDC00', '\xED\xB0\x80'], + ['\uDC00\uDC00', '\xED\xB0\x80\xED\xB0\x80'], + ['\uDC00A', '\xED\xB0\x80A'], + ['\uDC00\uD834\uDF06\uDC00', '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80'], + ['\uDEEE', '\xED\xBB\xAE'], + ['\uDFFF', '\xED\xBF\xBF'], + ]; + tests.forEach( + ([input, output]: [string, string]) => { expect(utf8Encode(input)).toEqual(output); }); + }); + }); }); -} +} \ No newline at end of file