fix(compiler): fix utf8encode, move to shared utils, add tests (#15076)
This commit is contained in:
parent
3b1956bbf2
commit
959a03a61f
|
@ -6,6 +6,8 @@
|
|||
* found in the LICENSE file at https://angular.io/license
|
||||
*/
|
||||
|
||||
import {utf8Encode} from '../util';
|
||||
|
||||
import * as i18n from './i18n_ast';
|
||||
|
||||
export function digest(message: i18n.Message): string {
|
||||
|
@ -210,49 +212,6 @@ enum Endian {
|
|||
Big,
|
||||
}
|
||||
|
||||
// TODO(vicb): move this to some shared place, as we also need it
|
||||
// for SourceMaps.
|
||||
/**
 * Encodes a JavaScript string (a sequence of UTF-16 code units) as UTF-8.
 *
 * The result is a "binary string": every character of the returned string has a
 * char code in the range 0x00-0xff and stands for one UTF-8 byte.
 *
 * A valid surrogate pair is combined into one code point and emitted as a single
 * four-byte sequence. A surrogate without a matching partner is emitted as its
 * own three-byte sequence.
 *
 * @param str the text to encode
 * @returns the UTF-8 bytes of `str`, one byte per character
 */
export function utf8Encode(str: string): string {
  let encoded = '';

  for (let index = 0; index < str.length; index++) {
    const codePoint = decodeSurrogatePairs(str, index);

    if (codePoint <= 0x7f) {
      // 1 byte: 0xxxxxxx
      encoded += String.fromCharCode(codePoint);
    } else if (codePoint <= 0x7ff) {
      // 2 bytes: 110xxxxx 10xxxxxx
      encoded += String.fromCharCode(0xc0 | (codePoint >>> 6), 0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0xffff) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      encoded += String.fromCharCode(
          0xe0 | (codePoint >>> 12), 0x80 | ((codePoint >>> 6) & 0x3f), 0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0x1fffff) {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      encoded += String.fromCharCode(
          0xf0 | (codePoint >>> 18), 0x80 | ((codePoint >>> 12) & 0x3f),
          0x80 | ((codePoint >>> 6) & 0x3f), 0x80 | (codePoint & 0x3f));
      // A value above 0xffff can only come from a combined surrogate pair, which
      // means the code unit at `index + 1` was already consumed. Skip it so the
      // trail surrogate is not encoded a second time.
      index++;
    }
  }

  return encoded;
}

// see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
/**
 * Reads the code point that starts at `index` in `str`.
 *
 * When the code unit at `index` is a lead surrogate (0xd800-0xdbff) and the next
 * code unit is a trail surrogate (0xdc00-0xdfff), the two are combined into one
 * supplementary code point. In every other case the code unit at `index` is
 * returned unchanged, including unpaired surrogates.
 *
 * @param str the string to read from
 * @param index position of the code unit to decode
 * @returns the decoded code point
 * @throws Error when `index` is outside of `str`
 */
function decodeSurrogatePairs(str: string, index: number): number {
  if (index < 0 || index >= str.length) {
    throw new Error(`index=${index} is out of range in "${str}"`);
  }

  const high = str.charCodeAt(index);

  // Only 0xd800-0xdbff may start a pair. Accepting the whole 0xd800-0xdfff range
  // would fuse two trail surrogates into a value above 0x10ffff.
  if (high >= 0xd800 && high <= 0xdbff && str.length > index + 1) {
    // Read the full 16-bit code unit, the same way `high` is read, so it can be
    // compared against the trail-surrogate range.
    const low = str.charCodeAt(index + 1);
    if (low >= 0xdc00 && low <= 0xdfff) {
      return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
    }
  }

  return high;
}
|
||||
|
||||
/**
 * Adds `a` and `b` by delegating to `add32to64` and returning the element at
 * index 1 of its result.
 *
 * NOTE(review): `add32to64` is defined outside this view; index 1 is presumably
 * the low 32 bits of the sum - confirm against its definition.
 */
function add32(a: number, b: number): number {
  const sum = add32to64(a, b);
  return sum[1];
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
* found in the LICENSE file at https://angular.io/license
|
||||
*/
|
||||
|
||||
import {utf8Encode} from '../i18n/digest';
|
||||
import {utf8Encode} from '../util';
|
||||
|
||||
// https://docs.google.com/document/d/1U1RGAehQwRypUTovF1KRlpiOFze0b-_2gc6fAH0KY0k/edit
|
||||
const VERSION = 3;
|
||||
|
|
|
@ -98,3 +98,35 @@ const STRING_MAP_PROTO = Object.getPrototypeOf({});
|
|||
/**
 * Tells whether `obj` is a non-null object whose direct prototype is
 * `STRING_MAP_PROTO` (the prototype of a plain `{}` literal).
 *
 * @param obj the value to inspect
 * @returns `true` only for objects whose prototype is exactly `STRING_MAP_PROTO`
 */
function isStrictStringMap(obj: any): boolean {
  // Primitives, functions and `null` can never qualify.
  if (typeof obj !== 'object' || obj === null) {
    return false;
  }
  return Object.getPrototypeOf(obj) === STRING_MAP_PROTO;
}
|
||||
|
||||
/**
 * Converts a UTF-16 JavaScript string into its UTF-8 representation.
 *
 * Each character of the returned string carries one UTF-8 byte (char code
 * 0x00-0xff). A lead surrogate immediately followed by a trail surrogate is
 * merged into a single code point and written as four bytes; a surrogate with
 * no partner is written as three bytes on its own.
 *
 * @param str the text to encode
 * @returns a string whose char codes are the UTF-8 bytes of `str`
 */
export function utf8Encode(str: string): string {
  // Builds a continuation byte (10xxxxxx) from the low six bits of `bits`.
  const continuation = (bits: number): number => 0x80 | (bits & 0x3f);

  const pieces: string[] = [];
  let pos = 0;

  while (pos < str.length) {
    let cp = str.charCodeAt(pos++);

    // decode surrogate
    // see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
    const isLeadSurrogate = cp >= 0xd800 && cp <= 0xdbff;
    if (isLeadSurrogate && pos < str.length) {
      const trail = str.charCodeAt(pos);
      if (trail >= 0xdc00 && trail <= 0xdfff) {
        pos++;
        cp = ((cp - 0xd800) << 10) + trail - 0xdc00 + 0x10000;
      }
    }

    if (cp <= 0x7f) {
      pieces.push(String.fromCharCode(cp));
    } else if (cp <= 0x7ff) {
      pieces.push(String.fromCharCode(0xc0 | ((cp >> 6) & 0x1f), continuation(cp)));
    } else if (cp <= 0xffff) {
      pieces.push(String.fromCharCode(0xe0 | (cp >> 12), continuation(cp >> 6), continuation(cp)));
    } else if (cp <= 0x1fffff) {
      pieces.push(String.fromCharCode(
          0xf0 | ((cp >> 18) & 0x07), continuation(cp >> 12), continuation(cp >> 6),
          continuation(cp)));
    }
  }

  return pieces.join('');
}
|
||||
|
|
|
@ -7,9 +7,7 @@
|
|||
*/
|
||||
|
||||
import {fakeAsync} from '@angular/core/testing/src/fake_async';
|
||||
import {describe, expect, it} from '@angular/core/testing/src/testing_internal';
|
||||
|
||||
import {SyncAsyncResult, escapeRegExp, splitAtColon} from '../src/util';
|
||||
import {SyncAsyncResult, escapeRegExp, splitAtColon, utf8Encode} from '../src/util';
|
||||
|
||||
export function main() {
|
||||
describe('util', () => {
|
||||
|
@ -45,5 +43,45 @@ export function main() {
|
|||
expect(new RegExp(escapeRegExp('a.b')).exec('axb')).toBeFalsy();
|
||||
});
|
||||
});
|
||||
|
||||
describe('utf8encode', () => {
|
||||
// tests from https://github.com/mathiasbynens/wtf-8
|
||||
it('should encode to utf8', () => {
|
||||
const tests = [
|
||||
['abc', 'abc'],
|
||||
// 1-byte
|
||||
['\0', '\0'],
|
||||
// 2-byte
|
||||
['\u0080', '\xc2\x80'],
|
||||
['\u05ca', '\xd7\x8a'],
|
||||
['\u07ff', '\xdf\xbf'],
|
||||
// 3-byte
|
||||
['\u0800', '\xe0\xa0\x80'],
|
||||
['\u2c3c', '\xe2\xb0\xbc'],
|
||||
['\uffff', '\xef\xbf\xbf'],
|
||||
// 4-byte
|
||||
['\uD800\uDC00', '\xF0\x90\x80\x80'],
|
||||
['\uD834\uDF06', '\xF0\x9D\x8C\x86'],
|
||||
['\uDBFF\uDFFF', '\xF4\x8F\xBF\xBF'],
|
||||
// unmatched surrogate halves
|
||||
// high surrogates: 0xD800 to 0xDBFF
|
||||
['\uD800', '\xED\xA0\x80'],
|
||||
['\uD800\uD800', '\xED\xA0\x80\xED\xA0\x80'],
|
||||
['\uD800A', '\xED\xA0\x80A'],
|
||||
['\uD800\uD834\uDF06\uD800', '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80'],
|
||||
['\uD9AF', '\xED\xA6\xAF'],
|
||||
['\uDBFF', '\xED\xAF\xBF'],
|
||||
// low surrogates: 0xDC00 to 0xDFFF
|
||||
['\uDC00', '\xED\xB0\x80'],
|
||||
['\uDC00\uDC00', '\xED\xB0\x80\xED\xB0\x80'],
|
||||
['\uDC00A', '\xED\xB0\x80A'],
|
||||
['\uDC00\uD834\uDF06\uDC00', '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80'],
|
||||
['\uDEEE', '\xED\xBB\xAE'],
|
||||
['\uDFFF', '\xED\xBF\xBF'],
|
||||
];
|
||||
tests.forEach(
|
||||
([input, output]: [string, string]) => { expect(utf8Encode(input)).toEqual(output); });
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
Loading…
Reference in New Issue