From d281ea820b15a90d73cb0ba6916ad689707ad98a Mon Sep 17 00:00:00 2001 From: JoostK Date: Sun, 15 Nov 2020 15:53:05 +0100 Subject: [PATCH] perf(compiler): use raw bytes to represent utf-8 encoded strings (#39694) The result of utf-8 encoding a string was represented in a string, where each individual character represented a single byte according to its character code. All usages of this data were interested in the byte itself, so this required conversion from a character back to its code. This commit simply stores the individual bytes in an array to avoid the conversion. This yields a ~10% performance improvement for i18n message ID computation. PR Close #39694 --- packages/compiler/src/i18n/digest.ts | 56 +++++++++++----------- packages/compiler/src/output/source_map.ts | 16 +++---- packages/compiler/src/util.ts | 14 +++--- packages/compiler/test/util_spec.ts | 2 +- 4 files changed, 45 insertions(+), 43 deletions(-) diff --git a/packages/compiler/src/i18n/digest.ts b/packages/compiler/src/i18n/digest.ts index cd58fab27a..85a6d629bc 100644 --- a/packages/compiler/src/i18n/digest.ts +++ b/packages/compiler/src/i18n/digest.ts @@ -6,7 +6,7 @@ * found in the LICENSE file at https://angular.io/license */ -import {newArray, utf8Encode} from '../util'; +import {Byte, newArray, utf8Encode} from '../util'; import {BigIntExponentiation} from './big_integer'; import * as i18n from './i18n_ast'; @@ -110,7 +110,7 @@ class _SerializerIgnoreIcuExpVisitor extends _SerializerVisitor { */ export function sha1(str: string): string { const utf8 = utf8Encode(str); - const words32 = stringToWords32(utf8, Endian.Big); + const words32 = bytesToWords32(utf8, Endian.Big); const len = utf8.length * 8; const w = newArray(80); @@ -146,7 +146,7 @@ export function sha1(str: string): string { e = add32(e, h4); } - return byteStringToHexString(words32ToByteString([a, b, c, d, e])); + return bytesToHexString(words32ToByteString([a, b, c, d, e])); } function fk(index: number, b: number, c: number, d: 
number): [number, number] { @@ -201,25 +201,25 @@ export function computeMsgId(msg: string, meaning: string = ''): string { return wordsToDecimalString(hi & 0x7fffffff, lo); } -function hash32(str: string, c: number): number { +function hash32(bytes: Byte[], c: number): number { let a = 0x9e3779b9, b = 0x9e3779b9; let i: number; - const len = str.length; + const len = bytes.length; for (i = 0; i + 12 <= len; i += 12) { - a = add32(a, wordAt(str, i, Endian.Little)); - b = add32(b, wordAt(str, i + 4, Endian.Little)); - c = add32(c, wordAt(str, i + 8, Endian.Little)); + a = add32(a, wordAt(bytes, i, Endian.Little)); + b = add32(b, wordAt(bytes, i + 4, Endian.Little)); + c = add32(c, wordAt(bytes, i + 8, Endian.Little)); const res = mix(a, b, c); a = res[0], b = res[1], c = res[2]; } - a = add32(a, wordAt(str, i, Endian.Little)); - b = add32(b, wordAt(str, i + 4, Endian.Little)); + a = add32(a, wordAt(bytes, i, Endian.Little)); + b = add32(b, wordAt(bytes, i + 4, Endian.Little)); // the first byte of c is reserved for the length c = add32(c, len); - c = add32(c, wordAt(str, i + 8, Endian.Little) << 8); + c = add32(c, wordAt(bytes, i + 8, Endian.Little) << 8); return mix(a, b, c)[2]; } @@ -285,51 +285,51 @@ function rol64(num: [number, number], count: number): [number, number] { return [h, l]; } -function stringToWords32(str: string, endian: Endian): number[] { - const size = (str.length + 3) >>> 2; +function bytesToWords32(bytes: Byte[], endian: Endian): number[] { + const size = (bytes.length + 3) >>> 2; const words32 = []; for (let i = 0; i < size; i++) { - words32[i] = wordAt(str, i * 4, endian); + words32[i] = wordAt(bytes, i * 4, endian); } return words32; } -function byteAt(str: string, index: number): number { - return index >= str.length ? 0 : str.charCodeAt(index) & 0xff; +function byteAt(bytes: Byte[], index: number): Byte { + return index >= bytes.length ? 
0 : bytes[index]; } -function wordAt(str: string, index: number, endian: Endian): number { +function wordAt(bytes: Byte[], index: number, endian: Endian): number { let word = 0; if (endian === Endian.Big) { for (let i = 0; i < 4; i++) { - word += byteAt(str, index + i) << (24 - 8 * i); + word += byteAt(bytes, index + i) << (24 - 8 * i); } } else { for (let i = 0; i < 4; i++) { - word += byteAt(str, index + i) << 8 * i; + word += byteAt(bytes, index + i) << 8 * i; } } return word; } -function words32ToByteString(words32: number[]): string { - return words32.reduce((str, word) => str + word32ToByteString(word), ''); +function words32ToByteString(words32: number[]): Byte[] { + return words32.reduce((bytes, word) => bytes.concat(word32ToByteString(word)), [] as Byte[]); } -function word32ToByteString(word: number): string { - let str = ''; +function word32ToByteString(word: number): Byte[] { + let bytes: Byte[] = []; for (let i = 0; i < 4; i++) { - str += String.fromCharCode((word >>> 8 * (3 - i)) & 0xff); + bytes.push((word >>> 8 * (3 - i)) & 0xff); } - return str; + return bytes; } -function byteStringToHexString(str: string): string { +function bytesToHexString(bytes: Byte[]): string { let hex: string = ''; - for (let i = 0; i < str.length; i++) { - const b = byteAt(str, i); + for (let i = 0; i < bytes.length; i++) { + const b = byteAt(bytes, i); hex += (b >>> 4).toString(16) + (b & 0x0f).toString(16); } return hex.toLowerCase(); diff --git a/packages/compiler/src/output/source_map.ts b/packages/compiler/src/output/source_map.ts index 5bf7a3956c..915f3ea4fa 100644 --- a/packages/compiler/src/output/source_map.ts +++ b/packages/compiler/src/output/source_map.ts @@ -151,15 +151,15 @@ export class SourceMapGenerator { export function toBase64String(value: string): string { let b64 = ''; - value = utf8Encode(value); - for (let i = 0; i < value.length;) { - const i1 = value.charCodeAt(i++); - const i2 = value.charCodeAt(i++); - const i3 = value.charCodeAt(i++); + const 
encoded = utf8Encode(value); + for (let i = 0; i < encoded.length;) { + const i1 = encoded[i++]; + const i2 = i < encoded.length ? encoded[i++] : null; + const i3 = i < encoded.length ? encoded[i++] : null; b64 += toBase64Digit(i1 >> 2); - b64 += toBase64Digit(((i1 & 3) << 4) | (isNaN(i2) ? 0 : i2 >> 4)); - b64 += isNaN(i2) ? '=' : toBase64Digit(((i2 & 15) << 2) | (i3 >> 6)); - b64 += isNaN(i2) || isNaN(i3) ? '=' : toBase64Digit(i3 & 63); + b64 += toBase64Digit(((i1 & 3) << 4) | (i2 === null ? 0 : i2 >> 4)); + b64 += i2 === null ? '=' : toBase64Digit(((i2 & 15) << 2) | (i3 === null ? 0 : i3 >> 6)); + b64 += i2 === null || i3 === null ? '=' : toBase64Digit(i3 & 63); } return b64; diff --git a/packages/compiler/src/util.ts b/packages/compiler/src/util.ts index 0d58b083e1..8c62653f09 100644 --- a/packages/compiler/src/util.ts +++ b/packages/compiler/src/util.ts @@ -132,8 +132,10 @@ function isStrictStringMap(obj: any): boolean { return typeof obj === 'object' && obj !== null && Object.getPrototypeOf(obj) === STRING_MAP_PROTO; } -export function utf8Encode(str: string): string { - let encoded = ''; +export type Byte = number; + +export function utf8Encode(str: string): Byte[] { + let encoded: Byte[] = []; for (let index = 0; index < str.length; index++) { let codePoint = str.charCodeAt(index); @@ -148,14 +150,14 @@ export function utf8Encode(str: string): string { } if (codePoint <= 0x7f) { - encoded += String.fromCharCode(codePoint); + encoded.push(codePoint); } else if (codePoint <= 0x7ff) { - encoded += String.fromCharCode(((codePoint >> 6) & 0x1F) | 0xc0, (codePoint & 0x3f) | 0x80); + encoded.push(((codePoint >> 6) & 0x1F) | 0xc0, (codePoint & 0x3f) | 0x80); } else if (codePoint <= 0xffff) { - encoded += String.fromCharCode( + encoded.push( (codePoint >> 12) | 0xe0, ((codePoint >> 6) & 0x3f) | 0x80, (codePoint & 0x3f) | 0x80); } else if (codePoint <= 0x1fffff) { - encoded += String.fromCharCode( + encoded.push( ((codePoint >> 18) & 0x07) | 0xf0, ((codePoint >> 12) 
& 0x3f) | 0x80, ((codePoint >> 6) & 0x3f) | 0x80, (codePoint & 0x3f) | 0x80); } diff --git a/packages/compiler/test/util_spec.ts b/packages/compiler/test/util_spec.ts index 43b19d5c6d..0a9d49a720 100644 --- a/packages/compiler/test/util_spec.ts +++ b/packages/compiler/test/util_spec.ts @@ -73,7 +73,7 @@ import {escapeRegExp, partitionArray, splitAtColon, stringify, utf8Encode} from ['\uDFFF', '\xED\xBF\xBF'], ]; tests.forEach(([input, output]) => { - expect(utf8Encode(input)).toEqual(output); + expect(utf8Encode(input).map(byte => String.fromCharCode(byte)).join('')).toEqual(output); }); }); });