perf(compiler): optimize computation of i18n message ids (#39694)

Message ID computation makes extensive use of big integer
multiplications in order to translate the message's fingerprint into
a numerical representation. In large compilations with heavy use of i18n
this was showing up high in profiler sessions.

There are two factors contributing to the bottleneck:

1. a suboptimal big integer representation using strings, which requires
repeated allocation and conversion from a character to numeric digits
and back.
2. repeated computation of the necessary base-256 exponents and their
multiplication factors.

The first bottleneck is addressed using a representation that uses an
array of individual digits. This avoids the repeated conversion, and
allocation overhead is also greatly reduced, as adding two big integers
can now be done in-place with virtually no memory allocations.

The second point is addressed by a memoized exponentiation pool to
optimize the multiplication of a base-256 exponent.

As an additional optimization, the two 32-bit words are now converted
to decimal per word, instead of going through an intermediate byte
buffer and doing the decimal conversion per byte.

The results of these optimizations depend a lot on the number of i18n
messages for which a message ID should be computed. Benchmarks have
shown that computing message IDs is now ~6x faster for 1,000 messages,
~14x faster for 10,000 messages, and ~24x faster for 100,000 messages.

PR Close #39694
This commit is contained in:
JoostK 2020-11-14 22:57:26 +01:00 committed by atscott
parent 9f20942f89
commit 604b4e46c8
3 changed files with 430 additions and 38 deletions

View File

@ -0,0 +1,196 @@
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
/**
 * A big integer stored as an array of decimal digits in little-endian order, i.e. the digit at
 * index 0 is the least significant one.
 *
 * Instances are deliberately mutable: `addToSelf` updates the digit array in place so that
 * accumulating a sum does not allocate a fresh array for every addition. Use `add` when the
 * operands must be left untouched.
 */
export class BigInteger {
  /** Creates a new big integer with the value 0. */
  static zero(): BigInteger {
    return new BigInteger([0]);
  }

  /** Creates a new big integer with the value 1. */
  static one(): BigInteger {
    return new BigInteger([1]);
  }

  /**
   * @param digits The decimal digits of the value, least significant digit first. The array is
   *     owned (and mutated) by the instance.
   */
  private constructor(private readonly digits: number[]) {}

  /**
   * Returns an independent copy of this big integer; mutating the copy does not affect `this`.
   */
  clone(): BigInteger {
    return new BigInteger([...this.digits]);
  }

  /**
   * Computes `this + other` into a newly allocated big integer. Neither operand is modified;
   * see `addToSelf` for the in-place variant.
   */
  add(other: BigInteger): BigInteger {
    const sum = this.clone();
    sum.addToSelf(other);
    return sum;
  }

  /**
   * Adds `other` onto this instance in place. `other` is only read, never written.
   */
  addToSelf(other: BigInteger): void {
    const ours = this.digits;
    const theirs = other.digits;
    const width = Math.max(ours.length, theirs.length);

    let carry = 0;
    for (let pos = 0; pos < width; pos++) {
      // Positions beyond the end of either operand contribute a zero digit. Both digits are read
      // before the write below, which keeps the operation correct even if `other === this`.
      const ourDigit = pos < ours.length ? ours[pos] : 0;
      const theirDigit = pos < theirs.length ? theirs[pos] : 0;
      const total = carry + ourDigit + theirDigit;

      // Two decimal digits plus a carry never exceed 19, so the carry is either 0 or 1.
      carry = total >= 10 ? 1 : 0;
      ours[pos] = total - 10 * carry;
    }

    // A carry out of the most significant position extends the number by one digit.
    if (carry > 0) {
      ours[width] = 1;
    }
  }

  /**
   * Renders the value as a decimal string. The digits are stored least significant first, so
   * they are emitted from the end of the array towards the start.
   */
  toString(): string {
    return this.digits.reduceRight((text, digit) => text + digit, '');
  }
}
/**
 * Wraps a big integer so that it can be multiplied by plain JS numbers efficiently. The wrapper
 * lazily builds and memoizes the sequence `value * 2^0, value * 2^1, value * 2^2, ...`, which
 * `multiplyByAndAddTo()` combines to form a product.
 */
export class BigIntForMultiplication {
  /**
   * Memoized multiples of the wrapped value: the entry at `index` holds `value * 2^index`.
   * Index 0 is therefore the wrapped value itself.
   */
  private readonly powerOfTwos: BigInteger[];

  constructor(value: BigInteger) {
    this.powerOfTwos = [value];
  }

  /**
   * Returns the wrapped big integer (the same instance that was passed to the constructor).
   */
  getValue(): BigInteger {
    return this.powerOfTwos[0];
  }

  /**
   * Computes `num * b` as a new big integer, where `b` is the wrapped value.
   *
   * The product is formed by binary decomposition of `num`: for every set bit at position `k`
   * the memoized value `b * 2^k` is added to the result. This takes O(log2(num)) big integer
   * additions. Because the implementation relies on the `&` and `>>>` operators, `num` is
   * processed as an unsigned 32-bit integer.
   *
   * Example for num=42 (0b101010) and b=1337:
   *
   *  Bit | Set? | b * 2^bit | running product
   * -----|------|-----------|----------------
   *  0   | No   | 1337      | 0
   *  1   | Yes  | 2674      | 2674
   *  2   | No   | 5348      | 2674
   *  3   | Yes  | 10696     | 13370
   *  4   | No   | 21392     | 13370
   *  5   | Yes  | 42784     | 56154
   *
   * which yields 56154 = 42 * 1337.
   */
  multiplyBy(num: number): BigInteger {
    const product = BigInteger.zero();
    this.multiplyByAndAddTo(num, product);
    return product;
  }

  /**
   * Same computation as `multiplyBy()`, but instead of returning the product it is accumulated
   * into `result`, which is mutated in place.
   */
  multiplyByAndAddTo(num: number, result: BigInteger): void {
    let remainingBits = num;
    let bitIndex = 0;
    while (remainingBits !== 0) {
      // Only set bits contribute `value * 2^bitIndex` to the product.
      if ((remainingBits & 1) !== 0) {
        result.addToSelf(this.getMultipliedByPowerOfTwo(bitIndex));
      }
      // Unsigned shift, so the loop also terminates when the sign bit is set.
      remainingBits >>>= 1;
      bitIndex++;
    }
  }

  /**
   * Returns the memoized big integer for `value * 2^exponent`, computing any missing entries up
   * to and including `exponent` first.
   */
  private getMultipliedByPowerOfTwo(exponent: number): BigInteger {
    const memo = this.powerOfTwos;
    // Each entry is the previous entry doubled, i.e. added to itself.
    while (memo.length <= exponent) {
      const previous = memo[memo.length - 1];
      memo.push(previous.add(previous));
    }
    return memo[exponent];
  }
}
/**
 * Lazily computes and memoizes the powers `base^0, base^1, base^2, ...` of a fixed numeric base.
 * Every power is handed out as a `BigIntForMultiplication`, which in turn memoizes its own
 * power-of-two multiples, so the pool as a whole behaves like a matrix that is filled in on
 * demand.
 */
export class BigIntExponentiation {
  /** Memoized powers: the entry at `index` wraps `base^index`. Seeded with `base^0 = 1`. */
  private readonly exponents = [new BigIntForMultiplication(BigInteger.one())];

  constructor(private readonly base: number) {}

  /**
   * Returns `base^exponent` in a form that is optimized for further multiplications. Repeated
   * requests for the same exponent return the same memoized instance.
   */
  toThePowerOf(exponent: number): BigIntForMultiplication {
    const pool = this.exponents;
    // Extend the pool one power at a time; each new entry is its predecessor times `base`.
    while (pool.length <= exponent) {
      const previousPower = pool[pool.length - 1];
      pool.push(new BigIntForMultiplication(previousPower.multiplyBy(this.base)));
    }
    return pool[exponent];
  }
}

View File

@ -7,6 +7,7 @@
*/ */
import {newArray, utf8Encode} from '../util'; import {newArray, utf8Encode} from '../util';
import {BigIntExponentiation} from './big_integer';
import * as i18n from './i18n_ast'; import * as i18n from './i18n_ast';
@ -197,7 +198,7 @@ export function computeMsgId(msg: string, meaning: string = ''): string {
const hi = msgFingerprint[0]; const hi = msgFingerprint[0];
const lo = msgFingerprint[1]; const lo = msgFingerprint[1];
return byteStringToDecString(words32ToByteString([hi & 0x7fffffff, lo])); return wordsToDecimalString(hi & 0x7fffffff, lo);
} }
function hash32(str: string, c: number): number { function hash32(str: string, c: number): number {
@ -334,43 +335,31 @@ function byteStringToHexString(str: string): string {
return hex.toLowerCase(); return hex.toLowerCase();
} }
// based on http://www.danvk.org/hex2dec.html (JS can not handle more than 56b) /**
function byteStringToDecString(str: string): string { * Create a shared exponentiation pool for base-256 computations. This shared pool provides memoized
let decimal = ''; * power-of-256 results with memoized power-of-two computations for efficient multiplication.
let toThePower = '1'; *
* For our purposes, this can be safely stored as a global without memory concerns. The reason is
* that we encode two words, so only need the 0th (for the low word) and 4th (for the high word)
* exponent.
*/
const base256 = new BigIntExponentiation(256);
for (let i = str.length - 1; i >= 0; i--) { /**
decimal = addBigInt(decimal, numberTimesBigInt(byteAt(str, i), toThePower)); * Represents two 32-bit words as a single decimal number. This requires a big integer storage
toThePower = numberTimesBigInt(256, toThePower); * model as JS numbers are not accurate enough to represent the 64-bit number.
} *
* Based on http://www.danvk.org/hex2dec.html
*/
function wordsToDecimalString(hi: number, lo: number): string {
// Encode the four bytes in lo in the lower digits of the decimal number.
// Note: the multiplication results in lo itself but represented by a big integer using its
// decimal digits.
const decimal = base256.toThePowerOf(0).multiplyBy(lo);
return decimal.split('').reverse().join(''); // Encode the four bytes in hi above the four lo bytes. lo is a maximum of (2^8)^4, which is why
} // this multiplication factor is applied.
base256.toThePowerOf(4).multiplyByAndAddTo(hi, decimal);
// x and y decimal, lowest significant digit first
function addBigInt(x: string, y: string): string { return decimal.toString();
let sum = '';
const len = Math.max(x.length, y.length);
for (let i = 0, carry = 0; i < len || carry; i++) {
const tmpSum = carry + +(x[i] || 0) + +(y[i] || 0);
if (tmpSum >= 10) {
carry = 1;
sum += tmpSum - 10;
} else {
carry = 0;
sum += tmpSum;
}
}
return sum;
}
function numberTimesBigInt(num: number, b: string): string {
let product = '';
let bToThePower = b;
for (; num !== 0; num = num >>> 1) {
if (num & 1) product = addBigInt(product, bToThePower);
bToThePower = addBigInt(bToThePower, bToThePower);
}
return product;
} }

View File

@ -0,0 +1,207 @@
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
import {BigInteger, BigIntExponentiation, BigIntForMultiplication} from '../../src/i18n/big_integer';
describe('big integers', () => {
  /**
   * Builds a `BigInteger` for a JS number by multiplying the big integer 1 with that number, as
   * `BigInteger` offers no public way to construct an arbitrary value directly.
   */
  const createBigInteger = (value: number): BigInteger =>
      new BigIntForMultiplication(BigInteger.one()).multiplyBy(value);

  describe('add', () => {
    it('should add two integers', () => {
      const sum = createBigInteger(42).add(createBigInteger(1337));

      expect(sum.toString()).toEqual('1379');
    });

    it('should add two integers with a carry', () => {
      const sum = createBigInteger(8).add(createBigInteger(995));

      expect(sum.toString()).toEqual('1003');
    });

    it('should add two integers beyond the maximum supported JS integer', () => {
      // Expected decimal values of 2^32 up to and including 2^65, in increasing order.
      const expectedPowersOfTwo = [
        '4294967296',
        '8589934592',
        '17179869184',
        '34359738368',
        '68719476736',
        '137438953472',
        '274877906944',
        '549755813888',
        '1099511627776',
        '2199023255552',
        '4398046511104',
        '8796093022208',
        '17592186044416',
        '35184372088832',
        '70368744177664',
        '140737488355328',
        '281474976710656',
        '562949953421312',
        '1125899906842624',
        '2251799813685248',
        '4503599627370496',
        '9007199254740992',
        '18014398509481984',
        // From here onwards would the result be inaccurate with JavaScript numbers.
        '36028797018963968',
        '72057594037927936',
        '144115188075855872',
        '288230376151711744',
        '576460752303423488',
        '1152921504606846976',
        '2305843009213693952',
        '4611686018427387904',
        '9223372036854775808',
        '18446744073709551616',
        '36893488147419103232',
      ];

      // Start at 2^31 and keep doubling; every doubling must match the next expected value.
      let power = createBigInteger(1 << 31);
      for (const expected of expectedPowersOfTwo) {
        power = power.add(power);
        expect(power.toString()).toEqual(expected);
      }
    });

    it('should not mutate the big integer instances', () => {
      const left = createBigInteger(42);
      const right = createBigInteger(1337);

      left.add(right);

      expect(left.toString()).toEqual('42');
      expect(right.toString()).toEqual('1337');
    });
  });

  describe('addToSelf', () => {
    it('should add two integers into the left operand', () => {
      const left = createBigInteger(42);
      const right = createBigInteger(1337);

      left.addToSelf(right);

      expect(left.toString()).toEqual('1379');
    });

    it('should not mutate the right operand', () => {
      const left = createBigInteger(42);
      const right = createBigInteger(1337);

      left.addToSelf(right);

      expect(left.toString()).toEqual('1379');
      expect(right.toString()).toEqual('1337');
    });
  });

  describe('multiplication', () => {
    it('should be correct for 0', () => {
      const multiplicand = new BigIntForMultiplication(createBigInteger(0));
      const factors = [0, 1, 42, 1 << 31, (1 << 31) - 1];

      // All factors are applied to the same instance, in order, so memoized state is shared.
      for (const factor of factors) {
        expect(multiplicand.multiplyBy(factor).toString()).toEqual('0');
      }
    });

    it('should be correct for 1337', () => {
      const multiplicand = new BigIntForMultiplication(createBigInteger(1337));
      const cases: Array<[number, string]> = [
        [0, '0'],
        [1, '1337'],
        [8, '10696'],
        [42, '56154'],
        [1 << 31, '2871185637376'],
        [(1 << 31) - 1, '2871185636039'],
      ];

      for (const [factor, expected] of cases) {
        expect(multiplicand.multiplyBy(factor).toString()).toEqual(expected);
      }
    });

    it('should multiply and add to an existing big integer', () => {
      const multiplicand = new BigIntForMultiplication(createBigInteger(1337));
      const accumulator = createBigInteger(1_000_000);

      multiplicand.multiplyByAndAddTo(42, accumulator);

      expect(accumulator.toString()).toEqual('1056154');
    });
  });

  describe('exponentiation', () => {
    /**
     * Requests each exponent from the given pool, in order, and verifies the decimal value of
     * the resulting power.
     */
    const expectPowers = (pool: BigIntExponentiation, cases: Array<[number, string]>): void => {
      for (const [exponent, expected] of cases) {
        expect(pool.toThePowerOf(exponent).getValue().toString()).toEqual(expected);
      }
    };

    it('should be correct for base-0', () => {
      expectPowers(new BigIntExponentiation(0), [
        [0, '1'],
        [1, '0'],
        [2, '0'],
        [3, '0'],
        [8, '0'],
        [12, '0'],
      ]);
    });

    it('should be correct for base-1', () => {
      expectPowers(new BigIntExponentiation(1), [
        [0, '1'],
        [1, '1'],
        [2, '1'],
        [3, '1'],
        [8, '1'],
        [12, '1'],
      ]);
    });

    it('should be correct for base-42', () => {
      expectPowers(new BigIntExponentiation(42), [
        [0, '1'],
        [1, '42'],
        [2, '1764'],
        [3, '74088'],
        [8, '9682651996416'],
        [12, '30129469486639681536'],
      ]);
    });

    it('should cache the exponents', () => {
      const pool = new BigIntExponentiation(32);

      const first = pool.toThePowerOf(4);
      const second = pool.toThePowerOf(4);

      expect(first).toBe(second);
    });
  });
});