perf(compiler): optimize computation of i18n message ids (#39694)

Message ID computation makes extensive use of big integer multiplications in order to translate the message's fingerprint into a numerical representation. In large compilations with heavy use of i18n this was showing up high in profiler sessions. There are two factors contributing to the bottleneck: 1. a suboptimal big integer representation using strings, which requires repeated allocation and conversion from characters to numeric digits and back. 2. repeated computation of the necessary base-256 exponents and their multiplication factors. The first bottleneck is addressed using a representation that uses an array of individual digits. This avoids repeated conversion, and allocation overhead is also greatly reduced, as adding two big integers can now be done in-place with virtually no memory allocations. The second point is addressed by a memoized exponentiation pool to optimize the multiplication of a base-256 exponent. As an additional optimization, the two 32-bit words are now converted to decimal per word, instead of going through an intermediate byte buffer and doing the decimal conversion per byte. The results of these optimizations depend a lot on the number of i18n messages for which a message ID should be computed. Benchmarks have shown that computing message IDs is now ~6x faster for 1,000 messages, ~14x faster for 10,000 messages, and ~24x faster for 100,000 messages. PR Close #39694
2020-11-14 22:57:26 +01:00 · 2020-11-14 22:57:26 +01:00 · 604b4e46c8
commit 604b4e46c8
parent 9f20942f89
3 changed files with 430 additions and 38 deletions
--- a/packages/compiler/src/i18n/big_integer.ts
+++ b/packages/compiler/src/i18n/big_integer.ts
@ -0,0 +1,196 @@
+/**
+ * @license
+ * Copyright Google LLC All Rights Reserved.
+ *
+ * Use of this source code is governed by an MIT-style license that can be
+ * found in the LICENSE file at https://angular.io/license
+ */
+
+/**
+ * Represents a big integer using a buffer of its individual decimal (base-10) digits, with the
+ * least significant digit stored at the beginning of the array (little endian). No sign is stored,
+ * so only non-negative values are represented.
+ *
+ * For performance reasons, each instance is mutable. The addition operation can be done in-place
+ * to reduce memory pressure of allocation for the digits array.
+ */
+export class BigInteger {
+  static zero(): BigInteger {
+    return new BigInteger([0]);
+  }
+
+  static one(): BigInteger {
+    return new BigInteger([1]);
+  }
+
+  /**
+   * Creates a big integer using its individual digits in little endian storage.
+   *
+   * The provided array is stored as-is (it is not copied), and is later mutated by `addToSelf`.
+   * The constructor is private; instances originate from `zero()`, `one()` and `clone()`.
+   */
+  private constructor(private readonly digits: number[]) {}
+
+  /**
+   * Creates a clone of this instance.
+   *
+   * The clone owns a copy of the digits array, so mutating the clone does not affect this
+   * instance and vice versa.
+   */
+  clone(): BigInteger {
+    return new BigInteger(this.digits.slice());
+  }
+
+  /**
+   * Returns a new big integer with the sum of `this` and `other` as its value. This does not mutate
+   * `this` but instead returns a new instance, unlike `addToSelf`.
+   */
+  add(other: BigInteger): BigInteger {
+    const result = this.clone();
+    result.addToSelf(other);
+    return result;
+  }
+
+  /**
+   * Adds `other` to the instance itself, thereby mutating its value.
+   *
+   * The digits of both operands are summed position by position, starting at the least
+   * significant digit, and a carry is propagated to the next position whenever a digit sum
+   * reaches 10. Positions that exist in only one of the operands are treated as absent (i.e. they
+   * contribute nothing to the sum). If a carry remains after the last position, the digits array
+   * of this instance grows by one digit. The digits of `other` are only read, never written.
+   */
+  addToSelf(other: BigInteger): void {
+    const maxNrOfDigits = Math.max(this.digits.length, other.digits.length);
+    let carry = 0;
+    for (let i = 0; i < maxNrOfDigits; i++) {
+      let digitSum = carry;
+      if (i < this.digits.length) {
+        digitSum += this.digits[i];
+      }
+      if (i < other.digits.length) {
+        digitSum += other.digits[i];
+      }
+
+      if (digitSum >= 10) {
+        this.digits[i] = digitSum - 10;
+        carry = 1;
+      } else {
+        this.digits[i] = digitSum;
+        carry = 0;
+      }
+    }
+
+    // Apply a remaining carry if needed.
+    if (carry > 0) {
+      this.digits[maxNrOfDigits] = 1;
+    }
+  }
+
+  /**
+   * Builds the decimal string representation of the big integer. As this is stored in
+   * little endian, the digits are concatenated in reverse order, i.e. from the most significant
+   * digit down to the least significant digit.
+   */
+  toString(): string {
+    let res = '';
+    for (let i = this.digits.length - 1; i >= 0; i--) {
+      res += this.digits[i];
+    }
+    return res;
+  }
+}
+
+/**
+ * Represents a big integer which is optimized for multiplication operations, as its power-of-twos
+ * are memoized. See `multiplyBy()` for details on the multiplication algorithm.
+ */
+export class BigIntForMultiplication {
+  /**
+   * Stores all memoized power-of-twos, where each index represents `value * 2^index`, with
+   * `value` being the big integer that was passed to the constructor. Index 0 therefore holds the
+   * value itself; higher indices are filled lazily by `getMultipliedByPowerOfTwo()`.
+   */
+  private readonly powerOfTwos: BigInteger[];
+
+  constructor(value: BigInteger) {
+    this.powerOfTwos = [value];
+  }
+
+  /**
+   * Returns the big integer itself.
+   *
+   * This is the very instance that was passed to the constructor, not a copy. As the memoized
+   * power-of-twos are derived from it, it should not be mutated by the caller.
+   */
+  getValue(): BigInteger {
+    return this.powerOfTwos[0];
+  }
+
+  /**
+   * Computes the value for `num * b`, where `num` is a JS number and `b` is a big integer. The
+   * value for `b` is represented by a storage model that is optimized for this computation.
+   *
+   * This operation takes O(log2(num)) iterations by continuous halving of the number, where the
+   * least-significant bit (LSB) is tested in each iteration. If the bit is set, the bit's index is
+   * used as exponent into the power-of-two multiplication of `b`.
+   *
+   * Because the halving uses the unsigned right shift operator (`>>>`), `num` is effectively
+   * interpreted as an unsigned 32-bit integer. For example, `1 << 31` (a negative JS number) is
+   * treated as 2^31.
+   *
+   * As an example, consider the multiplication num=42, b=1337. In binary 42 is 0b00101010 and the
+   * algorithm unrolls into the following iterations:
+   *
+   *  Iteration | num        | LSB  | b * 2^iter | Add? | product
+   * -----------|------------|------|------------|------|--------
+   *  0         | 0b00101010 | 0    | 1337       | No   | 0
+   *  1         | 0b00010101 | 1    | 2674       | Yes  | 2674
+   *  2         | 0b00001010 | 0    | 5348       | No   | 2674
+   *  3         | 0b00000101 | 1    | 10696      | Yes  | 13370
+   *  4         | 0b00000010 | 0    | 21392      | No   | 13370
+   *  5         | 0b00000001 | 1    | 42784      | Yes  | 56154
+   *  6         | 0b00000000 | 0    | 85568      | No   | 56154
+   *
+   * The last row is shown for illustration only: the loop stops as soon as `num` has become zero,
+   * so iteration 6 is never executed.
+   *
+   * The computed product of 56154 is indeed the correct result.
+   *
+   * The `BigIntForMultiplication` representation for a big integer provides memoized access to the
+   * power-of-two values to reduce the workload in computing those values.
+   *
+   * The product is returned as a newly created big integer that is owned by the caller.
+   */
+  multiplyBy(num: number): BigInteger {
+    const product = BigInteger.zero();
+    this.multiplyByAndAddTo(num, product);
+    return product;
+  }
+
+  /**
+   * See `multiplyBy()` for details. This function allows for the computed product to be added
+   * directly to the provided result big integer.
+   *
+   * `result` is mutated in-place: after the call it holds its previous value plus `num * b`. The
+   * memoized power-of-two values are only read while adding them to `result`.
+   */
+  multiplyByAndAddTo(num: number, result: BigInteger): void {
+    for (let exponent = 0; num !== 0; num = num >>> 1, exponent++) {
+      if (num & 1) {
+        const value = this.getMultipliedByPowerOfTwo(exponent);
+        result.addToSelf(value);
+      }
+    }
+  }
+
+  /**
+   * Computes and memoizes the big integer value for `value * 2^exponent`, where `value` is the
+   * big integer that was passed to the constructor. The returned instance is the memoized one and
+   * is shared between calls.
+   */
+  private getMultipliedByPowerOfTwo(exponent: number): BigInteger {
+    // Compute the powers up until the requested exponent, where each value is computed from its
+    // predecessor. This is simple as `value * 2^(exponent - 1)` only has to be doubled (i.e.
+    // added to itself) to reach `value * 2^exponent`.
+    for (let i = this.powerOfTwos.length; i <= exponent; i++) {
+      const previousPower = this.powerOfTwos[i - 1];
+      this.powerOfTwos[i] = previousPower.add(previousPower);
+    }
+    return this.powerOfTwos[exponent];
+  }
+}
+
+/**
+ * Represents an exponentiation operation for the provided base, of which exponents are computed and
+ * memoized. The results are represented by a `BigIntForMultiplication` which is tailored for
+ * multiplication operations by memoizing the power-of-twos. This effectively results in a matrix
+ * representation that is lazily computed upon request.
+ *
+ * The 0th exponent (`base^0`, i.e. one) is available from the start; every higher exponent is
+ * derived from its predecessor the first time it is requested.
+ */
+export class BigIntExponentiation {
+  private readonly exponents = [new BigIntForMultiplication(BigInteger.one())];
+
+  constructor(private readonly base: number) {}
+
+  /**
+   * Compute the value for `this.base^exponent`, resulting in a big integer that is optimized for
+   * further multiplication operations.
+   *
+   * Results are memoized, so requesting the same exponent again returns the same
+   * `BigIntForMultiplication` instance. All lower exponents that have not been computed yet are
+   * computed and memoized along the way. `exponent` is expected to be a non-negative integer.
+   */
+  toThePowerOf(exponent: number): BigIntForMultiplication {
+    // Compute the results up until the requested exponent, where every value is computed from its
+    // predecessor. This is because `this.base^(exponent - 1)` only has to be multiplied by `base`
+    // to reach `this.base^exponent`.
+    for (let i = this.exponents.length; i <= exponent; i++) {
+      const value = this.exponents[i - 1].multiplyBy(this.base);
+      this.exponents[i] = new BigIntForMultiplication(value);
+    }
+    return this.exponents[exponent];
+  }
+}
--- a/packages/compiler/src/i18n/digest.ts
+++ b/packages/compiler/src/i18n/digest.ts
@ -7,6 +7,7 @@
 */

 import {newArray, utf8Encode} from '../util';
+import {BigIntExponentiation} from './big_integer';

 import * as i18n from './i18n_ast';

@ -197,7 +198,7 @@ export function computeMsgId(msg: string, meaning: string = ''): string {
  const hi = msgFingerprint[0];
  const lo = msgFingerprint[1];

-  return byteStringToDecString(words32ToByteString([hi & 0x7fffffff, lo]));
+  return wordsToDecimalString(hi & 0x7fffffff, lo);
 }

 function hash32(str: string, c: number): number {
@ -334,43 +335,31 @@ function byteStringToHexString(str: string): string {
  return hex.toLowerCase();
 }

-// based on http://www.danvk.org/hex2dec.html (JS can not handle more than 56b)
-function byteStringToDecString(str: string): string {
-  let decimal = '';
-  let toThePower = '1';
+/**
+ * Create a shared exponentiation pool for base-256 computations. This shared pool provides memoized
+ * power-of-256 results with memoized power-of-two computations for efficient multiplication.
+ *
+ * For our purposes, this can be safely stored as a global without memory concerns. The reason is
+ * that we encode two words, so we only request the 0th (for the low word) and 4th (for the high
+ * word) exponents.
+ */
+const base256 = new BigIntExponentiation(256);

-  for (let i = str.length - 1; i >= 0; i--) {
-    decimal = addBigInt(decimal, numberTimesBigInt(byteAt(str, i), toThePower));
-    toThePower = numberTimesBigInt(256, toThePower);
-  }
+/**
+ * Represents two 32-bit words as a single decimal number. This requires a big integer storage
+ * model as JS numbers are not accurate enough to represent the 64-bit number.
+ *
+ * Based on http://www.danvk.org/hex2dec.html
+ */
+function wordsToDecimalString(hi: number, lo: number): string {
+  // Encode the four bytes in lo in the lower digits of the decimal number.
+  // Note: the multiplication results in lo itself but represented by a big integer using its
+  // decimal digits.
+  const decimal = base256.toThePowerOf(0).multiplyBy(lo);

-  return decimal.split('').reverse().join('');
-}
-
-// x and y decimal, lowest significant digit first
-function addBigInt(x: string, y: string): string {
-  let sum = '';
-  const len = Math.max(x.length, y.length);
-  for (let i = 0, carry = 0; i < len || carry; i++) {
-    const tmpSum = carry + +(x[i] || 0) + +(y[i] || 0);
-    if (tmpSum >= 10) {
-      carry = 1;
-      sum += tmpSum - 10;
-    } else {
-      carry = 0;
-      sum += tmpSum;
-    }
-  }
-
-  return sum;
-}
-
-function numberTimesBigInt(num: number, b: string): string {
-  let product = '';
-  let bToThePower = b;
-  for (; num !== 0; num = num >>> 1) {
-    if (num & 1) product = addBigInt(product, bToThePower);
-    bToThePower = addBigInt(bToThePower, bToThePower);
-  }
-  return product;
+  // Encode the four bytes in hi above the four lo bytes. lo is at most (2^8)^4 - 1, which is why
+  // hi is multiplied by 256^4 before being added.
+  base256.toThePowerOf(4).multiplyByAndAddTo(hi, decimal);
+
+  return decimal.toString();
 }
--- a/packages/compiler/test/i18n/big_integer_spec.ts
+++ b/packages/compiler/test/i18n/big_integer_spec.ts
@ -0,0 +1,207 @@
+/**
+ * @license
+ * Copyright Google LLC All Rights Reserved.
+ *
+ * Use of this source code is governed by an MIT-style license that can be
+ * found in the LICENSE file at https://angular.io/license
+ */
+
+import {BigInteger, BigIntExponentiation, BigIntForMultiplication} from '../../src/i18n/big_integer';
+
+describe('big integers', () => {
+  describe('add', () => {
+    it('should add two integers', () => {
+      const a = createBigInteger(42);
+      const b = createBigInteger(1337);
+
+      expect(a.add(b).toString()).toEqual('1379');
+    });
+
+    it('should add two integers with a carry', () => {
+      const a = createBigInteger(8);
+      const b = createBigInteger(995);
+
+      expect(a.add(b).toString()).toEqual('1003');
+    });
+
+    it('should add two integers beyond the maximum supported JS integer', () => {
+      const b31 = createBigInteger(1 << 31);
+
+      const b32 = b31.add(b31);
+      const b33 = b32.add(b32);
+      const b34 = b33.add(b33);
+      const b35 = b34.add(b34);
+      const b36 = b35.add(b35);
+      const b37 = b36.add(b36);
+      const b38 = b37.add(b37);
+      const b39 = b38.add(b38);
+      const b40 = b39.add(b39);
+      const b41 = b40.add(b40);
+      const b42 = b41.add(b41);
+      const b43 = b42.add(b42);
+      const b44 = b43.add(b43);
+      const b45 = b44.add(b44);
+      const b46 = b45.add(b45);
+      const b47 = b46.add(b46);
+      const b48 = b47.add(b47);
+      const b49 = b48.add(b48);
+      const b50 = b49.add(b49);
+      const b51 = b50.add(b50);
+      const b52 = b51.add(b51);
+      const b53 = b52.add(b52);
+      const b54 = b53.add(b53);
+      const b55 = b54.add(b54);
+      const b56 = b55.add(b55);
+      const b57 = b56.add(b56);
+      const b58 = b57.add(b57);
+      const b59 = b58.add(b58);
+      const b60 = b59.add(b59);
+      const b61 = b60.add(b60);
+      const b62 = b61.add(b61);
+      const b63 = b62.add(b62);
+      const b64 = b63.add(b63);
+      const b65 = b64.add(b64);
+
+      expect(b32.toString()).toEqual('4294967296');
+      expect(b33.toString()).toEqual('8589934592');
+      expect(b34.toString()).toEqual('17179869184');
+      expect(b35.toString()).toEqual('34359738368');
+      expect(b36.toString()).toEqual('68719476736');
+      expect(b37.toString()).toEqual('137438953472');
+      expect(b38.toString()).toEqual('274877906944');
+      expect(b39.toString()).toEqual('549755813888');
+      expect(b40.toString()).toEqual('1099511627776');
+      expect(b41.toString()).toEqual('2199023255552');
+      expect(b42.toString()).toEqual('4398046511104');
+      expect(b43.toString()).toEqual('8796093022208');
+      expect(b44.toString()).toEqual('17592186044416');
+      expect(b45.toString()).toEqual('35184372088832');
+      expect(b46.toString()).toEqual('70368744177664');
+      expect(b47.toString()).toEqual('140737488355328');
+      expect(b48.toString()).toEqual('281474976710656');
+      expect(b49.toString()).toEqual('562949953421312');
+      expect(b50.toString()).toEqual('1125899906842624');
+      expect(b51.toString()).toEqual('2251799813685248');
+      expect(b52.toString()).toEqual('4503599627370496');
+      expect(b53.toString()).toEqual('9007199254740992');
+      expect(b54.toString()).toEqual('18014398509481984');
+
+      // From here onwards the result would be inaccurate with JavaScript numbers.
+      expect(b55.toString()).toEqual('36028797018963968');
+      expect(b56.toString()).toEqual('72057594037927936');
+      expect(b57.toString()).toEqual('144115188075855872');
+      expect(b58.toString()).toEqual('288230376151711744');
+      expect(b59.toString()).toEqual('576460752303423488');
+      expect(b60.toString()).toEqual('1152921504606846976');
+      expect(b61.toString()).toEqual('2305843009213693952');
+      expect(b62.toString()).toEqual('4611686018427387904');
+      expect(b63.toString()).toEqual('9223372036854775808');
+      expect(b64.toString()).toEqual('18446744073709551616');
+      expect(b65.toString()).toEqual('36893488147419103232');
+    });
+
+    it('should not mutate the big integer instances', () => {
+      const a = createBigInteger(42);
+      const b = createBigInteger(1337);
+
+      a.add(b);
+
+      expect(a.toString()).toEqual('42');
+      expect(b.toString()).toEqual('1337');
+    });
+  });
+
+  describe('addToSelf', () => {
+    it('should add two integers into the left operand', () => {
+      const a = createBigInteger(42);
+      const b = createBigInteger(1337);
+
+      a.addToSelf(b);
+
+      expect(a.toString()).toEqual('1379');
+    });
+
+    it('should not mutate the right operand', () => {
+      const a = createBigInteger(42);
+      const b = createBigInteger(1337);
+
+      a.addToSelf(b);
+
+      expect(a.toString()).toEqual('1379');
+      expect(b.toString()).toEqual('1337');
+    });
+  });
+
+  describe('multiplication', () => {
+    it('should be correct for 0', () => {
+      const a = new BigIntForMultiplication(createBigInteger(0));
+      expect(a.multiplyBy(0).toString()).toEqual('0');
+      expect(a.multiplyBy(1).toString()).toEqual('0');
+      expect(a.multiplyBy(42).toString()).toEqual('0');
+      expect(a.multiplyBy(1 << 31).toString()).toEqual('0');
+      expect(a.multiplyBy((1 << 31) - 1).toString()).toEqual('0');
+    });
+
+    it('should be correct for 1337', () => {
+      const a = new BigIntForMultiplication(createBigInteger(1337));
+      expect(a.multiplyBy(0).toString()).toEqual('0');
+      expect(a.multiplyBy(1).toString()).toEqual('1337');
+      expect(a.multiplyBy(8).toString()).toEqual('10696');
+      expect(a.multiplyBy(42).toString()).toEqual('56154');
+      expect(a.multiplyBy(1 << 31).toString()).toEqual('2871185637376');
+      expect(a.multiplyBy((1 << 31) - 1).toString()).toEqual('2871185636039');
+    });
+
+    it('should multiply and add to an existing big integer', () => {
+      const a = new BigIntForMultiplication(createBigInteger(1337));
+      const result = createBigInteger(1_000_000);
+      a.multiplyByAndAddTo(42, result);
+      expect(result.toString()).toEqual('1056154');
+    });
+  });
+
+  describe('exponentiation', () => {
+    it('should be correct for base-0', () => {
+      const base32 = new BigIntExponentiation(0);
+      expect(base32.toThePowerOf(0).getValue().toString()).toEqual('1');
+      expect(base32.toThePowerOf(1).getValue().toString()).toEqual('0');
+      expect(base32.toThePowerOf(2).getValue().toString()).toEqual('0');
+      expect(base32.toThePowerOf(3).getValue().toString()).toEqual('0');
+      expect(base32.toThePowerOf(8).getValue().toString()).toEqual('0');
+      expect(base32.toThePowerOf(12).getValue().toString()).toEqual('0');
+    });
+
+    it('should be correct for base-1', () => {
+      const base32 = new BigIntExponentiation(1);
+      expect(base32.toThePowerOf(0).getValue().toString()).toEqual('1');
+      expect(base32.toThePowerOf(1).getValue().toString()).toEqual('1');
+      expect(base32.toThePowerOf(2).getValue().toString()).toEqual('1');
+      expect(base32.toThePowerOf(3).getValue().toString()).toEqual('1');
+      expect(base32.toThePowerOf(8).getValue().toString()).toEqual('1');
+      expect(base32.toThePowerOf(12).getValue().toString()).toEqual('1');
+    });
+
+    it('should be correct for base-42', () => {
+      const base32 = new BigIntExponentiation(42);
+      expect(base32.toThePowerOf(0).getValue().toString()).toEqual('1');
+      expect(base32.toThePowerOf(1).getValue().toString()).toEqual('42');
+      expect(base32.toThePowerOf(2).getValue().toString()).toEqual('1764');
+      expect(base32.toThePowerOf(3).getValue().toString()).toEqual('74088');
+      expect(base32.toThePowerOf(8).getValue().toString()).toEqual('9682651996416');
+      expect(base32.toThePowerOf(12).getValue().toString()).toEqual('30129469486639681536');
+    });
+
+    it('should cache the exponents', () => {
+      const base32 = new BigIntExponentiation(32);
+
+      const a = base32.toThePowerOf(4);
+      const b = base32.toThePowerOf(4);
+
+      expect(a).toBe(b);
+    });
+  });
+
+  function createBigInteger(value: number): BigInteger {
+    return new BigIntForMultiplication(BigInteger.one()).multiplyBy(value);
+  }
+});