feat(core): implements a decimal fingerprint for i18n
This commit is contained in:
parent
1d53a870dd
commit
582550a90d
|
@ -63,7 +63,7 @@ export function serializeNodes(nodes: i18n.Node[]): string[] {
|
|||
*/
|
||||
export function sha1(str: string): string {
|
||||
const utf8 = utf8Encode(str);
|
||||
const words32 = stringToWords32(utf8);
|
||||
const words32 = stringToWords32(utf8, Endian.Big);
|
||||
const len = utf8.length * 8;
|
||||
|
||||
const w = new Array(80);
|
||||
|
@ -90,15 +90,90 @@ export function sha1(str: string): string {
|
|||
[a, b, c, d, e] = [add32(a, h0), add32(b, h1), add32(c, h2), add32(d, h3), add32(e, h4)];
|
||||
}
|
||||
|
||||
const sha1 = words32ToString([a, b, c, d, e]);
|
||||
|
||||
let hex: string = '';
|
||||
for (let i = 0; i < sha1.length; i++) {
|
||||
const b = sha1.charCodeAt(i);
|
||||
hex += (b >>> 4 & 0x0f).toString(16) + (b & 0x0f).toString(16);
|
||||
return byteStringToHexString(words32ToByteString([a, b, c, d, e]));
|
||||
}
|
||||
|
||||
return hex.toLowerCase();
|
||||
/**
 * Picks the bitwise combination of `b`, `c`, `d` and the additive constant
 * that apply to a given round index.
 *
 * Rounds are grouped in bands of 20: indices below 20, below 40, below 60,
 * and everything else. Each band has its own mixing expression and constant.
 *
 * @param index round number
 * @param b second working word
 * @param c third working word
 * @param d fourth working word
 * @returns a `[f, k]` tuple: the mixed value and the constant of the band
 */
function fk(index: number, b: number, c: number, d: number): [number, number] {
  let f: number;
  let k: number;

  if (index < 20) {
    // choose: bits of `c` where `b` is set, bits of `d` elsewhere
    f = (b & c) | (~b & d);
    k = 0x5a827999;
  } else if (index < 40) {
    // parity
    f = b ^ c ^ d;
    k = 0x6ed9eba1;
  } else if (index < 60) {
    // majority
    f = (b & c) | (b & d) | (c & d);
    k = 0x8f1bbcdc;
  } else {
    // parity again, with a different constant
    f = b ^ c ^ d;
    k = 0xca62c1d6;
  }

  return [f, k];
}
|
||||
|
||||
/**
 * Compute the fingerprint of the given string
 *
 * The string is UTF-8 encoded and hashed twice with `hash32`, using the seeds
 * `0` (high word) and `102072` (low word). The top bit of the high word is
 * cleared before the two words are rendered, so the output is a 64 bit number
 * encoded as a decimal string.
 *
 * based on:
 * https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/GoogleJsMessageIdGenerator.java
 *
 * @param str the text to fingerprint
 * @returns the fingerprint as a base 10 string
 */
export function fingerprint(str: string): string {
  const bytes = utf8Encode(str);

  let high = hash32(bytes, 0);
  let low = hash32(bytes, 102072);

  // The pairs (0, 0) and (0, 1) are remapped by XOR-ing both words with fixed constants
  if (high === 0 && (low === 0 || low === 1)) {
    high ^= 0x130f9bef;
    low ^= -0x6b5f56d8;
  }

  // drop the sign bit of the high word
  high &= 0x7fffffff;

  return byteStringToDecString(words32ToByteString([high, low]));
}
|
||||
|
||||
/**
 * Hashes a byte string down to a single 32 bit word.
 *
 * The input is consumed 12 bytes at a time as three little-endian words that
 * are added into the running state `(a, b, c)` and then scrambled by `mix`.
 * The remaining tail (fewer than 12 bytes, zero padded by `wordAt`) and the
 * total length are folded in before a final `mix`.
 *
 * @param str byte string (one byte per character)
 * @param c seed, used as the initial value of the third state word
 * @returns the third word of the final mixed state
 */
function hash32(str: string, c: number): number {
  const len = str.length;
  let a = 0x9e3779b9;
  let b = 0x9e3779b9;
  let offset = 0;

  // full 12 byte chunks
  while (offset + 12 <= len) {
    a = add32(a, wordAt(str, offset, Endian.Little));
    b = add32(b, wordAt(str, offset + 4, Endian.Little));
    c = add32(c, wordAt(str, offset + 8, Endian.Little));
    [a, b, c] = mix([a, b, c]);
    offset += 12;
  }

  // trailing bytes
  a = add32(a, wordAt(str, offset, Endian.Little));
  b = add32(b, wordAt(str, offset + 4, Endian.Little));
  // the first byte of c is reserved for the length
  c = add32(c, len);
  c = add32(c, wordAt(str, offset + 8, Endian.Little) << 8);

  const [, , hash] = mix([a, b, c]);
  return hash;
}
|
||||
|
||||
// clang-format off
/**
 * Scrambles a triple of 32 bit words.
 *
 * Runs three passes; in each pass every word has the other two subtracted
 * from it (32 bit wrapping, via `sub32`) and is then XOR-ed with a shifted
 * copy of the word updated just before it. The statement order matters: each
 * step reads values written by the previous one.
 *
 * @returns the scrambled `[a, b, c]` triple
 */
function mix([a, b, c]: [number, number, number]): [number, number, number] {
  // pass 1
  a = sub32(sub32(a, b), c) ^ (c >>> 13);
  b = sub32(sub32(b, c), a) ^ (a << 8);
  c = sub32(sub32(c, a), b) ^ (b >>> 13);
  // pass 2
  a = sub32(sub32(a, b), c) ^ (c >>> 12);
  b = sub32(sub32(b, c), a) ^ (a << 16);
  c = sub32(sub32(c, a), b) ^ (b >>> 5);
  // pass 3
  a = sub32(sub32(a, b), c) ^ (c >>> 3);
  b = sub32(sub32(b, c), a) ^ (a << 10);
  c = sub32(sub32(c, a), b) ^ (b >>> 15);
  return [a, b, c];
}
// clang-format on
|
||||
|
||||
// Utils
|
||||
|
||||
/** Byte order used when packing bytes into a 32 bit word. */
enum Endian {
  /** The byte at the lowest index lands in the least significant position. */
  Little = 0,
  /** The byte at the lowest index lands in the most significant position. */
  Big = 1,
}
|
||||
|
||||
function utf8Encode(str: string): string {
|
||||
|
@ -131,10 +206,9 @@ function decodeSurrogatePairs(str: string, index: number): number {
|
|||
}
|
||||
|
||||
const high = str.charCodeAt(index);
|
||||
let low: number;
|
||||
|
||||
if (high >= 0xd800 && high <= 0xdfff && str.length > index + 1) {
|
||||
low = str.charCodeAt(index + 1);
|
||||
const low = byteAt(str, index + 1);
|
||||
if (low >= 0xdc00 && low <= 0xdfff) {
|
||||
return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
|
||||
}
|
||||
|
@ -143,50 +217,108 @@ function decodeSurrogatePairs(str: string, index: number): number {
|
|||
return high;
|
||||
}
|
||||
|
||||
/**
 * Packs a byte string into 32 bit words, most significant byte first.
 *
 * Only the low 8 bits of each character are used. A trailing partial word is
 * padded with zero bits.
 *
 * @param str byte string (one byte per character)
 * @returns the packed words
 */
function stringToWords32(str: string): number[] {
  const words32: number[] = [];

  // zero the words that are fully covered by the input
  const fullWords = str.length >>> 2;
  while (words32.length < fullWords) {
    words32.push(0);
  }

  for (let pos = 0; pos < str.length; pos++) {
    // byte 0 of each group of four goes to bits 24..31, byte 3 to bits 0..7
    const shift = 8 * ((3 - pos) & 0x3);
    words32[pos >>> 2] |= (str.charCodeAt(pos) & 0xff) << shift;
  }

  return words32;
}
|
||||
|
||||
/**
 * Unpacks 32 bit words into a byte string, most significant byte first.
 *
 * @param words32 the words to serialize
 * @returns a string with four characters (each in 0..255) per word
 */
function words32ToString(words32: number[]): string {
  let bytes = '';
  for (let w = 0; w < words32.length; w++) {
    const word = words32[w];
    for (let shift = 24; shift >= 0; shift -= 8) {
      bytes += String.fromCharCode((word >>> shift) & 0xff);
    }
  }
  return bytes;
}
|
||||
|
||||
/**
 * Picks the bitwise combination of `b`, `c`, `d` and the additive constant
 * that apply to a given round index.
 *
 * Rounds are grouped in bands of 20: indices below 20, below 40, below 60,
 * and everything else. Each band has its own mixing expression and constant.
 *
 * @param index round number
 * @param b second working word
 * @param c third working word
 * @param d fourth working word
 * @returns a `[f, k]` tuple: the mixed value and the constant of the band
 */
function fk(index: number, b: number, c: number, d: number): [number, number] {
  let f: number;
  let k: number;

  if (index < 20) {
    // choose: bits of `c` where `b` is set, bits of `d` elsewhere
    f = (b & c) | (~b & d);
    k = 0x5a827999;
  } else if (index < 40) {
    // parity
    f = b ^ c ^ d;
    k = 0x6ed9eba1;
  } else if (index < 60) {
    // majority
    f = (b & c) | (b & d) | (c & d);
    k = 0x8f1bbcdc;
  } else {
    // parity again, with a different constant
    f = b ^ c ^ d;
    k = 0xca62c1d6;
  }

  return [f, k];
}
|
||||
|
||||
/**
 * Adds two numbers with 32 bit wrap-around.
 *
 * The operands are split into 16 bit halves so that no intermediate sum
 * leaves the range in which bitwise operators are exact.
 *
 * @returns the sum as a signed 32 bit integer
 */
function add32(a: number, b: number): number {
  const lowSum = (a & 0xffff) + (b & 0xffff);
  // bit 16 of the low sum is the carry into the upper half
  const carry = lowSum >> 16;
  const highSum = (a >> 16) + (b >> 16) + carry;
  return (highSum << 16) | (lowSum & 0xffff);
}
|
||||
|
||||
/**
 * Subtracts `b` from `a` with 32 bit wrap-around.
 *
 * Works on 16 bit halves; a negative low difference propagates as a borrow
 * (its arithmetic shift by 16 is -1) into the upper half.
 *
 * @returns the difference as a signed 32 bit integer
 */
function sub32(a: number, b: number): number {
  const lowDiff = (a & 0xffff) - (b & 0xffff);
  const borrow = lowDiff >> 16;
  const highDiff = (a >> 16) - (b >> 16) + borrow;
  return (highDiff << 16) | (lowDiff & 0xffff);
}
|
||||
|
||||
/**
 * Rotates a 32 bit number to the left.
 *
 * @param a the value to rotate
 * @param count number of bit positions to rotate by
 * @returns the rotated value as a signed 32 bit integer
 */
function rol32(a: number, count: number): number {
  // bits pushed out on the left re-enter on the right
  const wrapped = a >>> (32 - count);
  const shifted = a << count;
  return shifted | wrapped;
}
|
||||
|
||||
/**
 * Packs a byte string into 32 bit words using the requested byte order.
 *
 * The word count is the string length divided by four, rounded up; the last
 * word is completed by whatever `wordAt` yields past the end of the string.
 *
 * @param str byte string (one byte per character)
 * @param endian byte order inside each word
 * @returns the packed words
 */
function stringToWords32(str: string, endian: Endian): number[] {
  const wordCount = (str.length + 3) >>> 2;
  const words32: number[] = [];

  for (let offset = 0; words32.length < wordCount; offset += 4) {
    words32.push(wordAt(str, offset, endian));
  }

  return words32;
}
|
||||
|
||||
/**
 * Reads one byte from a byte string.
 *
 * @param str byte string (one byte per character)
 * @param index position to read
 * @returns the low 8 bits of the character at `index`, or 0 when `index` is
 *     at or beyond the end of the string
 */
function byteAt(str: string, index: number): number {
  if (index >= str.length) {
    return 0;
  }
  return str.charCodeAt(index) & 0xff;
}
|
||||
|
||||
/**
 * Reads four consecutive bytes of a byte string as one 32 bit word.
 *
 * Bytes are fetched through `byteAt`, so positions past the end of the string
 * contribute 0.
 *
 * @param str byte string (one byte per character)
 * @param index position of the first byte
 * @param endian `Endian.Big` puts the first byte in the top 8 bits, any other
 *     value puts it in the bottom 8 bits
 * @returns the assembled word
 */
function wordAt(str: string, index: number, endian: Endian): number {
  const bigEndian = endian === Endian.Big;
  let word = 0;

  for (let k = 0; k < 4; k++) {
    const shift = bigEndian ? 24 - 8 * k : 8 * k;
    word += byteAt(str, index + k) << shift;
  }

  return word;
}
|
||||
|
||||
/**
 * Serializes a list of 32 bit words into a byte string.
 *
 * @param words32 the words to serialize, in order
 * @returns the concatenation of `word32ToByteString` applied to each word
 */
function words32ToByteString(words32: number[]): string {
  let bytes = '';
  words32.forEach(word => { bytes += word32ToByteString(word); });
  return bytes;
}
|
||||
|
||||
/**
 * Serializes one 32 bit word into four characters, most significant byte
 * first.
 *
 * @param word the word to serialize
 * @returns a 4 character string, each character in 0..255
 */
function word32ToByteString(word: number): string {
  return String.fromCharCode(
      (word >>> 24) & 0xff,
      (word >>> 16) & 0xff,
      (word >>> 8) & 0xff,
      word & 0xff);
}
|
||||
|
||||
/**
 * Renders a byte string as lowercase hexadecimal, two digits per byte.
 *
 * @param str byte string (one byte per character)
 * @returns the hexadecimal representation
 */
function byteStringToHexString(str: string): string {
  const pairs: string[] = [];

  for (let pos = 0; pos < str.length; pos++) {
    const byte = byteAt(str, pos);
    // high nibble first, then low nibble
    pairs.push((byte >>> 4).toString(16) + (byte & 0x0f).toString(16));
  }

  return pairs.join('').toLowerCase();
}
|
||||
|
||||
// based on http://www.danvk.org/hex2dec.html
// (a JS number can not hold a 64 bit integer exactly, hence the string arithmetic)
/**
 * Converts a big-endian byte string into the decimal representation of the
 * unsigned integer it encodes.
 *
 * The running total and the current power of 256 are kept as digit strings
 * with the least significant digit first, which is the format `addBigInt`
 * and `numberTimesBigInt` operate on. The total is reversed at the end.
 *
 * @param str byte string, most significant byte first
 * @returns the value in base 10; `'0'` when the input is empty or all zeros
 */
function byteStringToDecString(str: string): string {
  let decimal = '';
  let toThePower = '1';

  // walk from the least significant byte to the most significant one
  for (let i = str.length - 1; i >= 0; i--) {
    decimal = addBigInt(decimal, numberTimesBigInt(byteAt(str, i), toThePower));
    toThePower = numberTimesBigInt(256, toThePower);
  }

  // A zero value accumulates no digits at all; an empty string is not a valid
  // decimal number, so report it as "0".
  return decimal.split('').reverse().join('') || '0';
}
|
||||
|
||||
/**
 * Adds two non-negative integers given as decimal digit strings with the
 * least significant digit first.
 *
 * Missing digits count as 0, so the operands may differ in length and the
 * empty string behaves as zero.
 *
 * @param x first operand, least significant digit first
 * @param y second operand, least significant digit first
 * @returns the sum in the same format (empty when both operands are empty)
 */
function addBigInt(x: string, y: string): string {
  const len = Math.max(x.length, y.length);
  let digits = '';
  let carry = 0;

  // keep going past the longest operand while a carry is pending
  for (let pos = 0; pos < len || carry; pos++) {
    const total = carry + +(x[pos] || 0) + +(y[pos] || 0);
    carry = total >= 10 ? 1 : 0;
    digits += total - 10 * carry;
  }

  return digits;
}
|
||||
|
||||
/**
 * Multiplies a digit-string integer by a small number using binary
 * double-and-add.
 *
 * For every set bit of `num`, the matching doubling of `b` is added to the
 * product through `addBigInt`.
 *
 * @param num the multiplier, consumed bit by bit with an unsigned shift
 * @param b the multiplicand, in the digit-string format `addBigInt` expects
 * @returns the product in the same format; the empty string when `num` is 0
 */
function numberTimesBigInt(num: number, b: string): string {
  let product = '';
  let doubled = b;
  let remaining = num;

  while (remaining !== 0) {
    if (remaining & 1) product = addBigInt(product, doubled);
    doubled = addBigInt(doubled, doubled);
    remaining = remaining >>> 1;
  }

  return product;
}
|
||||
|
|
|
@ -8,11 +8,12 @@
|
|||
|
||||
import {describe, expect, it} from '@angular/core/testing/testing_internal';
|
||||
|
||||
import {sha1} from '../../src/i18n/digest';
|
||||
import {fingerprint, sha1} from '../../src/i18n/digest';
|
||||
|
||||
export function main(): void {
|
||||
describe('digest', () => {
|
||||
describe('sha1', () => {
|
||||
it('should work on emnpty strings',
|
||||
it('should work on empty strings',
|
||||
() => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); });
|
||||
|
||||
it('should returns the sha1 of "hello world"',
|
||||
|
@ -55,4 +56,41 @@ export function main(): void {
|
|||
expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45');
|
||||
});
|
||||
});
|
||||
|
||||
describe('decimal fingerprint', () => {
|
||||
const fixtures: {[msg: string]: string} = {
|
||||
' Spaced Out ': '3976450302996657536',
|
||||
'Last Name': '4407559560004943843',
|
||||
'First Name': '6028371114637047813',
|
||||
'View': '2509141182388535183',
|
||||
'START_BOLDNUMEND_BOLD of START_BOLDmillionsEND_BOLD': '29997634073898638',
|
||||
'The customer\'s credit card was authorized for AMOUNT and passed all risk checks.':
|
||||
'6836487644149622036',
|
||||
'Hello world!': '3022994926184248873',
|
||||
'Jalape\u00f1o': '8054366208386598941',
|
||||
'The set of SET_NAME is {XXX, ...}.': '135956960462609535',
|
||||
'NAME took a trip to DESTINATION.': '768490705511913603',
|
||||
'by AUTHOR (YEAR)': '7036633296476174078',
|
||||
'': '4416290763660062288',
|
||||
};
|
||||
|
||||
it('should work on well known inputs', () => {
|
||||
Object.keys(fixtures).forEach(msg => { expect(fingerprint(msg)).toEqual(fixtures[msg]); });
|
||||
});
|
||||
|
||||
it('should support arbitrary string size', () => {
|
||||
const prefix = `你好,世界`;
|
||||
let result = fingerprint(prefix);
|
||||
for (let size = prefix.length; size < 5000; size += 101) {
|
||||
result = prefix + fingerprint(result);
|
||||
while (result.length < size) {
|
||||
result += result;
|
||||
}
|
||||
result = result.slice(-size);
|
||||
}
|
||||
expect(fingerprint(result)).toEqual('2122606631351252558');
|
||||
});
|
||||
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue