feat(core): implements a decimal fingerprint for i18n

This commit is contained in:
Victor Berchet 2016-10-28 18:00:31 -07:00
parent 1d53a870dd
commit 582550a90d
2 changed files with 257 additions and 87 deletions

View File

@ -63,7 +63,7 @@ export function serializeNodes(nodes: i18n.Node[]): string[] {
*/ */
export function sha1(str: string): string { export function sha1(str: string): string {
const utf8 = utf8Encode(str); const utf8 = utf8Encode(str);
const words32 = stringToWords32(utf8); const words32 = stringToWords32(utf8, Endian.Big);
const len = utf8.length * 8; const len = utf8.length * 8;
const w = new Array(80); const w = new Array(80);
@ -90,15 +90,90 @@ export function sha1(str: string): string {
[a, b, c, d, e] = [add32(a, h0), add32(b, h1), add32(c, h2), add32(d, h3), add32(e, h4)]; [a, b, c, d, e] = [add32(a, h0), add32(b, h1), add32(c, h2), add32(d, h3), add32(e, h4)];
} }
const sha1 = words32ToString([a, b, c, d, e]); return byteStringToHexString(words32ToByteString([a, b, c, d, e]));
}
let hex: string = ''; function fk(index: number, b: number, c: number, d: number): [number, number] {
for (let i = 0; i < sha1.length; i++) { if (index < 20) {
const b = sha1.charCodeAt(i); return [(b & c) | (~b & d), 0x5a827999];
hex += (b >>> 4 & 0x0f).toString(16) + (b & 0x0f).toString(16);
} }
return hex.toLowerCase(); if (index < 40) {
return [b ^ c ^ d, 0x6ed9eba1];
}
if (index < 60) {
return [(b & c) | (b & d) | (c & d), 0x8f1bbcdc];
}
return [b ^ c ^ d, 0xca62c1d6];
}
/**
 * Computes the message fingerprint of `str`.
 *
 * The string is UTF-8 encoded, hashed twice with `hash32` (seeds 0 and 102072) to get a
 * high and a low 32-bit word, and the pair is rendered as an unsigned decimal string.
 * The top bit of the high word is cleared, so the value always fits in 63 bits.
 *
 * based on:
 * https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/GoogleJsMessageIdGenerator.java
 *
 * @param str the message text to fingerprint
 * @returns the fingerprint as a base-10 string
 */
export function fingerprint(str: string): string {
  const bytes = utf8Encode(str);
  let high = hash32(bytes, 0);
  let low = hash32(bytes, 102072);

  // The values 0 and 1 are remapped to another fingerprint.
  // NOTE(review): presumably mirrors the reference implementation linked above - confirm.
  const needsRemap = high === 0 && (low === 0 || low === 1);
  if (needsRemap) {
    high ^= 0x130f9bef;
    low ^= -0x6b5f56d8;
  }

  // Drop the sign bit so the decimal rendering is never negative.
  high &= 0x7fffffff;

  return byteStringToDecString(words32ToByteString([high, low]));
}
/**
 * Hashes a byte string down to one 32-bit word.
 *
 * @param str byte string (each char code is read through `wordAt`, little-endian)
 * @param c   seed, also used as the third accumulator
 * @returns the third word of the final `mix` round
 */
function hash32(str: string, c: number): number {
  const len = str.length;
  let a = 0x9e3779b9;
  let b = 0x9e3779b9;
  let offset = 0;

  // Consume the input in chunks of 12 bytes, i.e. three little-endian words.
  while (offset + 12 <= len) {
    a = add32(a, wordAt(str, offset, Endian.Little));
    b = add32(b, wordAt(str, offset + 4, Endian.Little));
    c = add32(c, wordAt(str, offset + 8, Endian.Little));
    [a, b, c] = mix([a, b, c]);
    offset += 12;
  }

  // Fold in whatever is left (fewer than 12 bytes).
  const tailA = wordAt(str, offset, Endian.Little);
  const tailB = wordAt(str, offset + 4, Endian.Little);
  const tailC = wordAt(str, offset + 8, Endian.Little);

  a = add32(a, tailA);
  b = add32(b, tailB);
  // The lowest byte of c is reserved for the length, hence the shift of the tail word.
  c = add32(add32(c, len), tailC << 8);

  return mix([a, b, c])[2];
}
// clang-format off
/**
 * Scrambles three 32-bit words together.
 *
 * Runs three rounds; in each round every word has the other two subtracted from it
 * (with 32-bit wraparound) and is then xor-ed with a shifted copy of a neighbour.
 *
 * @returns the three mixed words, in the same order as the input
 */
function mix([a, b, c]: [number, number, number]): [number, number, number] {
  // Shift amounts per round: [c >>> n for a, a << n for b, b >>> n for c].
  const rounds: [number, number, number][] = [
    [13, 8, 13],
    [12, 16, 5],
    [3, 10, 15],
  ];

  for (const [shiftForA, shiftForB, shiftForC] of rounds) {
    a = sub32(sub32(a, b), c) ^ (c >>> shiftForA);
    b = sub32(sub32(b, c), a) ^ (a << shiftForB);
    c = sub32(sub32(c, a), b) ^ (b >>> shiftForC);
  }

  return [a, b, c];
}
// clang-format on
// Utils
// Byte order used when packing four bytes into a 32-bit word.
// NOTE(review): the closing line holds two braces because two diff columns are merged
// on that line in this view; only one of them belongs to the enum.
enum Endian {
// The byte at the lowest index ends up in the least significant bits.
Little,
// The byte at the lowest index ends up in the most significant bits.
Big,
} }
function utf8Encode(str: string): string { function utf8Encode(str: string): string {
@ -131,10 +206,9 @@ function decodeSurrogatePairs(str: string, index: number): number {
} }
const high = str.charCodeAt(index); const high = str.charCodeAt(index);
let low: number;
if (high >= 0xd800 && high <= 0xdfff && str.length > index + 1) { if (high >= 0xd800 && high <= 0xdfff && str.length > index + 1) {
low = str.charCodeAt(index + 1); const low = byteAt(str, index + 1);
if (low >= 0xdc00 && low <= 0xdfff) { if (low >= 0xdc00 && low <= 0xdfff) {
return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000; return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
} }
@ -143,50 +217,108 @@ function decodeSurrogatePairs(str: string, index: number): number {
return high; return high;
} }
// Packs a byte string into 32-bit words, four bytes per word, first byte in the most
// significant position. A trailing partial word is padded with zero bits.
function stringToWords32(str: string): number[] {
  const words32: number[] = [];
  for (let pos = 0; pos < str.length; pos++) {
    const slot = pos >>> 2;
    // 24, 16, 8, 0 for the four bytes of a word.
    const shift = 8 * ((3 - pos) & 0x3);
    const byte = str.charCodeAt(pos) & 0xff;
    words32[slot] = (words32[slot] || 0) | (byte << shift);
  }
  return words32;
}
// Unpacks 32-bit words into a byte string, most significant byte of each word first.
function words32ToString(words32: number[]): string {
  let str = '';
  for (let wordIndex = 0; wordIndex < words32.length; wordIndex++) {
    const word = words32[wordIndex];
    for (let shift = 24; shift >= 0; shift -= 8) {
      str += String.fromCharCode((word >>> shift) & 0xff);
    }
  }
  return str;
}
// Selects the round function value and the round constant for step `index`.
//
// Returns `[f, k]` where `f` combines b, c and d bitwise and `k` is the constant for the
// range `index` falls in: below 20, below 40, below 60, or anything else.
function fk(index: number, b: number, c: number, d: number): [number, number] {
  let f: number;
  let k: number;

  if (index < 20) {
    f = (b & c) | (~b & d);
    k = 0x5a827999;
  } else if (index < 40) {
    f = b ^ c ^ d;
    k = 0x6ed9eba1;
  } else if (index < 60) {
    f = (b & c) | (b & d) | (c & d);
    k = 0x8f1bbcdc;
  } else {
    f = b ^ c ^ d;
    k = 0xca62c1d6;
  }

  return [f, k];
}
// Adds `a` and `b` as 32-bit integers.
//
// The sum is built from two 16-bit halves: the low halves are added first and their
// carry (`low >> 16`) is propagated into the sum of the high halves. The halves are then
// recombined with bitwise operators, so the result is a signed 32-bit value.
//
// NOTE(review): every line of this definition appears twice because two diff columns
// are merged per line in this view.
function add32(a: number, b: number): number { function add32(a: number, b: number): number {
const low = (a & 0xffff) + (b & 0xffff); const low = (a & 0xffff) + (b & 0xffff);
const high = (a >> 16) + (b >> 16) + (low >> 16); const high = (a >> 16) + (b >> 16) + (low >> 16);
return (high << 16) | (low & 0xffff); return (high << 16) | (low & 0xffff);
} }
// Subtracts `b` from `a` as 32-bit integers, wrapping around on underflow.
//
// Works on 16-bit halves: a negative low half yields a borrow of -1 (`>> 16` keeps the
// sign) which is applied to the high half before both are recombined.
function sub32(a: number, b: number): number {
  const lowerHalf = (a & 0xffff) - (b & 0xffff);
  const borrow = lowerHalf >> 16;
  const upperHalf = (a >> 16) - (b >> 16) + borrow;
  return (upperHalf << 16) | (lowerHalf & 0xffff);
}
// Rotates the 32-bit number `a` left by `count` positions: the bits shifted out on the
// left (`a >>> (32 - count)`) are or-ed back in on the right.
//
// NOTE(review): every line of this definition appears twice because two diff columns
// are merged per line in this view.
function rol32(a: number, count: number): number { function rol32(a: number, count: number): number {
return (a << count) | (a >>> (32 - count)); return (a << count) | (a >>> (32 - count));
} }
// Splits a byte string into 32-bit words read with the given byte order.
// The number of words is the byte length divided by four, rounded up.
function stringToWords32(str: string, endian: Endian): number[] {
  const wordCount = (str.length + 3) >>> 2;
  const words32: number[] = [];
  for (let offset = 0; words32.length < wordCount; offset += 4) {
    words32.push(wordAt(str, offset, endian));
  }
  return words32;
}
// Returns the low 8 bits of the char code at `index`, or 0 when `index` is at or past
// the end of the string.
function byteAt(str: string, index: number): number {
  if (index >= str.length) {
    return 0;
  }
  return str.charCodeAt(index) & 0xff;
}
// Reads the four bytes starting at `index` as one 32-bit word.
//
// With `Endian.Big` the first byte lands in the top 8 bits, otherwise in the bottom 8.
// Bytes are fetched through `byteAt`.
function wordAt(str: string, index: number, endian: Endian): number {
  const isBigEndian = endian === Endian.Big;
  let word = 0;
  for (let offset = 0; offset < 4; offset++) {
    const shift = isBigEndian ? 24 - 8 * offset : 8 * offset;
    word += byteAt(str, index + offset) << shift;
  }
  return word;
}
// Concatenates the byte-string form of every word, in array order.
function words32ToByteString(words32: number[]): string {
  let bytes = '';
  for (let wordIndex = 0; wordIndex < words32.length; wordIndex++) {
    bytes += word32ToByteString(words32[wordIndex]);
  }
  return bytes;
}
// Turns one 32-bit word into a 4-character byte string, most significant byte first.
function word32ToByteString(word: number): string {
  return String.fromCharCode(
      (word >>> 24) & 0xff,
      (word >>> 16) & 0xff,
      (word >>> 8) & 0xff,
      word & 0xff);
}
// Renders every byte of `str` as two lowercase hexadecimal digits.
function byteStringToHexString(str: string): string {
  const pairs: string[] = [];
  for (let pos = 0; pos < str.length; pos++) {
    const byte = byteAt(str, pos);
    const highNibble = byte >>> 4;
    const lowNibble = byte & 0x0f;
    pairs.push(highNibble.toString(16) + lowNibble.toString(16));
  }
  return pairs.join('').toLowerCase();
}
// Converts a byte string into the decimal representation of the unsigned integer it
// encodes; the last byte of `str` is the least significant one.
//
// Based on http://www.danvk.org/hex2dec.html. The arithmetic is carried out on decimal
// digit strings because a JS number only represents integers exactly up to 53 bits.
//
// Returns '0' when the encoded value is zero (including for an empty `str`).
function byteStringToDecString(str: string): string {
  // Both accumulators store decimal digits least significant digit first.
  let decimal = '';
  let toThePower = '1';

  for (let i = str.length - 1; i >= 0; i--) {
    decimal = addBigInt(decimal, numberTimesBigInt(byteAt(str, i), toThePower));
    toThePower = numberTimesBigInt(256, toThePower);
  }

  // A zero value leaves `decimal` empty; render it as '0' instead of an empty string.
  return decimal.split('').reverse().join('') || '0';
}
// Adds two non-negative integers given as decimal digit strings stored least
// significant digit first, and returns the sum in the same form.
// The empty string stands for zero.
function addBigInt(x: string, y: string): string {
  const digits: number[] = [];
  const longest = Math.max(x.length, y.length);
  let carry = 0;
  let pos = 0;

  // Keep going past the longest operand while there is still a carry to write out.
  while (pos < longest || carry !== 0) {
    const total = carry + Number(x[pos] || 0) + Number(y[pos] || 0);
    carry = total >= 10 ? 1 : 0;
    digits.push(total - 10 * carry);
    pos++;
  }

  return digits.join('');
}
// Multiplies the digit string `b` (least significant digit first) by `num` using
// binary double-and-add: `b` is doubled once per bit of `num` and added to the product
// whenever that bit is set. Returns '' when `num` is 0.
function numberTimesBigInt(num: number, b: string): string {
  let product = '';
  let doubled = b;
  let remainingBits = num;

  while (remainingBits !== 0) {
    if ((remainingBits & 1) !== 0) {
      product = addBigInt(product, doubled);
    }
    doubled = addBigInt(doubled, doubled);
    remainingBits = remainingBits >>> 1;
  }

  return product;
}

View File

@ -8,51 +8,89 @@
import {describe, expect, it} from '@angular/core/testing/testing_internal'; import {describe, expect, it} from '@angular/core/testing/testing_internal';
import {sha1} from '../../src/i18n/digest'; import {fingerprint, sha1} from '../../src/i18n/digest';
export function main(): void { export function main(): void {
describe('sha1', () => { describe('digest', () => {
it('should work on emnpty strings', describe('sha1', () => {
() => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); }); it('should work on empty strings',
() => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); });
it('should returns the sha1 of "hello world"', it('should returns the sha1 of "hello world"',
() => { expect(sha1('abc')).toEqual('a9993e364706816aba3e25717850c26c9cd0d89d'); }); () => { expect(sha1('abc')).toEqual('a9993e364706816aba3e25717850c26c9cd0d89d'); });
it('should returns the sha1 of unicode strings', it('should returns the sha1 of unicode strings',
() => { expect(sha1('你好,世界')).toEqual('3becb03b015ed48050611c8d7afe4b88f70d5a20'); }); () => { expect(sha1('你好,世界')).toEqual('3becb03b015ed48050611c8d7afe4b88f70d5a20'); });
it('should support arbitrary string size', () => { it('should support arbitrary string size', () => {
// node.js reference code: // node.js reference code:
// //
// var crypto = require('crypto'); // var crypto = require('crypto');
// //
// function sha1(string) { // function sha1(string) {
// var shasum = crypto.createHash('sha1'); // var shasum = crypto.createHash('sha1');
// shasum.update(string, 'utf8'); // shasum.update(string, 'utf8');
// return shasum.digest('hex', 'utf8'); // return shasum.digest('hex', 'utf8');
// } // }
// //
// var prefix = `你好,世界`; // var prefix = `你好,世界`;
// var result = sha1(prefix); // var result = sha1(prefix);
// for (var size = prefix.length; size < 5000; size += 101) { // for (var size = prefix.length; size < 5000; size += 101) {
// result = prefix + sha1(result); // result = prefix + sha1(result);
// while (result.length < size) { // while (result.length < size) {
// result += result; // result += result;
// } // }
// result = result.slice(-size); // result = result.slice(-size);
// } // }
// //
// console.log(sha1(result)); // console.log(sha1(result));
const prefix = `你好,世界`; const prefix = `你好,世界`;
let result = sha1(prefix); let result = sha1(prefix);
for (let size = prefix.length; size < 5000; size += 101) { for (let size = prefix.length; size < 5000; size += 101) {
result = prefix + sha1(result); result = prefix + sha1(result);
while (result.length < size) { while (result.length < size) {
result += result; result += result;
}
result = result.slice(-size);
} }
result = result.slice(-size); expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45');
} });
expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45'); });
describe('decimal fingerprint', () => {
// Expected decimal fingerprints, keyed by the message text that is hashed.
// NOTE(review): presumably produced with the reference implementation that
// `fingerprint` is based on - confirm the origin of these values.
const fixtures: {[msg: string]: string} = {
' Spaced Out ': '3976450302996657536',
'Last Name': '4407559560004943843',
'First Name': '6028371114637047813',
'View': '2509141182388535183',
'START_BOLDNUMEND_BOLD of START_BOLDmillionsEND_BOLD': '29997634073898638',
'The customer\'s credit card was authorized for AMOUNT and passed all risk checks.':
'6836487644149622036',
'Hello world!': '3022994926184248873',
'Jalape\u00f1o': '8054366208386598941',
'The set of SET_NAME is {XXX, ...}.': '135956960462609535',
'NAME took a trip to DESTINATION.': '768490705511913603',
'by AUTHOR (YEAR)': '7036633296476174078',
'': '4416290763660062288',
};
// Each fixture message must hash to the fingerprint recorded for it.
it('should work on well known inputs', () => {
Object.keys(fixtures).forEach(msg => { expect(fingerprint(msg)).toEqual(fixtures[msg]); });
});
// Grows the input in steps of 101 characters up to just under 5000: each round
// prepends the prefix to the fingerprint of the previous input, doubles the text until
// it is long enough, and keeps the last `size` characters.
it('should support arbitrary string size', () => {
const prefix = `你好,世界`;
let result = fingerprint(prefix);
for (let size = prefix.length; size < 5000; size += 101) {
result = prefix + fingerprint(result);
while (result.length < size) {
result += result;
}
result = result.slice(-size);
}
expect(fingerprint(result)).toEqual('2122606631351252558');
});
}); });
}); });
} }