feat(core): implements a decimal fingerprint for i18n
This commit is contained in:
		
							parent
							
								
									1d53a870dd
								
							
						
					
					
						commit
						582550a90d
					
				| @ -63,7 +63,7 @@ export function serializeNodes(nodes: i18n.Node[]): string[] { | ||||
|  */ | ||||
| export function sha1(str: string): string { | ||||
|   const utf8 = utf8Encode(str); | ||||
|   const words32 = stringToWords32(utf8); | ||||
|   const words32 = stringToWords32(utf8, Endian.Big); | ||||
|   const len = utf8.length * 8; | ||||
| 
 | ||||
|   const w = new Array(80); | ||||
| @ -90,15 +90,90 @@ export function sha1(str: string): string { | ||||
|     [a, b, c, d, e] = [add32(a, h0), add32(b, h1), add32(c, h2), add32(d, h3), add32(e, h4)]; | ||||
|   } | ||||
| 
 | ||||
|   const sha1 = words32ToString([a, b, c, d, e]); | ||||
| 
 | ||||
|   let hex: string = ''; | ||||
|   for (let i = 0; i < sha1.length; i++) { | ||||
|     const b = sha1.charCodeAt(i); | ||||
|     hex += (b >>> 4 & 0x0f).toString(16) + (b & 0x0f).toString(16); | ||||
|   return byteStringToHexString(words32ToByteString([a, b, c, d, e])); | ||||
| } | ||||
| 
 | ||||
|   return hex.toLowerCase(); | ||||
/**
 * Selects a bitwise combination of `b`, `c`, `d` and a constant according to `index`.
 *
 * - index < 20: choose (`b` selects between `c` and `d`), constant 0x5a827999
 * - index < 40: parity (`b ^ c ^ d`), constant 0x6ed9eba1
 * - index < 60: majority of the three inputs, constant 0x8f1bbcdc
 * - otherwise:  parity (`b ^ c ^ d`), constant 0xca62c1d6
 *
 * NOTE(review): this looks like the SHA-1 per-round function and constant; the call site is not
 * visible here, so confirm against the `sha1` loop.
 *
 * @returns a `[f, k]` tuple: the combined value and the constant for that range.
 */
function fk(index: number, b: number, c: number, d: number): [number, number] {
  let f: number;
  let k: number;

  if (index < 20) {
    f = (b & c) | (~b & d);
    k = 0x5a827999;
  } else if (index < 40) {
    f = b ^ c ^ d;
    k = 0x6ed9eba1;
  } else if (index < 60) {
    f = (b & c) | (b & d) | (c & d);
    k = 0x8f1bbcdc;
  } else {
    f = b ^ c ^ d;
    k = 0xca62c1d6;
  }

  return [f, k];
}
| 
 | ||||
/**
 * Computes the fingerprint of a string.
 *
 * The input is passed through `utf8Encode` and hashed twice with `hash32`, using two different
 * seeds, to obtain a high and a low 32-bit half. The top bit of the high half is cleared, and
 * the two halves are then rendered together as a single decimal string.
 *
 * Based on:
 * https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/GoogleJsMessageIdGenerator.java
 *
 * @param str the string to fingerprint
 * @returns the fingerprint as a decimal string
 */
export function fingerprint(str: string): string {
  const encoded = utf8Encode(str);

  let high = hash32(encoded, 0);
  let low = hash32(encoded, 102072);

  // The pairs (0, 0) and (0, 1) are replaced by XOR-ing both halves with fixed constants.
  if (high === 0 && (low === 0 || low === 1)) {
    high ^= 0x130f9bef;
    low ^= -0x6b5f56d8;
  }

  // Clear the top bit of the high half.
  high &= 0x7fffffff;

  return byteStringToDecString(words32ToByteString([high, low]));
}
| 
 | ||||
/**
 * Hashes a byte string down to a 32-bit value.
 *
 * The string is consumed in 12-byte chunks, read as three little-endian words that are added
 * into the running state `(a, b, c)` and scrambled with `mix`. The remaining tail (fewer than
 * 12 bytes) is folded in together with the string length before a final `mix`.
 *
 * @param str byte string to hash
 * @param c seed; it is used as the initial value of the third state word
 * @returns the third state word after the final mix
 */
function hash32(str: string, c: number): number {
  const len = str.length;
  let a = 0x9e3779b9;
  let b = 0x9e3779b9;
  let offset = 0;

  // Full 12-byte chunks.
  while (offset + 12 <= len) {
    a = add32(a, wordAt(str, offset, Endian.Little));
    b = add32(b, wordAt(str, offset + 4, Endian.Little));
    c = add32(c, wordAt(str, offset + 8, Endian.Little));
    [a, b, c] = mix([a, b, c]);
    offset += 12;
  }

  // Tail: `wordAt` is called with offsets that may reach past the end of the string.
  a = add32(a, wordAt(str, offset, Endian.Little));
  b = add32(b, wordAt(str, offset + 4, Endian.Little));
  // The first byte of c is reserved for the length, so the last word is shifted up by 8 bits.
  c = add32(c, len);
  c = add32(c, wordAt(str, offset + 8, Endian.Little) << 8);

  const [, , hashed] = mix([a, b, c]);
  return hashed;
}
| 
 | ||||
/**
 * Scrambles three 32-bit words.
 *
 * Runs nine steps. Each step subtracts the other two words from one word (via `sub32`) and then
 * XORs it with a shifted copy of the word updated in the previous step. The steps cycle through
 * `a`, `b`, `c` three times with different shift amounts; their order is significant.
 *
 * @returns the scrambled `[a, b, c]` triple
 */
function mix([a, b, c]: [number, number, number]): [number, number, number] {
  // Pass 1
  a = sub32(a, b);
  a = sub32(a, c);
  a ^= c >>> 13;

  b = sub32(b, c);
  b = sub32(b, a);
  b ^= a << 8;

  c = sub32(c, a);
  c = sub32(c, b);
  c ^= b >>> 13;

  // Pass 2
  a = sub32(a, b);
  a = sub32(a, c);
  a ^= c >>> 12;

  b = sub32(b, c);
  b = sub32(b, a);
  b ^= a << 16;

  c = sub32(c, a);
  c = sub32(c, b);
  c ^= b >>> 5;

  // Pass 3
  a = sub32(a, b);
  a = sub32(a, c);
  a ^= c >>> 3;

  b = sub32(b, c);
  b = sub32(b, a);
  b ^= a << 10;

  c = sub32(c, a);
  c = sub32(c, b);
  c ^= b >>> 15;

  return [a, b, c];
}
| 
 | ||||
| // Utils
 | ||||
| 
 | ||||
/** Byte order used when packing bytes into a 32-bit word. */
enum Endian {
  /** The byte at the lowest index becomes the least significant byte of the word. */
  Little = 0,
  /** The byte at the lowest index becomes the most significant byte of the word. */
  Big = 1,
}
| 
 | ||||
| function utf8Encode(str: string): string { | ||||
| @ -131,10 +206,9 @@ function decodeSurrogatePairs(str: string, index: number): number { | ||||
|   } | ||||
| 
 | ||||
|   const high = str.charCodeAt(index); | ||||
|   let low: number; | ||||
| 
 | ||||
|   if (high >= 0xd800 && high <= 0xdfff && str.length > index + 1) { | ||||
|     low = str.charCodeAt(index + 1); | ||||
|     const low = byteAt(str, index + 1); | ||||
|     if (low >= 0xdc00 && low <= 0xdfff) { | ||||
|       return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000; | ||||
|     } | ||||
| @ -143,50 +217,108 @@ function decodeSurrogatePairs(str: string, index: number): number { | ||||
|   return high; | ||||
| } | ||||
| 
 | ||||
/**
 * Packs a byte string into big-endian 32-bit words.
 *
 * Only the low 8 bits of each character are used. A trailing partial word is padded with zero
 * bits on the least significant side.
 */
function stringToWords32(str: string): number[] {
  const words: number[] = [];

  // Pre-seed one zero per complete 4-byte group.
  const completeWords = str.length >>> 2;
  for (let w = 0; w < completeWords; w++) {
    words.push(0);
  }

  for (let pos = 0; pos < str.length; pos++) {
    // Byte 0 of each group lands in bits 31..24, byte 3 in bits 7..0.
    const shift = 8 * ((3 - pos) & 0x3);
    const byte = str.charCodeAt(pos) & 0xff;
    words[pos >>> 2] |= byte << shift;
  }

  return words;
}
| 
 | ||||
/**
 * Expands 32-bit words into a byte string, four characters per word, most significant byte
 * first.
 */
function words32ToString(words32: number[]): string {
  let out = '';
  for (let w = 0; w < words32.length; w++) {
    for (let shift = 24; shift >= 0; shift -= 8) {
      out += String.fromCharCode((words32[w] >>> shift) & 0xff);
    }
  }
  return out;
}
| 
 | ||||
/**
 * Picks a bitwise combination of `b`, `c`, `d` and an accompanying constant based on `index`.
 *
 * Ranges: below 20, below 40, below 60, and everything else. The second and fourth ranges use
 * the same combination (`b ^ c ^ d`) with different constants.
 *
 * @returns a `[f, k]` tuple: the combined value and the constant for that range.
 */
function fk(index: number, b: number, c: number, d: number): [number, number] {
  let f: number;
  let k: number;

  if (index < 20) {
    f = (b & c) | (~b & d);
    k = 0x5a827999;
  } else if (index < 40) {
    f = b ^ c ^ d;
    k = 0x6ed9eba1;
  } else if (index < 60) {
    f = (b & c) | (b & d) | (c & d);
    k = 0x8f1bbcdc;
  } else {
    f = b ^ c ^ d;
    k = 0xca62c1d6;
  }

  return [f, k];
}
| 
 | ||||
/**
 * Adds two numbers with 32-bit wrap-around.
 *
 * The sum is built from 16-bit halves so that no intermediate value exceeds what the bitwise
 * operators can handle. The result is a signed 32-bit integer.
 */
function add32(a: number, b: number): number {
  const lowSum = (a & 0xffff) + (b & 0xffff);
  const carry = lowSum >> 16;
  const highSum = (a >> 16) + (b >> 16) + carry;
  return (highSum << 16) | (lowSum & 0xffff);
}
| 
 | ||||
/**
 * Subtracts `b` from `a` with 32-bit wrap-around.
 *
 * The difference is built from 16-bit halves; a negative low half propagates a borrow into the
 * high half through the arithmetic shift. The result is a signed 32-bit integer.
 */
function sub32(a: number, b: number): number {
  const lowDiff = (a & 0xffff) - (b & 0xffff);
  const borrow = lowDiff >> 16;
  const highDiff = (a >> 16) - (b >> 16) + borrow;
  return (highDiff << 16) | (lowDiff & 0xffff);
}
| 
 | ||||
/**
 * Rotates a 32-bit number left by `count` positions.
 *
 * Bits shifted out at the top re-enter at the bottom. The result is a signed 32-bit integer.
 */
function rol32(a: number, count: number): number {
  const wrapped = a >>> (32 - count);
  const shifted = a << count;
  return shifted | wrapped;
}
| 
 | ||||
/**
 * Splits a byte string into 32-bit words read with the given byte order.
 *
 * The number of words is the string length divided by four, rounded up; each word is read with
 * `wordAt` at consecutive 4-byte offsets.
 */
function stringToWords32(str: string, endian: Endian): number[] {
  const wordCount = (str.length + 3) >>> 2;
  const words: number[] = [];

  for (let w = 0; w < wordCount; w++) {
    words.push(wordAt(str, w * 4, endian));
  }

  return words;
}
| 
 | ||||
/**
 * Returns the low 8 bits of the character at `index`, or 0 when `index` is at or past the end
 * of the string.
 */
function byteAt(str: string, index: number): number {
  if (index >= str.length) {
    return 0;
  }
  return str.charCodeAt(index) & 0xff;
}
| 
 | ||||
/**
 * Reads four bytes starting at `index` and packs them into one 32-bit word.
 *
 * With `Endian.Big` the first byte ends up in bits 31..24; otherwise the first byte ends up in
 * bits 7..0. Bytes are fetched with `byteAt`.
 */
function wordAt(str: string, index: number, endian: Endian): number {
  let word = 0;
  for (let offset = 0; offset < 4; offset++) {
    const shift = endian === Endian.Big ? 24 - 8 * offset : 8 * offset;
    word += byteAt(str, index + offset) << shift;
  }
  return word;
}
| 
 | ||||
/**
 * Concatenates the byte-string form of every word, in order.
 */
function words32ToByteString(words32: number[]): string {
  return words32.map(word => word32ToByteString(word)).join('');
}
| 
 | ||||
/**
 * Converts one 32-bit word into a 4-character byte string, most significant byte first.
 */
function word32ToByteString(word: number): string {
  const bytes: number[] = [];
  for (let shift = 24; shift >= 0; shift -= 8) {
    bytes.push((word >>> shift) & 0xff);
  }
  return String.fromCharCode(...bytes);
}
| 
 | ||||
/**
 * Renders a byte string as lowercase hexadecimal, two digits per byte.
 */
function byteStringToHexString(str: string): string {
  const digits: string[] = [];
  for (let pos = 0; pos < str.length; pos++) {
    const byte = byteAt(str, pos);
    digits.push((byte >>> 4).toString(16), (byte & 0x0f).toString(16));
  }
  return digits.join('').toLowerCase();
}
| 
 | ||||
/**
 * Converts a byte string (most significant byte first) into the decimal representation of the
 * unsigned integer it encodes.
 *
 * The arithmetic is done on decimal digit strings because a JavaScript number cannot represent
 * integers above 2^53 exactly. Based on http://www.danvk.org/hex2dec.html
 *
 * Internally the digit strings are stored least significant digit first; the result is reversed
 * once at the end.
 *
 * @param str byte string; only the low 8 bits of each character are used
 * @returns the decimal string, or '0' when the input is empty or all zero bytes
 */
function byteStringToDecString(str: string): string {
  let decimal = '';
  let toThePower = '1';

  for (let i = str.length - 1; i >= 0; i--) {
    const byte = str.charCodeAt(i) & 0xff;
    decimal = addBigInt(decimal, numberTimesBigInt(byte, toThePower));
    toThePower = numberTimesBigInt(256, toThePower);
  }

  // A zero value accumulates no digits at all; render it as '0' rather than ''.
  if (decimal === '') {
    return '0';
  }

  return decimal.split('').reverse().join('');
}

/**
 * Adds two non-negative integers given as decimal digit strings stored least significant digit
 * first. The empty string stands for zero.
 */
function addBigInt(x: string, y: string): string {
  let sum = '';
  const len = Math.max(x.length, y.length);
  for (let i = 0, carry = 0; i < len || carry; i++) {
    const tmpSum = carry + Number(x[i] || 0) + Number(y[i] || 0);
    if (tmpSum >= 10) {
      carry = 1;
      sum += String(tmpSum - 10);
    } else {
      carry = 0;
      sum += String(tmpSum);
    }
  }

  return sum;
}

/**
 * Multiplies a digit string (least significant digit first) by a small non-negative integer
 * using binary double-and-add. Returns the empty string when `num` is 0.
 */
function numberTimesBigInt(num: number, b: string): string {
  let product = '';
  let bToThePower = b;
  for (; num !== 0; num = num >>> 1) {
    if (num & 1) {
      product = addBigInt(product, bToThePower);
    }
    bToThePower = addBigInt(bToThePower, bToThePower);
  }
  return product;
}
|  | ||||
| @ -8,11 +8,12 @@ | ||||
| 
 | ||||
| import {describe, expect, it} from '@angular/core/testing/testing_internal'; | ||||
| 
 | ||||
| import {sha1} from '../../src/i18n/digest'; | ||||
| import {fingerprint, sha1} from '../../src/i18n/digest'; | ||||
| 
 | ||||
| export function main(): void { | ||||
|   describe('digest', () => { | ||||
|     describe('sha1', () => { | ||||
|     it('should work on emnpty strings', | ||||
|       it('should work on empty strings', | ||||
|          () => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); }); | ||||
| 
 | ||||
|       it('should returns the sha1 of "hello world"', | ||||
| @ -55,4 +56,41 @@ export function main(): void { | ||||
|         expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45'); | ||||
|       }); | ||||
|     }); | ||||
| 
 | ||||
|     describe('decimal fingerprint', () => { | ||||
|       const fixtures: {[msg: string]: string} = { | ||||
|         '  Spaced  Out  ': '3976450302996657536', | ||||
|         'Last Name': '4407559560004943843', | ||||
|         'First Name': '6028371114637047813', | ||||
|         'View': '2509141182388535183', | ||||
|         'START_BOLDNUMEND_BOLD of START_BOLDmillionsEND_BOLD': '29997634073898638', | ||||
|         'The customer\'s credit card was authorized for AMOUNT and passed all risk checks.': | ||||
|             '6836487644149622036', | ||||
|         'Hello world!': '3022994926184248873', | ||||
|         'Jalape\u00f1o': '8054366208386598941', | ||||
|         'The set of SET_NAME is {XXX, ...}.': '135956960462609535', | ||||
|         'NAME took a trip to DESTINATION.': '768490705511913603', | ||||
|         'by AUTHOR (YEAR)': '7036633296476174078', | ||||
|         '': '4416290763660062288', | ||||
|       }; | ||||
| 
 | ||||
|       it('should work on well known inputs', () => { | ||||
|         Object.keys(fixtures).forEach(msg => { expect(fingerprint(msg)).toEqual(fixtures[msg]); }); | ||||
|       }); | ||||
| 
 | ||||
|       it('should support arbitrary string size', () => { | ||||
|         const prefix = `你好,世界`; | ||||
|         let result = fingerprint(prefix); | ||||
|         for (let size = prefix.length; size < 5000; size += 101) { | ||||
|           result = prefix + fingerprint(result); | ||||
|           while (result.length < size) { | ||||
|             result += result; | ||||
|           } | ||||
|           result = result.slice(-size); | ||||
|         } | ||||
|         expect(fingerprint(result)).toEqual('2122606631351252558'); | ||||
|       }); | ||||
| 
 | ||||
|     }); | ||||
|   }); | ||||
| } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user