From b65f66feff69d68b8d9015669e3397a927ca7622 Mon Sep 17 00:00:00 2001 From: Victor Berchet Date: Mon, 8 Aug 2016 12:44:48 -0700 Subject: [PATCH] feat(i18n): switch to sha1 for message fingerprinting --- .../integrationtest/test/i18n_spec.ts | 2 +- modules/@angular/compiler/src/i18n/digest.ts | 162 +++++++++++++++--- .../compiler/test/i18n/digest_spec.ts | 68 +++++--- .../test/i18n/extractor_merger_spec.ts | 4 +- .../compiler/test/i18n/message_bundle_spec.ts | 6 +- .../test/i18n/serializers/xmb_spec.ts | 8 +- 6 files changed, 194 insertions(+), 56 deletions(-) diff --git a/modules/@angular/compiler-cli/integrationtest/test/i18n_spec.ts b/modules/@angular/compiler-cli/integrationtest/test/i18n_spec.ts index 152ea3521f..e065f3f742 100644 --- a/modules/@angular/compiler-cli/integrationtest/test/i18n_spec.ts +++ b/modules/@angular/compiler-cli/integrationtest/test/i18n_spec.ts @@ -38,7 +38,7 @@ describe('template i18n extraction output', () => { ]> - translate me + translate me `; const xmbOutput = path.join(outDir, 'messages.xmb'); diff --git a/modules/@angular/compiler/src/i18n/digest.ts b/modules/@angular/compiler/src/i18n/digest.ts index 798e192dfc..dd37340a9d 100644 --- a/modules/@angular/compiler/src/i18n/digest.ts +++ b/modules/@angular/compiler/src/i18n/digest.ts @@ -9,29 +9,7 @@ import * as i18n from './i18n_ast'; export function digestMessage(message: i18n.Message): string { - return strHash(serializeNodes(message.nodes).join('') + `[${message.meaning}]`); -} - -/** - * String hash function similar to java.lang.String.hashCode(). - * The hash code for a string is computed as - * s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1], - * where s[i] is the ith character of the string and n is the length of - * the string. We mod the result to make it between 0 (inclusive) and 2^32 (exclusive). 
- * - * Based on goog.string.hashCode from the Google Closure library - * https://github.com/google/closure-library/ - * - * @internal - */ -// TODO(vicb): better algo (less collisions) ? -export function strHash(str: string): string { - let result: number = 0; - for (var i = 0; i < str.length; ++i) { - // Normalize to 4 byte range, 0 ... 2^32. - result = (31 * result + str.charCodeAt(i)) >>> 0; - } - return result.toString(16); + return sha1(serializeNodes(message.nodes).join('') + `[${message.meaning}]`); } /** @@ -73,3 +51,141 @@ const serializerVisitor = new _SerializerVisitor(); export function serializeNodes(nodes: i18n.Node[]): string[] { return nodes.map(a => a.visit(serializerVisitor, null)); } + +/** + * Compute the SHA1 of the given string + * + * see http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf + * + * WARNING: this function has not been designed nor tested with security in mind. + * DO NOT USE IT IN A SECURITY SENSITIVE CONTEXT. + */ +export function sha1(str: string): string { + const utf8 = utf8Encode(str); + const words32 = stringToWords32(utf8); + const len = utf8.length * 8; + + const w = new Array(80); + let [a, b, c, d, e]: number[] = [0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0]; + + words32[len >> 5] |= 0x80 << (24 - len % 32); + words32[((len + 64 >> 9) << 4) + 15] = len; + + for (let i = 0; i < words32.length; i += 16) { + const [h0, h1, h2, h3, h4]: number[] = [a, b, c, d, e]; + + for (let j = 0; j < 80; j++) { + if (j < 16) { + w[j] = words32[i + j]; + } else { + w[j] = rol32(w[j - 3] ^ w[j - 8] ^ w[j - 14] ^ w[j - 16], 1); + } + + const [f, k] = fk(j, b, c, d); + const temp = [rol32(a, 5), f, e, k, w[j]].reduce(add32); + [e, d, c, b, a] = [d, c, rol32(b, 30), a, temp]; + } + + [a, b, c, d, e] = [add32(a, h0), add32(b, h1), add32(c, h2), add32(d, h3), add32(e, h4)]; + } + + const sha1 = words32ToString([a, b, c, d, e]); + + let hex: string = ''; + for (let i = 0; i < sha1.length; i++) { + const b = 
sha1.charCodeAt(i); + hex += (b >>> 4 & 0x0f).toString(16) + (b & 0x0f).toString(16); + } + + return hex; +} + +function utf8Encode(str: string): string { + let encoded: string = ''; + + for (let index = 0; index < str.length; index++) { + const codePoint = decodeSurrogatePairs(str, index); + + if (codePoint <= 0x7f) { + encoded += String.fromCharCode(codePoint); + } else if (codePoint <= 0x7ff) { + encoded += String.fromCharCode(0xc0 | codePoint >>> 6, 0x80 | codePoint & 0x3f); + } else if (codePoint <= 0xffff) { + encoded += String.fromCharCode( + 0xe0 | codePoint >>> 12, 0x80 | codePoint >>> 6 & 0x3f, 0x80 | codePoint & 0x3f); + } else if (codePoint <= 0x1fffff) { + encoded += String.fromCharCode( + 0xf0 | codePoint >>> 18, 0x80 | codePoint >>> 12 & 0x3f, 0x80 | codePoint >>> 6 & 0x3f, + 0x80 | codePoint & 0x3f); + } + } + + return encoded; +} + +// see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae +function decodeSurrogatePairs(str: string, index: number): number { + if (index < 0 || index >= str.length) { + throw new Error(`index=${index} is out of range in "${str}"`); + } + + const high = str.charCodeAt(index); + let low: number; + + if (high >= 0xd800 && high <= 0xdfff && str.length > index + 1) { + low = str.charCodeAt(index + 1); + if (low >= 0xdc00 && low <= 0xdfff) { + return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000; + } + } + + return high; +} + +function stringToWords32(str: string): number[] { + const words32 = Array(str.length >>> 2); + + for (let i = 0; i < words32.length; i++) { + words32[i] = 0; + } + + for (let i = 0; i < str.length; i++) { + words32[i >>> 2] |= (str.charCodeAt(i) & 0xff) << 8 * (3 - i & 0x3); + } + + return words32; +} + +function words32ToString(words32: number[]): string { + let str = ''; + for (let i = 0; i < words32.length * 4; i++) { + str += String.fromCharCode((words32[i >>> 2] >>> 8 * (3 - i & 0x3)) & 0xff); + } + return str; +} + +function fk(index: number, b: number, c: number, d: 
number): [number, number] { + if (index < 20) { + return [(b & c) | (~b & d), 0x5a827999]; + } + + if (index < 40) { + return [b ^ c ^ d, 0x6ed9eba1]; + } + + if (index < 60) { + return [(b & c) | (b & d) | (c & d), 0x8f1bbcdc]; + } + + return [b ^ c ^ d, 0xca62c1d6]; +} + +function add32(a: number, b: number): number { + const low = (a & 0xffff) + (b & 0xffff); + const high = (a >> 16) + (b >> 16) + (low >> 16); + return (high << 16) | (low & 0xffff); +} + +function rol32(a: number, count: number): number { + return (a << count) | (a >>> (32 - count)); +} \ No newline at end of file diff --git a/modules/@angular/compiler/test/i18n/digest_spec.ts b/modules/@angular/compiler/test/i18n/digest_spec.ts index e84477a8ff..08e45480a6 100644 --- a/modules/@angular/compiler/test/i18n/digest_spec.ts +++ b/modules/@angular/compiler/test/i18n/digest_spec.ts @@ -7,32 +7,52 @@ */ import {beforeEach, ddescribe, describe, expect, iit, inject, it, xdescribe, xit} from '@angular/core/testing/testing_internal'; -import {strHash} from '../../src/i18n/digest'; + +import {sha1} from '../../src/i18n/digest'; export function main(): void { - describe('strHash', () => { - it('should return a hash value', () => { - // https://github.com/google/closure-library/blob/1fb19a857b96b74e6523f3e9d33080baf25be046/closure/goog/string/string_test.js#L1115 - expectHash('', 0); - expectHash('foo', 101574); - expectHash('\uAAAAfoo', 1301670364); - expectHash('a', 92567585, 5); - expectHash('a', 2869595232, 6); - expectHash('a', 3058106369, 7); - expectHash('a', 312017024, 8); - expectHash('a', 2929737728, 1024); + describe('sha1', () => { + it('should work on emnpty strings', + () => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); }); + + it('should returns the sha1 of "hello world"', + () => { expect(sha1('abc')).toEqual('a9993e364706816aba3e25717850c26c9cd0d89d'); }); + + it('should returns the sha1 of unicode strings', + () => { 
expect(sha1('你好,世界')).toEqual('3becb03b015ed48050611c8d7afe4b88f70d5a20'); }); + + it('should support arbitrary string size', () => { + // node.js reference code: + // + // var crypto = require('crypto'); + // + // function sha1(string) { + // var shasum = crypto.createHash('sha1'); + // shasum.update(string, 'utf8'); + // return shasum.digest('hex', 'utf8'); + // } + // + // var prefix = `你好,世界`; + // var result = sha1(prefix); + // for (var size = prefix.length; size < 5000; size += 101) { + // result = prefix + sha1(result); + // while (result.length < size) { + // result += result; + // } + // result = result.slice(-size); + // } + // + // console.log(sha1(result)); + const prefix = `你好,世界`; + let result = sha1(prefix); + for (let size = prefix.length; size < 5000; size += 101) { + result = prefix + sha1(result); + while (result.length < size) { + result += result; + } + result = result.slice(-size); + } + expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45'); }); }); } - -function expectHash(text: string, decimal: number, repeat: number = 1) { - let acc = text; - for (let i = 1; i < repeat; i++) { - acc += text; - } - - const hash = strHash(acc); - expect(typeof(hash)).toEqual('string'); - expect(hash.length > 0).toBe(true); - expect(parseInt(hash, 16)).toEqual(decimal); -} \ No newline at end of file diff --git a/modules/@angular/compiler/test/i18n/extractor_merger_spec.ts b/modules/@angular/compiler/test/i18n/extractor_merger_spec.ts index ddd8ca8c18..5972746962 100644 --- a/modules/@angular/compiler/test/i18n/extractor_merger_spec.ts +++ b/modules/@angular/compiler/test/i18n/extractor_merger_spec.ts @@ -248,7 +248,9 @@ export function main() { it('should allow nested implicit elements', () => { let result: any[]; - expect(() => {result = extract('
outer
inner
', ['div'])}).not.toThrow(); + expect(() => { + result = extract('
outer
inner
', ['div']); + }).not.toThrow(); expect(result).toEqual([ [['outer', 'inner'], '', ''], diff --git a/modules/@angular/compiler/test/i18n/message_bundle_spec.ts b/modules/@angular/compiler/test/i18n/message_bundle_spec.ts index 7204ebec04..2848bda17c 100644 --- a/modules/@angular/compiler/test/i18n/message_bundle_spec.ts +++ b/modules/@angular/compiler/test/i18n/message_bundle_spec.ts @@ -26,7 +26,7 @@ export function main(): void { messages.updateFromTemplate( '

Translate Me

', 'url', DEFAULT_INTERPOLATION_CONFIG); expect(humanizeMessages(messages)).toEqual([ - 'a486901=Translate Me', + '2e791a68a3324ecdd29e252198638dafacec46e9=Translate Me', ]); }); @@ -35,8 +35,8 @@ export function main(): void { '

Translate Me

Translate Me

', 'url', DEFAULT_INTERPOLATION_CONFIG); expect(humanizeMessages(messages)).toEqual([ - 'a486901=Translate Me', - '8475f2cc=Translate Me', + '2e791a68a3324ecdd29e252198638dafacec46e9=Translate Me', + '8ca133f957845af1b1868da1b339180d1f519644=Translate Me', ]); }); }); diff --git a/modules/@angular/compiler/test/i18n/serializers/xmb_spec.ts b/modules/@angular/compiler/test/i18n/serializers/xmb_spec.ts index 1e4f6ab3a5..e6cebae272 100644 --- a/modules/@angular/compiler/test/i18n/serializers/xmb_spec.ts +++ b/modules/@angular/compiler/test/i18n/serializers/xmb_spec.ts @@ -45,10 +45,10 @@ export function main(): void { ]> - translatable element <b>with placeholders</b> - { count, plural, =0 {<p>test</p>}} - foo - { count, plural, =0 {{ sex, gender, other {<p>deeply nested</p>}} }} + translatable element <b>with placeholders</b> + { count, plural, =0 {<p>test</p>}} + foo + { count, plural, =0 {{ sex, gender, other {<p>deeply nested</p>}} }} `; it('should write a valid xmb file', () => { expect(toXmb(HTML)).toEqual(XMB); });