feat(i18n): switch to sha1 for message fingerprinting

This commit is contained in:
Victor Berchet 2016-08-08 12:44:48 -07:00 committed by Alex Rickabaugh
parent dd68ae3ef1
commit b65f66feff
6 changed files with 194 additions and 56 deletions

View File

@ -38,7 +38,7 @@ describe('template i18n extraction output', () => {
<!ELEMENT ex (#PCDATA)> <!ELEMENT ex (#PCDATA)>
]> ]>
<messagebundle> <messagebundle>
<msg id="5a2858f1" desc="desc" meaning="meaning">translate me</msg> <msg id="76e1eccb1b772fa9f294ef9c146ea6d0efa8a2d4" desc="desc" meaning="meaning">translate me</msg>
</messagebundle>`; </messagebundle>`;
const xmbOutput = path.join(outDir, 'messages.xmb'); const xmbOutput = path.join(outDir, 'messages.xmb');

View File

@ -9,29 +9,7 @@
import * as i18n from './i18n_ast'; import * as i18n from './i18n_ast';
export function digestMessage(message: i18n.Message): string { export function digestMessage(message: i18n.Message): string {
return strHash(serializeNodes(message.nodes).join('') + `[${message.meaning}]`); return sha1(serializeNodes(message.nodes).join('') + `[${message.meaning}]`);
}
/**
* String hash function similar to java.lang.String.hashCode().
* The hash code for a string is computed as
* s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1],
* where s[i] is the ith character of the string and n is the length of
* the string. We mod the result to make it between 0 (inclusive) and 2^32 (exclusive).
*
* Based on goog.string.hashCode from the Google Closure library
* https://github.com/google/closure-library/
*
* @internal
*/
// TODO(vicb): better algo (less collisions) ?
export function strHash(str: string): string {
let result: number = 0;
for (var i = 0; i < str.length; ++i) {
// Normalize to 4 byte range, 0 ... 2^32.
result = (31 * result + str.charCodeAt(i)) >>> 0;
}
return result.toString(16);
} }
/** /**
@ -73,3 +51,141 @@ const serializerVisitor = new _SerializerVisitor();
export function serializeNodes(nodes: i18n.Node[]): string[] { export function serializeNodes(nodes: i18n.Node[]): string[] {
return nodes.map(a => a.visit(serializerVisitor, null)); return nodes.map(a => a.visit(serializerVisitor, null));
} }
/**
 * Computes the SHA-1 digest of a string.
 *
 * The input is first converted to UTF-8, then hashed following the algorithm described in
 * http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
 *
 * WARNING: this function has not been designed nor tested with security in mind.
 * DO NOT USE IT IN A SECURITY SENSITIVE CONTEXT.
 *
 * @param str the text to hash
 * @returns the digest as a lowercase hexadecimal string (two hex digits per digest byte)
 */
export function sha1(str: string): string {
  const encoded = utf8Encode(str);
  const blocks = stringToWords32(encoded);
  const bitLength = encoded.length * 8;
  const schedule: number[] = new Array(80);

  // Initial hash state.
  let a = 0x67452301;
  let b = 0xefcdab89;
  let c = 0x98badcfe;
  let d = 0x10325476;
  let e = 0xc3d2e1f0;

  // Padding: a single 1 bit right after the message, then the bit length stored in the last
  // word of the final 16-word block.
  blocks[bitLength >> 5] |= 0x80 << (24 - bitLength % 32);
  blocks[(((bitLength + 64) >> 9) << 4) + 15] = bitLength;

  for (let offset = 0; offset < blocks.length; offset += 16) {
    // Remember the state at the start of the block; it is added back once the block is processed.
    const startA = a;
    const startB = b;
    const startC = c;
    const startD = d;
    const startE = e;

    for (let round = 0; round < 80; round++) {
      // The first 16 schedule words are the block itself, the rest are derived from earlier words.
      schedule[round] = round < 16 ?
          blocks[offset + round] :
          rol32(
              schedule[round - 3] ^ schedule[round - 8] ^ schedule[round - 14] ^
                  schedule[round - 16],
              1);

      const [mixed, constant] = fk(round, b, c, d);
      const next =
          add32(add32(add32(add32(rol32(a, 5), mixed), e), constant), schedule[round]);

      e = d;
      d = c;
      c = rol32(b, 30);
      b = a;
      a = next;
    }

    a = add32(a, startA);
    b = add32(b, startB);
    c = add32(c, startC);
    d = add32(d, startD);
    e = add32(e, startE);
  }

  // Serialize the five state words to bytes, then each byte to two hex digits.
  const digestBytes = words32ToString([a, b, c, d, e]);
  let hex = '';
  for (let pos = 0; pos < digestBytes.length; pos++) {
    const byte = digestBytes.charCodeAt(pos);
    hex += ((byte >>> 4) & 0x0f).toString(16) + (byte & 0x0f).toString(16);
  }

  return hex;
}
/**
 * Encodes a JavaScript (UTF-16) string as UTF-8.
 *
 * The result is a "byte string": every character of the returned string has a char code in the
 * range 0x00-0xff and represents one byte of the UTF-8 output.
 *
 * A valid surrogate pair is encoded once, as a single 4-byte sequence. An unpaired surrogate is
 * encoded as-is using the 3-byte form.
 *
 * @param str the string to encode
 * @returns the UTF-8 bytes, one per character
 */
function utf8Encode(str: string): string {
  let encoded: string = '';

  for (let index = 0; index < str.length; index++) {
    const codePoint = decodeSurrogatePairs(str, index);

    if (codePoint > 0xffff) {
      // The code point was built from two UTF-16 code units: skip the low surrogate so that it
      // is not encoded a second time on the next iteration.
      index++;
    }

    if (codePoint <= 0x7f) {
      encoded += String.fromCharCode(codePoint);
    } else if (codePoint <= 0x7ff) {
      encoded += String.fromCharCode(0xc0 | (codePoint >>> 6), 0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0xffff) {
      encoded += String.fromCharCode(
          0xe0 | (codePoint >>> 12), 0x80 | ((codePoint >>> 6) & 0x3f), 0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0x1fffff) {
      encoded += String.fromCharCode(
          0xf0 | (codePoint >>> 18), 0x80 | ((codePoint >>> 12) & 0x3f),
          0x80 | ((codePoint >>> 6) & 0x3f), 0x80 | (codePoint & 0x3f));
    }
  }

  return encoded;
}

/**
 * Reads the code point starting at `index`.
 *
 * When the code unit at `index` is a high surrogate (0xd800-0xdbff) immediately followed by a low
 * surrogate (0xdc00-0xdfff), the pair is combined into a single code point above 0xffff.
 * Otherwise the code unit at `index` is returned unchanged.
 *
 * see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
 *
 * @param str the string to read from
 * @param index position of the code unit to decode
 * @returns the decoded code point
 * @throws Error when `index` is outside of `str`
 */
function decodeSurrogatePairs(str: string, index: number): number {
  if (index < 0 || index >= str.length) {
    throw new Error(`index=${index} is out of range in "${str}"`);
  }

  const high = str.charCodeAt(index);

  // Only 0xd800-0xdbff can start a pair; 0xdc00-0xdfff are low surrogates.
  if (high >= 0xd800 && high <= 0xdbff && str.length > index + 1) {
    const low = str.charCodeAt(index + 1);
    if (low >= 0xdc00 && low <= 0xdfff) {
      return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
    }
  }

  return high;
}
/**
 * Packs a byte string into 32-bit words, big-endian (the first character lands in the most
 * significant byte of the first word).
 *
 * Only the low 8 bits of each character are used. A trailing partial word is padded with zero
 * bits on the right.
 *
 * @param str string whose characters are treated as bytes
 * @returns the packed words (as signed 32-bit integers)
 */
function stringToWords32(str: string): number[] {
  const wordCount = (str.length + 3) >>> 2;
  const packed: number[] = [];
  for (let w = 0; w < wordCount; w++) {
    packed.push(0);
  }

  for (let pos = 0; pos < str.length; pos++) {
    // Byte 0 of a word goes to bits 31-24, byte 3 to bits 7-0.
    const shift = 8 * (3 - (pos & 0x3));
    packed[pos >>> 2] |= (str.charCodeAt(pos) & 0xff) << shift;
  }

  return packed;
}
/**
 * Unpacks 32-bit words into a byte string, big-endian: each word yields four characters, most
 * significant byte first, every character having a char code in the range 0x00-0xff.
 *
 * @param words32 the words to serialize
 * @returns a string that is four characters long per input word
 */
function words32ToString(words32: number[]): string {
  let bytes = '';

  for (let w = 0; w < words32.length; w++) {
    const word = words32[w];
    for (let shift = 24; shift >= 0; shift -= 8) {
      bytes += String.fromCharCode((word >>> shift) & 0xff);
    }
  }

  return bytes;
}
/**
 * Returns the SHA-1 round function value and round constant for a given round.
 *
 * The 80 rounds are split in four groups of 20, each with its own bitwise function of
 * `b`, `c`, `d` and its own constant.
 *
 * @param index the round number
 * @param b second word of the working state
 * @param c third word of the working state
 * @param d fourth word of the working state
 * @returns a `[f, k]` tuple: the function value and the constant for that round
 */
function fk(index: number, b: number, c: number, d: number): [number, number] {
  let mixed: number;
  let constant: number;

  if (index < 20) {
    // "choose": bits of c where b is set, bits of d elsewhere
    mixed = (b & c) | (~b & d);
    constant = 0x5a827999;
  } else if (index < 40) {
    // "parity"
    mixed = b ^ c ^ d;
    constant = 0x6ed9eba1;
  } else if (index < 60) {
    // "majority"
    mixed = (b & c) | (b & d) | (c & d);
    constant = 0x8f1bbcdc;
  } else {
    // "parity" again, with a different constant
    mixed = b ^ c ^ d;
    constant = 0xca62c1d6;
  }

  return [mixed, constant];
}
/**
 * Adds two numbers modulo 2^32.
 *
 * Both operands are first coerced to signed 32-bit integers (so a missing operand counts as 0),
 * and the sum is wrapped back into the signed 32-bit range.
 *
 * @param a first operand
 * @param b second operand
 * @returns the wrapped sum as a signed 32-bit integer
 */
function add32(a: number, b: number): number {
  // The sum of two int32 values is always exactly representable as a double, so a single
  // truncation at the end is enough.
  return ((a | 0) + (b | 0)) | 0;
}
/**
 * Rotates a 32-bit value to the left.
 *
 * @param a the value to rotate (treated as a 32-bit integer)
 * @param count how many bit positions to rotate by
 * @returns the rotated value as a signed 32-bit integer
 */
function rol32(a: number, count: number): number {
  // Bits pushed out on the left come back in on the right.
  const wrapped = a >>> (32 - count);
  const shifted = a << count;
  return shifted | wrapped;
}

View File

@ -7,32 +7,52 @@
*/ */
import {beforeEach, ddescribe, describe, expect, iit, inject, it, xdescribe, xit} from '@angular/core/testing/testing_internal'; import {beforeEach, ddescribe, describe, expect, iit, inject, it, xdescribe, xit} from '@angular/core/testing/testing_internal';
import {strHash} from '../../src/i18n/digest';
import {sha1} from '../../src/i18n/digest';
export function main(): void { export function main(): void {
describe('strHash', () => { describe('sha1', () => {
it('should return a hash value', () => { it('should work on emnpty strings',
// https://github.com/google/closure-library/blob/1fb19a857b96b74e6523f3e9d33080baf25be046/closure/goog/string/string_test.js#L1115 () => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); });
expectHash('', 0);
expectHash('foo', 101574); it('should returns the sha1 of "hello world"',
expectHash('\uAAAAfoo', 1301670364); () => { expect(sha1('abc')).toEqual('a9993e364706816aba3e25717850c26c9cd0d89d'); });
expectHash('a', 92567585, 5);
expectHash('a', 2869595232, 6); it('should returns the sha1 of unicode strings',
expectHash('a', 3058106369, 7); () => { expect(sha1('你好,世界')).toEqual('3becb03b015ed48050611c8d7afe4b88f70d5a20'); });
expectHash('a', 312017024, 8);
expectHash('a', 2929737728, 1024); it('should support arbitrary string size', () => {
// node.js reference code:
//
// var crypto = require('crypto');
//
// function sha1(string) {
// var shasum = crypto.createHash('sha1');
// shasum.update(string, 'utf8');
// return shasum.digest('hex', 'utf8');
// }
//
// var prefix = `你好,世界`;
// var result = sha1(prefix);
// for (var size = prefix.length; size < 5000; size += 101) {
// result = prefix + sha1(result);
// while (result.length < size) {
// result += result;
// }
// result = result.slice(-size);
// }
//
// console.log(sha1(result));
const prefix = `你好,世界`;
let result = sha1(prefix);
for (let size = prefix.length; size < 5000; size += 101) {
result = prefix + sha1(result);
while (result.length < size) {
result += result;
}
result = result.slice(-size);
}
expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45');
}); });
}); });
} }
function expectHash(text: string, decimal: number, repeat: number = 1) {
let acc = text;
for (let i = 1; i < repeat; i++) {
acc += text;
}
const hash = strHash(acc);
expect(typeof(hash)).toEqual('string');
expect(hash.length > 0).toBe(true);
expect(parseInt(hash, 16)).toEqual(decimal);
}

View File

@ -248,7 +248,9 @@ export function main() {
it('should allow nested implicit elements', () => { it('should allow nested implicit elements', () => {
let result: any[]; let result: any[];
expect(() => {result = extract('<div>outer<div>inner</div></div>', ['div'])}).not.toThrow(); expect(() => {
result = extract('<div>outer<div>inner</div></div>', ['div']);
}).not.toThrow();
expect(result).toEqual([ expect(result).toEqual([
[['outer', '<ph tag name="START_TAG_DIV">inner</ph name="CLOSE_TAG_DIV">'], '', ''], [['outer', '<ph tag name="START_TAG_DIV">inner</ph name="CLOSE_TAG_DIV">'], '', ''],

View File

@ -26,7 +26,7 @@ export function main(): void {
messages.updateFromTemplate( messages.updateFromTemplate(
'<p i18n="m|d">Translate Me</p>', 'url', DEFAULT_INTERPOLATION_CONFIG); '<p i18n="m|d">Translate Me</p>', 'url', DEFAULT_INTERPOLATION_CONFIG);
expect(humanizeMessages(messages)).toEqual([ expect(humanizeMessages(messages)).toEqual([
'a486901=Translate Me', '2e791a68a3324ecdd29e252198638dafacec46e9=Translate Me',
]); ]);
}); });
@ -35,8 +35,8 @@ export function main(): void {
'<p i18n="m|d">Translate Me</p><p i18n>Translate Me</p>', 'url', '<p i18n="m|d">Translate Me</p><p i18n>Translate Me</p>', 'url',
DEFAULT_INTERPOLATION_CONFIG); DEFAULT_INTERPOLATION_CONFIG);
expect(humanizeMessages(messages)).toEqual([ expect(humanizeMessages(messages)).toEqual([
'a486901=Translate Me', '2e791a68a3324ecdd29e252198638dafacec46e9=Translate Me',
'8475f2cc=Translate Me', '8ca133f957845af1b1868da1b339180d1f519644=Translate Me',
]); ]);
}); });
}); });

View File

@ -45,10 +45,10 @@ export function main(): void {
<!ELEMENT ex (#PCDATA)> <!ELEMENT ex (#PCDATA)>
]> ]>
<messagebundle> <messagebundle>
<msg id="834fa53b">translatable element <ph name="START_BOLD_TEXT"><ex>&lt;b&gt;</ex></ph>with placeholders<ph name="CLOSE_BOLD_TEXT"><ex>&lt;/b&gt;</ex></ph> <ph name="INTERPOLATION"/></msg> <msg id="ec1d033f2436133c14ab038286c4f5df4697484a">translatable element <ph name="START_BOLD_TEXT"><ex>&lt;b&gt;</ex></ph>with placeholders<ph name="CLOSE_BOLD_TEXT"><ex>&lt;/b&gt;</ex></ph> <ph name="INTERPOLATION"/></msg>
<msg id="7a2843db">{ count, plural, =0 {<ph name="START_PARAGRAPH"><ex>&lt;p&gt;</ex></ph>test<ph name="CLOSE_PARAGRAPH"><ex>&lt;/p&gt;</ex></ph>}}</msg> <msg id="e2ccf3d131b15f54aa1fcf1314b1ca77c14bfcc2">{ count, plural, =0 {<ph name="START_PARAGRAPH"><ex>&lt;p&gt;</ex></ph>test<ph name="CLOSE_PARAGRAPH"><ex>&lt;/p&gt;</ex></ph>}}</msg>
<msg id="b45e58a5" desc="d" meaning="m">foo</msg> <msg id="db3e0a6a5a96481f60aec61d98c3eecddef5ac23" desc="d" meaning="m">foo</msg>
<msg id="18ea85bc">{ count, plural, =0 {{ sex, gender, other {<ph name="START_PARAGRAPH"><ex>&lt;p&gt;</ex></ph>deeply nested<ph name="CLOSE_PARAGRAPH"><ex>&lt;/p&gt;</ex></ph>}} }}</msg> <msg id="83dd87699b8c1779dd72277ef6e2d46ca58be042">{ count, plural, =0 {{ sex, gender, other {<ph name="START_PARAGRAPH"><ex>&lt;p&gt;</ex></ph>deeply nested<ph name="CLOSE_PARAGRAPH"><ex>&lt;/p&gt;</ex></ph>}} }}</msg>
</messagebundle>`; </messagebundle>`;
it('should write a valid xmb file', () => { expect(toXmb(HTML)).toEqual(XMB); }); it('should write a valid xmb file', () => { expect(toXmb(HTML)).toEqual(XMB); });