feat(i18n): switch to sha1 for message fingerprinting
This commit is contained in:
parent
dd68ae3ef1
commit
b65f66feff
|
@ -38,7 +38,7 @@ describe('template i18n extraction output', () => {
|
||||||
<!ELEMENT ex (#PCDATA)>
|
<!ELEMENT ex (#PCDATA)>
|
||||||
]>
|
]>
|
||||||
<messagebundle>
|
<messagebundle>
|
||||||
<msg id="5a2858f1" desc="desc" meaning="meaning">translate me</msg>
|
<msg id="76e1eccb1b772fa9f294ef9c146ea6d0efa8a2d4" desc="desc" meaning="meaning">translate me</msg>
|
||||||
</messagebundle>`;
|
</messagebundle>`;
|
||||||
|
|
||||||
const xmbOutput = path.join(outDir, 'messages.xmb');
|
const xmbOutput = path.join(outDir, 'messages.xmb');
|
||||||
|
|
|
@ -9,29 +9,7 @@
|
||||||
import * as i18n from './i18n_ast';
|
import * as i18n from './i18n_ast';
|
||||||
|
|
||||||
export function digestMessage(message: i18n.Message): string {
|
export function digestMessage(message: i18n.Message): string {
|
||||||
return strHash(serializeNodes(message.nodes).join('') + `[${message.meaning}]`);
|
return sha1(serializeNodes(message.nodes).join('') + `[${message.meaning}]`);
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* String hash function similar to java.lang.String.hashCode().
|
|
||||||
* The hash code for a string is computed as
|
|
||||||
* s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1],
|
|
||||||
* where s[i] is the ith character of the string and n is the length of
|
|
||||||
* the string. We mod the result to make it between 0 (inclusive) and 2^32 (exclusive).
|
|
||||||
*
|
|
||||||
* Based on goog.string.hashCode from the Google Closure library
|
|
||||||
* https://github.com/google/closure-library/
|
|
||||||
*
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
// TODO(vicb): better algo (less collisions) ?
|
|
||||||
export function strHash(str: string): string {
|
|
||||||
let result: number = 0;
|
|
||||||
for (var i = 0; i < str.length; ++i) {
|
|
||||||
// Normalize to 4 byte range, 0 ... 2^32.
|
|
||||||
result = (31 * result + str.charCodeAt(i)) >>> 0;
|
|
||||||
}
|
|
||||||
return result.toString(16);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -73,3 +51,141 @@ const serializerVisitor = new _SerializerVisitor();
|
||||||
export function serializeNodes(nodes: i18n.Node[]): string[] {
|
export function serializeNodes(nodes: i18n.Node[]): string[] {
|
||||||
return nodes.map(a => a.visit(serializerVisitor, null));
|
return nodes.map(a => a.visit(serializerVisitor, null));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Computes the SHA-1 digest of a string and returns it as 40 lowercase hexadecimal characters.
 *
 * The input is UTF-8 encoded before being hashed.
 *
 * see http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
 *
 * WARNING: this function has been neither designed nor tested with security in mind.
 * DO NOT USE IT IN A SECURITY SENSITIVE CONTEXT.
 */
export function sha1(str: string): string {
  const bytes = utf8Encode(str);
  const blocks = stringToWords32(bytes);
  const bitLength = bytes.length * 8;

  // Padding: a single 1 bit right after the message, and the message bit length stored in the
  // last word of the final 16-word block.
  blocks[bitLength >> 5] |= 0x80 << (24 - bitLength % 32);
  blocks[((bitLength + 64 >> 9) << 4) + 15] = bitLength;

  const schedule = new Array(80);
  let a = 0x67452301;
  let b = 0xefcdab89;
  let c = 0x98badcfe;
  let d = 0x10325476;
  let e = 0xc3d2e1f0;

  for (let offset = 0; offset < blocks.length; offset += 16) {
    // Remember the state at the start of the block; it is added back once the 80 rounds are done.
    const startA = a;
    const startB = b;
    const startC = c;
    const startD = d;
    const startE = e;

    for (let t = 0; t < 80; t++) {
      schedule[t] = t < 16 ?
          blocks[offset + t] :
          rol32(schedule[t - 3] ^ schedule[t - 8] ^ schedule[t - 14] ^ schedule[t - 16], 1);

      const [f, k] = fk(t, b, c, d);
      const next = add32(add32(add32(add32(rol32(a, 5), f), e), k), schedule[t]);

      // Rotate the working variables (every right-hand side reads the previous round's value).
      e = d;
      d = c;
      c = rol32(b, 30);
      b = a;
      a = next;
    }

    a = add32(a, startA);
    b = add32(b, startB);
    c = add32(c, startC);
    d = add32(d, startD);
    e = add32(e, startE);
  }

  // Turn the five state words into bytes, then print each byte as two hex digits.
  const digest = words32ToString([a, b, c, d, e]);
  let hex = '';

  for (let pos = 0; pos < digest.length; pos++) {
    const byte = digest.charCodeAt(pos);
    hex += (byte >>> 4 & 0x0f).toString(16) + (byte & 0x0f).toString(16);
  }

  return hex;
}
|
||||||
|
|
||||||
|
/**
 * Encodes a JavaScript (UTF-16) string as UTF-8.
 *
 * The result is a "binary string": every character of the returned string has a char code in
 * the range 0..255 and stands for one UTF-8 byte.
 *
 * A valid surrogate pair is encoded as a single 4-byte sequence. An unpaired surrogate is
 * encoded on its own as a 3-byte sequence.
 */
function utf8Encode(str: string): string {
  let encoded = '';

  for (let index = 0; index < str.length; index++) {
    const codePoint = decodeSurrogatePairs(str, index);

    if (codePoint > 0xffff) {
      // A value above the BMP means a surrogate pair (two UTF-16 code units) was consumed:
      // skip the low surrogate so that it is not encoded a second time.
      index++;
    }

    if (codePoint <= 0x7f) {
      encoded += String.fromCharCode(codePoint);
    } else if (codePoint <= 0x7ff) {
      encoded += String.fromCharCode(0xc0 | (codePoint >>> 6), 0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0xffff) {
      encoded += String.fromCharCode(
          0xe0 | (codePoint >>> 12), 0x80 | ((codePoint >>> 6) & 0x3f),
          0x80 | (codePoint & 0x3f));
    } else if (codePoint <= 0x1fffff) {
      encoded += String.fromCharCode(
          0xf0 | (codePoint >>> 18), 0x80 | ((codePoint >>> 12) & 0x3f),
          0x80 | ((codePoint >>> 6) & 0x3f), 0x80 | (codePoint & 0x3f));
    }
  }

  return encoded;
}

/**
 * Returns the code point that starts at `index` in `str`.
 *
 * When the code unit at `index` is a high (lead) surrogate immediately followed by a low (trail)
 * surrogate, the two are combined into a single code point above 0xFFFF. In every other case the
 * code unit at `index` is returned unchanged.
 *
 * see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
 *
 * @throws Error when `index` is outside of `str`.
 */
function decodeSurrogatePairs(str: string, index: number): number {
  if (index < 0 || index >= str.length) {
    throw new Error(`index=${index} is out of range in "${str}"`);
  }

  const high = str.charCodeAt(index);

  // Only 0xD800..0xDBFF can lead a pair; 0xDC00..0xDFFF are trail surrogates.
  if (high >= 0xd800 && high <= 0xdbff && str.length > index + 1) {
    const low = str.charCodeAt(index + 1);
    if (low >= 0xdc00 && low <= 0xdfff) {
      return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
    }
  }

  return high;
}
|
||||||
|
|
||||||
|
/**
 * Packs a binary string into big-endian 32-bit words.
 *
 * Only the low 8 bits of each char code are used. Character `i` lands in word `i >>> 2`, with the
 * first character of every group of four in the most significant byte. A trailing group of fewer
 * than four characters is padded with zero bytes.
 */
function stringToWords32(str: string): number[] {
  // Round up so that a trailing partial word gets its own zero-initialised slot.
  const words32: number[] = new Array((str.length + 3) >>> 2);

  for (let i = 0; i < words32.length; i++) {
    words32[i] = 0;
  }

  for (let i = 0; i < str.length; i++) {
    // (3 - i) & 3 maps i = 0, 1, 2, 3, 4, ... to byte positions 3, 2, 1, 0, 3, ...
    words32[i >>> 2] |= (str.charCodeAt(i) & 0xff) << (8 * ((3 - i) & 0x3));
  }

  return words32;
}
|
||||||
|
|
||||||
|
/**
 * Unpacks 32-bit words into a binary string, four characters per word, most significant byte
 * first. Every character of the result has a char code in the range 0..255.
 */
function words32ToString(words32: number[]): string {
  let out = '';

  for (let w = 0; w < words32.length; w++) {
    const word = words32[w];
    for (let shift = 24; shift >= 0; shift -= 8) {
      out += String.fromCharCode((word >>> shift) & 0xff);
    }
  }

  return out;
}
|
||||||
|
|
||||||
|
/**
 * Returns the `[f, k]` pair used by SHA-1 round `index`: the value of the round's logical
 * function applied to `b`, `c`, `d`, and the round constant.
 *
 * The function and constant change every 20 rounds.
 */
function fk(index: number, b: number, c: number, d: number): [number, number] {
  const stage = index < 20 ? 0 : index < 40 ? 1 : index < 60 ? 2 : 3;

  switch (stage) {
    case 0:
      // "choose": bits of c where b is set, bits of d elsewhere
      return [(b & c) | (~b & d), 0x5a827999];
    case 1:
      // "parity"
      return [b ^ c ^ d, 0x6ed9eba1];
    case 2:
      // "majority"
      return [(b & c) | (b & d) | (c & d), 0x8f1bbcdc];
    default:
      // "parity" again, with the last constant
      return [b ^ c ^ d, 0xca62c1d6];
  }
}
|
||||||
|
|
||||||
|
/**
 * Adds two numbers modulo 2^32 and returns the sum as a signed 32-bit integer.
 *
 * Both operands are first coerced to signed 32-bit integers.
 */
function add32(a: number, b: number): number {
  // `| 0` truncates to a signed 32-bit integer; the intermediate sum of two such values is
  // always exactly representable, so the final truncation yields the wrapped result.
  return ((a | 0) + (b | 0)) | 0;
}
|
||||||
|
|
||||||
|
/**
 * Rotates the 32-bit value `a` left by `count` bits and returns the result as a signed 32-bit
 * integer.
 */
function rol32(a: number, count: number): number {
  const wrapped = a >>> (32 - count);  // bits pushed out on the left re-enter on the right
  const shifted = a << count;
  return shifted | wrapped;
}
|
|
@ -7,32 +7,52 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import {beforeEach, ddescribe, describe, expect, iit, inject, it, xdescribe, xit} from '@angular/core/testing/testing_internal';
|
import {beforeEach, ddescribe, describe, expect, iit, inject, it, xdescribe, xit} from '@angular/core/testing/testing_internal';
|
||||||
import {strHash} from '../../src/i18n/digest';
|
|
||||||
|
import {sha1} from '../../src/i18n/digest';
|
||||||
|
|
||||||
export function main(): void {
|
export function main(): void {
|
||||||
describe('strHash', () => {
|
describe('sha1', () => {
|
||||||
it('should return a hash value', () => {
|
it('should work on emnpty strings',
|
||||||
// https://github.com/google/closure-library/blob/1fb19a857b96b74e6523f3e9d33080baf25be046/closure/goog/string/string_test.js#L1115
|
() => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); });
|
||||||
expectHash('', 0);
|
|
||||||
expectHash('foo', 101574);
|
it('should returns the sha1 of "hello world"',
|
||||||
expectHash('\uAAAAfoo', 1301670364);
|
() => { expect(sha1('abc')).toEqual('a9993e364706816aba3e25717850c26c9cd0d89d'); });
|
||||||
expectHash('a', 92567585, 5);
|
|
||||||
expectHash('a', 2869595232, 6);
|
it('should returns the sha1 of unicode strings',
|
||||||
expectHash('a', 3058106369, 7);
|
() => { expect(sha1('你好,世界')).toEqual('3becb03b015ed48050611c8d7afe4b88f70d5a20'); });
|
||||||
expectHash('a', 312017024, 8);
|
|
||||||
expectHash('a', 2929737728, 1024);
|
it('should support arbitrary string size', () => {
|
||||||
|
// node.js reference code:
|
||||||
|
//
|
||||||
|
// var crypto = require('crypto');
|
||||||
|
//
|
||||||
|
// function sha1(string) {
|
||||||
|
// var shasum = crypto.createHash('sha1');
|
||||||
|
// shasum.update(string, 'utf8');
|
||||||
|
// return shasum.digest('hex', 'utf8');
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// var prefix = `你好,世界`;
|
||||||
|
// var result = sha1(prefix);
|
||||||
|
// for (var size = prefix.length; size < 5000; size += 101) {
|
||||||
|
// result = prefix + sha1(result);
|
||||||
|
// while (result.length < size) {
|
||||||
|
// result += result;
|
||||||
|
// }
|
||||||
|
// result = result.slice(-size);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// console.log(sha1(result));
|
||||||
|
const prefix = `你好,世界`;
|
||||||
|
let result = sha1(prefix);
|
||||||
|
for (let size = prefix.length; size < 5000; size += 101) {
|
||||||
|
result = prefix + sha1(result);
|
||||||
|
while (result.length < size) {
|
||||||
|
result += result;
|
||||||
|
}
|
||||||
|
result = result.slice(-size);
|
||||||
|
}
|
||||||
|
expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function expectHash(text: string, decimal: number, repeat: number = 1) {
|
|
||||||
let acc = text;
|
|
||||||
for (let i = 1; i < repeat; i++) {
|
|
||||||
acc += text;
|
|
||||||
}
|
|
||||||
|
|
||||||
const hash = strHash(acc);
|
|
||||||
expect(typeof(hash)).toEqual('string');
|
|
||||||
expect(hash.length > 0).toBe(true);
|
|
||||||
expect(parseInt(hash, 16)).toEqual(decimal);
|
|
||||||
}
|
|
|
@ -248,7 +248,9 @@ export function main() {
|
||||||
it('should allow nested implicit elements', () => {
|
it('should allow nested implicit elements', () => {
|
||||||
let result: any[];
|
let result: any[];
|
||||||
|
|
||||||
expect(() => {result = extract('<div>outer<div>inner</div></div>', ['div'])}).not.toThrow();
|
expect(() => {
|
||||||
|
result = extract('<div>outer<div>inner</div></div>', ['div']);
|
||||||
|
}).not.toThrow();
|
||||||
|
|
||||||
expect(result).toEqual([
|
expect(result).toEqual([
|
||||||
[['outer', '<ph tag name="START_TAG_DIV">inner</ph name="CLOSE_TAG_DIV">'], '', ''],
|
[['outer', '<ph tag name="START_TAG_DIV">inner</ph name="CLOSE_TAG_DIV">'], '', ''],
|
||||||
|
|
|
@ -26,7 +26,7 @@ export function main(): void {
|
||||||
messages.updateFromTemplate(
|
messages.updateFromTemplate(
|
||||||
'<p i18n="m|d">Translate Me</p>', 'url', DEFAULT_INTERPOLATION_CONFIG);
|
'<p i18n="m|d">Translate Me</p>', 'url', DEFAULT_INTERPOLATION_CONFIG);
|
||||||
expect(humanizeMessages(messages)).toEqual([
|
expect(humanizeMessages(messages)).toEqual([
|
||||||
'a486901=Translate Me',
|
'2e791a68a3324ecdd29e252198638dafacec46e9=Translate Me',
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ export function main(): void {
|
||||||
'<p i18n="m|d">Translate Me</p><p i18n>Translate Me</p>', 'url',
|
'<p i18n="m|d">Translate Me</p><p i18n>Translate Me</p>', 'url',
|
||||||
DEFAULT_INTERPOLATION_CONFIG);
|
DEFAULT_INTERPOLATION_CONFIG);
|
||||||
expect(humanizeMessages(messages)).toEqual([
|
expect(humanizeMessages(messages)).toEqual([
|
||||||
'a486901=Translate Me',
|
'2e791a68a3324ecdd29e252198638dafacec46e9=Translate Me',
|
||||||
'8475f2cc=Translate Me',
|
'8ca133f957845af1b1868da1b339180d1f519644=Translate Me',
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -45,10 +45,10 @@ export function main(): void {
|
||||||
<!ELEMENT ex (#PCDATA)>
|
<!ELEMENT ex (#PCDATA)>
|
||||||
]>
|
]>
|
||||||
<messagebundle>
|
<messagebundle>
|
||||||
<msg id="834fa53b">translatable element <ph name="START_BOLD_TEXT"><ex><b></ex></ph>with placeholders<ph name="CLOSE_BOLD_TEXT"><ex></b></ex></ph> <ph name="INTERPOLATION"/></msg>
|
<msg id="ec1d033f2436133c14ab038286c4f5df4697484a">translatable element <ph name="START_BOLD_TEXT"><ex><b></ex></ph>with placeholders<ph name="CLOSE_BOLD_TEXT"><ex></b></ex></ph> <ph name="INTERPOLATION"/></msg>
|
||||||
<msg id="7a2843db">{ count, plural, =0 {<ph name="START_PARAGRAPH"><ex><p></ex></ph>test<ph name="CLOSE_PARAGRAPH"><ex></p></ex></ph>}}</msg>
|
<msg id="e2ccf3d131b15f54aa1fcf1314b1ca77c14bfcc2">{ count, plural, =0 {<ph name="START_PARAGRAPH"><ex><p></ex></ph>test<ph name="CLOSE_PARAGRAPH"><ex></p></ex></ph>}}</msg>
|
||||||
<msg id="b45e58a5" desc="d" meaning="m">foo</msg>
|
<msg id="db3e0a6a5a96481f60aec61d98c3eecddef5ac23" desc="d" meaning="m">foo</msg>
|
||||||
<msg id="18ea85bc">{ count, plural, =0 {{ sex, gender, other {<ph name="START_PARAGRAPH"><ex><p></ex></ph>deeply nested<ph name="CLOSE_PARAGRAPH"><ex></p></ex></ph>}} }}</msg>
|
<msg id="83dd87699b8c1779dd72277ef6e2d46ca58be042">{ count, plural, =0 {{ sex, gender, other {<ph name="START_PARAGRAPH"><ex><p></ex></ph>deeply nested<ph name="CLOSE_PARAGRAPH"><ex></p></ex></ph>}} }}</msg>
|
||||||
</messagebundle>`;
|
</messagebundle>`;
|
||||||
|
|
||||||
it('should write a valid xmb file', () => { expect(toXmb(HTML)).toEqual(XMB); });
|
it('should write a valid xmb file', () => { expect(toXmb(HTML)).toEqual(XMB); });
|
||||||
|
|
Loading…
Reference in New Issue