fix(compiler): fix utf8encode, move to shared utils, add tests (#15076)
This commit is contained in:
		
							parent
							
								
									3b1956bbf2
								
							
						
					
					
						commit
						959a03a61f
					
				| @ -6,6 +6,8 @@ | ||||
|  * found in the LICENSE file at https://angular.io/license
 | ||||
|  */ | ||||
| 
 | ||||
| import {utf8Encode} from '../util'; | ||||
| 
 | ||||
| import * as i18n from './i18n_ast'; | ||||
| 
 | ||||
| export function digest(message: i18n.Message): string { | ||||
| @ -210,49 +212,6 @@ enum Endian { | ||||
|   Big, | ||||
| } | ||||
| 
 | ||||
| // TODO(vicb): move this to some shared place, as we also need it
 | ||||
| // for SourceMaps.
 | ||||
/**
 * Encodes a JavaScript (UTF-16) string as UTF-8.
 *
 * The result is a "binary string": each character of the returned string holds one UTF-8 byte
 * (char code 0x00-0xff).
 *
 * A well-formed surrogate pair is combined into one code point and emitted as a single 4-byte
 * sequence. Unpaired surrogate halves are emitted individually as 3-byte sequences.
 *
 * @param str The string to encode.
 * @returns The UTF-8 bytes of `str`, one byte per character.
 */
export function utf8Encode(str: string): string {
  let encoded = '';

  for (let index = 0; index < str.length; index++) {
    const codePoint = decodeSurrogatePairs(str, index);

    // A value above 0xffff can only come from a combined surrogate pair, which occupies two
    // UTF-16 code units. Skip the low half so it is not encoded a second time.
    if (codePoint > 0xffff) {
      index++;
    }

    if (codePoint <= 0x7f) {
      encoded += String.fromCharCode(codePoint);
    } else if (codePoint <= 0x7ff) {
      encoded += String.fromCharCode(0xc0 | codePoint >>> 6, 0x80 | codePoint & 0x3f);
    } else if (codePoint <= 0xffff) {
      encoded += String.fromCharCode(
          0xe0 | codePoint >>> 12, 0x80 | codePoint >>> 6 & 0x3f, 0x80 | codePoint & 0x3f);
    } else if (codePoint <= 0x1fffff) {
      encoded += String.fromCharCode(
          0xf0 | codePoint >>> 18, 0x80 | codePoint >>> 12 & 0x3f, 0x80 | codePoint >>> 6 & 0x3f,
          0x80 | codePoint & 0x3f);
    }
  }

  return encoded;
}

/**
 * Reads the code point that starts at `index` in `str`.
 *
 * When the code unit at `index` is a high surrogate (0xd800-0xdbff) immediately followed by a low
 * surrogate (0xdc00-0xdfff), the two are combined into a single supplementary code point.
 * Otherwise the code unit at `index` is returned unchanged.
 *
 * see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
 *
 * @param str The string to read from.
 * @param index Position of the code unit to decode.
 * @returns The decoded code point.
 * @throws Error when `index` is outside of `str`.
 */
function decodeSurrogatePairs(str: string, index: number): number {
  if (index < 0 || index >= str.length) {
    throw new Error(`index=${index} is out of range in "${str}"`);
  }

  const high = str.charCodeAt(index);

  // Only 0xd800-0xdbff can start a pair; a low surrogate in the leading position is unpaired.
  if (high >= 0xd800 && high <= 0xdbff && str.length > index + 1) {
    // Read the full 16-bit code unit: the low surrogate range lies above 0xff.
    const low = str.charCodeAt(index + 1);
    if (low >= 0xdc00 && low <= 0xdfff) {
      return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
    }
  }

  return high;
}
| 
 | ||||
/**
 * Adds `a` and `b` via `add32to64` and returns the second element of its result.
 *
 * NOTE(review): the second element is presumably the low 32 bits of the sum - confirm against
 * `add32to64`.
 */
function add32(a: number, b: number): number {
  const sum = add32to64(a, b);
  return sum[1];
}
|  | ||||
| @ -6,7 +6,7 @@ | ||||
|  * found in the LICENSE file at https://angular.io/license
 | ||||
|  */ | ||||
| 
 | ||||
| import {utf8Encode} from '../i18n/digest'; | ||||
| import {utf8Encode} from '../util'; | ||||
| 
 | ||||
| // https://docs.google.com/document/d/1U1RGAehQwRypUTovF1KRlpiOFze0b-_2gc6fAH0KY0k/edit
 | ||||
| const VERSION = 3; | ||||
|  | ||||
| @ -98,3 +98,35 @@ const STRING_MAP_PROTO = Object.getPrototypeOf({}); | ||||
/**
 * Tells whether `obj` is a non-null object whose direct prototype is `STRING_MAP_PROTO`
 * (the prototype of an object literal).
 */
function isStrictStringMap(obj: any): boolean {
  if (typeof obj !== 'object' || obj === null) {
    return false;
  }
  return Object.getPrototypeOf(obj) === STRING_MAP_PROTO;
}
| 
 | ||||
/**
 * Converts a JavaScript (UTF-16) string to UTF-8.
 *
 * Every character of the returned string carries one UTF-8 byte. A high surrogate followed by a
 * low surrogate is merged into one code point and written as four bytes; a surrogate half
 * without its partner is written as a three-byte sequence on its own.
 *
 * @param str The text to encode.
 * @returns The UTF-8 byte sequence of `str`, one byte per character.
 */
export function utf8Encode(str: string): string {
  const length = str.length;
  let output = '';
  let pos = 0;

  while (pos < length) {
    let cp = str.charCodeAt(pos++);

    // Merge a surrogate pair into a single code point.
    // see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
    const isLead = cp >= 0xd800 && cp <= 0xdbff;
    if (isLead && pos < length) {
      const trail = str.charCodeAt(pos);
      if (trail >= 0xdc00 && trail <= 0xdfff) {
        pos++;
        cp = ((cp - 0xd800) << 10) + trail - 0xdc00 + 0x10000;
      }
    }

    // Pick the byte layout matching the magnitude of the code point.
    let bytes: number[];
    if (cp <= 0x7f) {
      bytes = [cp];
    } else if (cp <= 0x7ff) {
      bytes = [0xc0 | ((cp >> 6) & 0x1f), 0x80 | (cp & 0x3f)];
    } else if (cp <= 0xffff) {
      bytes = [0xe0 | (cp >> 12), 0x80 | ((cp >> 6) & 0x3f), 0x80 | (cp & 0x3f)];
    } else if (cp <= 0x1fffff) {
      bytes = [
        0xf0 | ((cp >> 18) & 0x07), 0x80 | ((cp >> 12) & 0x3f), 0x80 | ((cp >> 6) & 0x3f),
        0x80 | (cp & 0x3f)
      ];
    } else {
      // Nothing is emitted for larger values.
      bytes = [];
    }

    output += String.fromCharCode(...bytes);
  }

  return output;
}
|  | ||||
| @ -7,9 +7,7 @@ | ||||
|  */ | ||||
| 
 | ||||
| import {fakeAsync} from '@angular/core/testing/src/fake_async'; | ||||
| import {describe, expect, it} from '@angular/core/testing/src/testing_internal'; | ||||
| 
 | ||||
| import {SyncAsyncResult, escapeRegExp, splitAtColon} from '../src/util'; | ||||
| import {SyncAsyncResult, escapeRegExp, splitAtColon, utf8Encode} from '../src/util'; | ||||
| 
 | ||||
| export function main() { | ||||
|   describe('util', () => { | ||||
| @ -45,5 +43,45 @@ export function main() { | ||||
|         expect(new RegExp(escapeRegExp('a.b')).exec('axb')).toBeFalsy(); | ||||
|       }); | ||||
|     }); | ||||
| 
 | ||||
|     describe('utf8encode', () => { | ||||
|       // tests from https://github.com/mathiasbynens/wtf-8
 | ||||
|       it('should encode to utf8', () => { | ||||
|         const tests = [ | ||||
|           ['abc', 'abc'], | ||||
|           // // 1-byte
 | ||||
|           ['\0', '\0'], | ||||
|           // // 2-byte
 | ||||
|           ['\u0080', '\xc2\x80'], | ||||
|           ['\u05ca', '\xd7\x8a'], | ||||
|           ['\u07ff', '\xdf\xbf'], | ||||
|           // // 3-byte
 | ||||
|           ['\u0800', '\xe0\xa0\x80'], | ||||
|           ['\u2c3c', '\xe2\xb0\xbc'], | ||||
|           ['\uffff', '\xef\xbf\xbf'], | ||||
|           // //4-byte
 | ||||
|           ['\uD800\uDC00', '\xF0\x90\x80\x80'], | ||||
|           ['\uD834\uDF06', '\xF0\x9D\x8C\x86'], | ||||
|           ['\uDBFF\uDFFF', '\xF4\x8F\xBF\xBF'], | ||||
|           // unmatched surrogate halves
 | ||||
|           // high surrogates: 0xD800 to 0xDBFF
 | ||||
|           ['\uD800', '\xED\xA0\x80'], | ||||
|           ['\uD800\uD800', '\xED\xA0\x80\xED\xA0\x80'], | ||||
|           ['\uD800A', '\xED\xA0\x80A'], | ||||
|           ['\uD800\uD834\uDF06\uD800', '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80'], | ||||
|           ['\uD9AF', '\xED\xA6\xAF'], | ||||
|           ['\uDBFF', '\xED\xAF\xBF'], | ||||
|           // low surrogates: 0xDC00 to 0xDFFF
 | ||||
|           ['\uDC00', '\xED\xB0\x80'], | ||||
|           ['\uDC00\uDC00', '\xED\xB0\x80\xED\xB0\x80'], | ||||
|           ['\uDC00A', '\xED\xB0\x80A'], | ||||
|           ['\uDC00\uD834\uDF06\uDC00', '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80'], | ||||
|           ['\uDEEE', '\xED\xBB\xAE'], | ||||
|           ['\uDFFF', '\xED\xBF\xBF'], | ||||
|         ]; | ||||
|         tests.forEach( | ||||
|             ([input, output]: [string, string]) => { expect(utf8Encode(input)).toEqual(output); }); | ||||
|       }); | ||||
|     }); | ||||
|   }); | ||||
| } | ||||
| } | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user