fix(http): introduce encodingHint for text() for better ArrayBuffer support

Currently, if a Response has an ArrayBuffer body and text() is called, Angular
attempts to convert the ArrayBuffer to a string. Doing this requires knowing
the encoding of the bytes in the buffer, which is context that we don't have.

Instead, we assume that the buffer is encoded in UTF-16, and attempt to process
it that way. Unfortunately the approach chosen (interpret buffer as Uint16Array and
create a Javascript string from each entry using String.fromCharCode) is incorrect
as it does not handle UTF-16 surrogate pairs. What Angular actually implements, then,
is UCS-2 decoding, which is equivalent to UTF-16 with characters restricted to the
base plane.

No standard way of decoding UTF-8 or UTF-16 exists in the browser today. APIs like
TextDecoder are only supported in a few browsers, and although hacks like using the
FileReader API with a Blob to force browsers to do content encoding detection and
decoding exist, they're slow and not compatible with the synchronous text() API.

Thus, this bug is fixed by introducing an encodingHint parameter to text(). The
default value of this parameter is 'legacy', indicating that the existing broken
behavior should be used - this prevents breaking existing apps. The only other
possible value of the hint is 'iso-8859' which interprets each byte of the buffer
with String.fromCharCode. UTF-8 and UTF-16 are not supported - it is up to the
consumer to get the ArrayBuffer and decode it themselves.

The parameter is a hint, as it's not always used (for example, if the conversion
to text doesn't involve an ArrayBuffer source). Additionally, this leaves the door
open for future implementations to perform more sophisticated encoding detection
and ignore the user-provided value if it can be proven to be incorrect.

Fixes #15932.

PR Close #16420
This commit is contained in:
Alex Rickabaugh 2017-04-28 11:54:40 -07:00 committed by Matias Niemelä
parent aef524506b
commit 7ae7a8440c
4 changed files with 47 additions and 4 deletions

View File

@ -37,14 +37,32 @@ export abstract class Body {
/**
* Returns the body as a string, presuming `toString()` can be called on the response body.
*
* When decoding an `ArrayBuffer`, the optional `encodingHint` parameter determines how the
* bytes in the buffer will be interpreted. Valid values are:
*
* - `legacy` - incorrectly interpret the bytes as UTF-16 (technically, UCS-2). Only characters
* in the Basic Multilingual Plane are supported, surrogate pairs are not handled correctly.
* In addition, the endianness of the 16-bit octet pairs in the `ArrayBuffer` is not taken
* into consideration. This is the default behavior to avoid breaking apps, but should be
* considered deprecated.
*
* - `iso-8859` - interpret the bytes as ISO-8859 (which can be used for ASCII encoded text).
*/
text(): string {
text(encodingHint: 'legacy'|'iso-8859' = 'legacy'): string {
if (this._body instanceof URLSearchParams) {
return this._body.toString();
}
if (this._body instanceof ArrayBuffer) {
return String.fromCharCode.apply(null, new Uint16Array(<ArrayBuffer>this._body));
switch (encodingHint) {
case 'legacy':
return String.fromCharCode.apply(null, new Uint16Array(this._body as ArrayBuffer));
case 'iso-8859':
return String.fromCharCode.apply(null, new Uint8Array(this._body as ArrayBuffer));
default:
throw new Error(`Invalid value for encodingHint: ${encodingHint}`);
}
}
if (this._body == null) {

View File

@ -42,6 +42,15 @@ export function getResponseURL(xhr: any): string|null {
return null;
}
export function stringToArrayBuffer8(input: String): ArrayBuffer {
const view = new Uint8Array(input.length);
for (let i = 0, strLen = input.length; i < strLen; i++) {
view[i] = input.charCodeAt(i);
}
return view.buffer;
}
export function stringToArrayBuffer(input: String): ArrayBuffer {
const view = new Uint16Array(input.length);
for (let i = 0, strLen = input.length; i < strLen; i++) {

View File

@ -186,4 +186,5 @@ const noop = function() {};
const w = typeof window == 'object' ? window : noop;
const FormData = (w as any /** TODO #9100 */)['FormData'] || noop;
const Blob = (w as any /** TODO #9100 */)['Blob'] || noop;
export const ArrayBuffer = (w as any /** TODO #9100 */)['ArrayBuffer'] || noop;
export const ArrayBuffer: ArrayBufferConstructor =
(w as any /** TODO #9100 */)['ArrayBuffer'] || noop;

View File

@ -7,10 +7,12 @@
*/
import {describe, expect, it} from '@angular/core/testing/src/testing_internal';
import {ɵgetDOM as getDOM} from '@angular/platform-browser';
import {RequestOptions} from '../src/base_request_options';
import {ContentType} from '../src/enums';
import {Headers} from '../src/headers';
import {stringToArrayBuffer, stringToArrayBuffer8} from '../src/http_utils';
import {ArrayBuffer, Request} from '../src/static_request';
export function main() {
@ -118,6 +120,19 @@ export function main() {
it('should use search if present', () => {
const req = new Request({url: 'http://test.com', search: 'a=1&b=2'});
expect(req.url).toBe('http://test.com?a=1&b=2');
})
});
if (getDOM().supportsWebAnimation()) {
it('should serialize an ArrayBuffer to string via legacy encoding', () => {
const str = '\u89d2\u5ea6';
expect(new Request({body: stringToArrayBuffer(str), url: '/'}).text()).toEqual(str);
});
it('should serialize an ArrayBuffer to string via iso-8859 encoding', () => {
const str = 'abcd';
expect(new Request({body: stringToArrayBuffer8(str), url: '/'}).text('iso-8859'))
.toEqual(str);
});
}
});
}