Speed up XContent Collection Parsing (#61442) (#61617)

1. Get rid of the capturing lambda on the hot path that inlines very badly
2. Remove as many bounds checks as possible, thereby reducing method size and improving inlining
This commit is contained in:
Armin Braun 2020-08-27 14:15:46 +02:00 committed by GitHub
parent ae90ae343a
commit 9be36865ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 79 additions and 55 deletions

View File

@ -263,117 +263,141 @@ public abstract class AbstractXContentParser implements XContentParser {
@Override
public Map<String, Object> map() throws IOException {
return readMap(this);
return readMapSafe(this, SIMPLE_MAP_FACTORY);
}
@Override
public Map<String, Object> mapOrdered() throws IOException {
return readOrderedMap(this);
return readMapSafe(this, ORDERED_MAP_FACTORY);
}
@Override
public Map<String, String> mapStrings() throws IOException {
return readMapStrings(this);
return map(HashMap::new, XContentParser::text);
}
@Override
public <T> Map<String, T> map(
Supplier<Map<String, T>> mapFactory, CheckedFunction<XContentParser, T, IOException> mapValueParser) throws IOException {
return readGenericMap(this, mapFactory, mapValueParser);
final Map<String, T> map = mapFactory.get();
if (findNonEmptyMapStart(this) == false) {
return map;
}
assert currentToken() == Token.FIELD_NAME : "Expected field name but saw [" + currentToken() + "]";
do {
// Must point to field name
String fieldName = currentName();
// And then the value...
nextToken();
T value = mapValueParser.apply(this);
map.put(fieldName, value);
} while (nextToken() == XContentParser.Token.FIELD_NAME);
return map;
}
@Override
public List<Object> list() throws IOException {
return readList(this);
skipToListStart(this);
return readListUnsafe(this, SIMPLE_MAP_FACTORY);
}
@Override
public List<Object> listOrderedMap() throws IOException {
return readListOrderedMap(this);
skipToListStart(this);
return readListUnsafe(this, ORDERED_MAP_FACTORY);
}
static final Supplier<Map<String, Object>> SIMPLE_MAP_FACTORY = HashMap::new;
private static final Supplier<Map<String, Object>> SIMPLE_MAP_FACTORY = HashMap::new;
static final Supplier<Map<String, Object>> ORDERED_MAP_FACTORY = LinkedHashMap::new;
private static final Supplier<Map<String, Object>> ORDERED_MAP_FACTORY = LinkedHashMap::new;
static final Supplier<Map<String, String>> SIMPLE_MAP_STRINGS_FACTORY = HashMap::new;
static Map<String, Object> readMap(XContentParser parser) throws IOException {
return readMap(parser, SIMPLE_MAP_FACTORY);
private static Map<String, Object> readMapSafe(XContentParser parser, Supplier<Map<String, Object>> mapFactory) throws IOException {
final Map<String, Object> map = mapFactory.get();
return findNonEmptyMapStart(parser) ? readMapEntries(parser, mapFactory, map) : map;
}
static Map<String, Object> readOrderedMap(XContentParser parser) throws IOException {
return readMap(parser, ORDERED_MAP_FACTORY);
// Read a map without bounds checks from a parser that is assumed to be at the map's first field's name token
private static Map<String, Object> readMapEntries(XContentParser parser, Supplier<Map<String, Object>> mapFactory,
Map<String, Object> map) throws IOException {
assert parser.currentToken() == Token.FIELD_NAME : "Expected field name but saw [" + parser.currentToken() + "]";
do {
// Must point to field name
String fieldName = parser.currentName();
// And then the value...
Object value = readValueUnsafe(parser.nextToken(), parser, mapFactory);
map.put(fieldName, value);
} while (parser.nextToken() == Token.FIELD_NAME);
return map;
}
static Map<String, String> readMapStrings(XContentParser parser) throws IOException {
return readGenericMap(parser, SIMPLE_MAP_STRINGS_FACTORY, XContentParser::text);
}
static List<Object> readList(XContentParser parser) throws IOException {
return readList(parser, SIMPLE_MAP_FACTORY);
}
static List<Object> readListOrderedMap(XContentParser parser) throws IOException {
return readList(parser, ORDERED_MAP_FACTORY);
}
static Map<String, Object> readMap(XContentParser parser, Supplier<Map<String, Object>> mapFactory) throws IOException {
return readGenericMap(parser, mapFactory, p -> readValue(p, mapFactory));
}
static <T> Map<String, T> readGenericMap(
XContentParser parser,
Supplier<Map<String, T>> mapFactory,
CheckedFunction<XContentParser, T, IOException> mapValueParser) throws IOException {
Map<String, T> map = mapFactory.get();
XContentParser.Token token = parser.currentToken();
/**
* Checks if the next current token in the supplied parser is a map start for a non-empty map.
* Skips to the next token if the parser does not yet have a current token (i.e. {@link #currentToken()} returns {@code null}) and then
* checks it.
*
* @return true if a map start for a non-empty map is found
*/
private static boolean findNonEmptyMapStart(XContentParser parser) throws IOException {
Token token = parser.currentToken();
if (token == null) {
token = parser.nextToken();
}
if (token == XContentParser.Token.START_OBJECT) {
token = parser.nextToken();
}
for (; token == XContentParser.Token.FIELD_NAME; token = parser.nextToken()) {
// Must point to field name
String fieldName = parser.currentName();
// And then the value...
parser.nextToken();
T value = mapValueParser.apply(parser);
map.put(fieldName, value);
}
return map;
return token == Token.FIELD_NAME;
}
static List<Object> readList(XContentParser parser, Supplier<Map<String, Object>> mapFactory) throws IOException {
XContentParser.Token token = parser.currentToken();
// Skips the current parser to the next array start. Assumes that the parser is either positioned before an array field's name token or
// on the start array token.
private static void skipToListStart(XContentParser parser) throws IOException {
Token token = parser.currentToken();
if (token == null) {
token = parser.nextToken();
}
if (token == XContentParser.Token.FIELD_NAME) {
token = parser.nextToken();
}
if (token == XContentParser.Token.START_ARRAY) {
token = parser.nextToken();
} else {
if (token != XContentParser.Token.START_ARRAY) {
throw new XContentParseException(parser.getTokenLocation(), "Failed to parse list: expecting "
+ XContentParser.Token.START_ARRAY + " but got " + token);
}
}
// read a list without bounds checks, assuming the the current parser is always on an array start
private static List<Object> readListUnsafe(XContentParser parser, Supplier<Map<String, Object>> mapFactory) throws IOException {
assert parser.currentToken() == Token.START_ARRAY;
ArrayList<Object> list = new ArrayList<>();
for (; token != null && token != XContentParser.Token.END_ARRAY; token = parser.nextToken()) {
list.add(readValue(parser, mapFactory));
for (Token token = parser.nextToken(); token != null && token != XContentParser.Token.END_ARRAY; token = parser.nextToken()) {
list.add(readValueUnsafe(token, parser, mapFactory));
}
return list;
}
public static Object readValue(XContentParser parser, Supplier<Map<String, Object>> mapFactory) throws IOException {
switch (parser.currentToken()) {
return readValueUnsafe(parser.currentToken(), parser, mapFactory);
}
/**
* Reads next value from the parser that is assumed to be at the given current token without any additional checks.
*
* @param currentToken current token that the parser is at
* @param parser parser to read from
* @param mapFactory map factory to use for reading objects
*/
private static Object readValueUnsafe(Token currentToken, XContentParser parser,
Supplier<Map<String, Object>> mapFactory) throws IOException {
assert currentToken == parser.currentToken() : "Supplied current token [" + currentToken +
"] is different from actual parser current token [" + parser.currentToken() + "]";
switch (currentToken) {
case VALUE_STRING: return parser.text();
case VALUE_NUMBER: return parser.numberValue();
case VALUE_BOOLEAN: return parser.booleanValue();
case START_OBJECT: return readMap(parser, mapFactory);
case START_ARRAY: return readList(parser, mapFactory);
case START_OBJECT: {
final Map<String, Object> map = mapFactory.get();
return parser.nextToken() != Token.FIELD_NAME ? map : readMapEntries(parser, mapFactory, map);
}
case START_ARRAY: return readListUnsafe(parser, mapFactory);
case VALUE_EMBEDDED_OBJECT: return parser.binaryValue();
case VALUE_NULL:
default: return null;