Source filtering should treat dots in field names as sub objects. (#20736)
Mappings treat dots in field names as sub objects, for instance ``` { "a.b": "c" } ``` generates the same dynamic mappings as ``` { "a": { "b": "c" } } ``` Source filtering should be consistent with this behaviour so that an include list containing `a` should include fields whose name is `a.b`. To make this change easier, source filtering was refactored to use automata. The ability to treat dots in field names as sub objects is provided by the `makeMatchDotsInFieldNames` method of `XContentMapValues`. Closes #20719
This commit is contained in:
parent
741ecf80ff
commit
8ab7ca5284
|
@ -19,8 +19,13 @@
|
|||
|
||||
package org.elasticsearch.common.regex;
|
||||
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.elasticsearch.common.Strings;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -46,6 +51,33 @@ public class Regex {
|
|||
return str.equals("*");
|
||||
}
|
||||
|
||||
/** Return an {@link Automaton} that matches the given pattern. */
|
||||
public static Automaton simpleMatchToAutomaton(String pattern) {
|
||||
List<Automaton> automata = new ArrayList<>();
|
||||
int previous = 0;
|
||||
for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) {
|
||||
automata.add(Automata.makeString(pattern.substring(previous, i)));
|
||||
automata.add(Automata.makeAnyString());
|
||||
previous = i + 1;
|
||||
}
|
||||
automata.add(Automata.makeString(pattern.substring(previous)));
|
||||
return Operations.concatenate(automata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an Automaton that matches the union of the provided patterns.
|
||||
*/
|
||||
public static Automaton simpleMatchToAutomaton(String... patterns) {
|
||||
if (patterns.length < 1) {
|
||||
throw new IllegalArgumentException("There must be at least one pattern, zero given");
|
||||
}
|
||||
List<Automaton> automata = new ArrayList<>();
|
||||
for (String pattern : patterns) {
|
||||
automata.add(simpleMatchToAutomaton(pattern));
|
||||
}
|
||||
return Operations.union(automata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match a String against the given pattern, supporting the following simple
|
||||
* pattern styles: "xxx*", "*xxx", "*xxx*" and "xxx*yyy" matches (with an
|
||||
|
|
|
@ -19,12 +19,17 @@
|
|||
|
||||
package org.elasticsearch.common.xcontent.support;
|
||||
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -134,115 +139,171 @@ public class XContentMapValues {
|
|||
return null;
|
||||
}
|
||||
|
||||
public static Map<String, Object> filter(Map<String, Object> map, String[] includes, String[] excludes) {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
filter(map, result, includes == null ? Strings.EMPTY_ARRAY : includes, excludes == null ? Strings.EMPTY_ARRAY : excludes, new StringBuilder());
|
||||
return result;
|
||||
/**
|
||||
* Only keep properties in {@code map} that match the {@code includes} but
|
||||
* not the {@code excludes}. An empty list of includes is interpreted as a
|
||||
* wildcard while an empty list of excludes does not match anything.
|
||||
*
|
||||
* If a property matches both an include and an exclude, then the exclude
|
||||
* wins.
|
||||
*
|
||||
* If an object matches, then any of its sub properties are automatically
|
||||
* considered as matching as well, both for includes and excludes.
|
||||
*
|
||||
* Dots in field names are treated as sub objects. So for instance if a
|
||||
* document contains {@code a.b} as a property and {@code a} is an include,
|
||||
* then {@code a.b} will be kept in the filtered map.
|
||||
*/
|
||||
public static Map<String, Object> filter(Map<String, ?> map, String[] includes, String[] excludes) {
|
||||
CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());
|
||||
|
||||
CharacterRunAutomaton include;
|
||||
if (includes == null || includes.length == 0) {
|
||||
include = matchAllAutomaton;
|
||||
} else {
|
||||
Automaton includeA = Regex.simpleMatchToAutomaton(includes);
|
||||
includeA = makeMatchDotsInFieldNames(includeA);
|
||||
include = new CharacterRunAutomaton(includeA);
|
||||
}
|
||||
|
||||
private static void filter(Map<String, Object> map, Map<String, Object> into, String[] includes, String[] excludes, StringBuilder sb) {
|
||||
if (includes.length == 0 && excludes.length == 0) {
|
||||
into.putAll(map);
|
||||
return;
|
||||
Automaton excludeA;
|
||||
if (excludes == null || excludes.length == 0) {
|
||||
excludeA = Automata.makeEmpty();
|
||||
} else {
|
||||
excludeA = Regex.simpleMatchToAutomaton(excludes);
|
||||
excludeA = makeMatchDotsInFieldNames(excludeA);
|
||||
}
|
||||
for (Map.Entry<String, Object> entry : map.entrySet()) {
|
||||
CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA);
|
||||
|
||||
// NOTE: We cannot use Operations.minus because of the special case that
|
||||
// we want all sub properties to match as soon as an object matches
|
||||
|
||||
return filter(map,
|
||||
include, include.getInitialState(),
|
||||
exclude, exclude.getInitialState(),
|
||||
matchAllAutomaton);
|
||||
}
|
||||
|
||||
/** Make matches on objects also match dots in field names.
|
||||
* For instance, if the original simple regex is `foo`, this will translate
|
||||
* it into `foo` OR `foo.*`. */
|
||||
private static Automaton makeMatchDotsInFieldNames(Automaton automaton) {
|
||||
return Operations.union(
|
||||
automaton,
|
||||
Operations.concatenate(Arrays.asList(automaton, Automata.makeChar('.'), Automata.makeAnyString())));
|
||||
}
|
||||
|
||||
private static int step(CharacterRunAutomaton automaton, String key, int state) {
|
||||
for (int i = 0; state != -1 && i < key.length(); ++i) {
|
||||
state = automaton.step(state, key.charAt(i));
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
private static Map<String, Object> filter(Map<String, ?> map,
|
||||
CharacterRunAutomaton includeAutomaton, int initialIncludeState,
|
||||
CharacterRunAutomaton excludeAutomaton, int initialExcludeState,
|
||||
CharacterRunAutomaton matchAllAutomaton) {
|
||||
Map<String, Object> filtered = new HashMap<>();
|
||||
for (Map.Entry<String, ?> entry : map.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
int mark = sb.length();
|
||||
if (sb.length() > 0) {
|
||||
sb.append('.');
|
||||
}
|
||||
sb.append(key);
|
||||
String path = sb.toString();
|
||||
|
||||
if (Regex.simpleMatch(excludes, path)) {
|
||||
sb.setLength(mark);
|
||||
int includeState = step(includeAutomaton, key, initialIncludeState);
|
||||
if (includeState == -1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
boolean exactIncludeMatch = false; // true if the current position was specifically mentioned
|
||||
boolean pathIsPrefixOfAnInclude = false; // true if potentially a sub scope can be included
|
||||
if (includes.length == 0) {
|
||||
// implied match anything
|
||||
exactIncludeMatch = true;
|
||||
int excludeState = step(excludeAutomaton, key, initialExcludeState);
|
||||
if (excludeState != -1 && excludeAutomaton.isAccept(excludeState)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Object value = entry.getValue();
|
||||
|
||||
CharacterRunAutomaton subIncludeAutomaton = includeAutomaton;
|
||||
int subIncludeState = includeState;
|
||||
if (includeAutomaton.isAccept(includeState)) {
|
||||
if (excludeState == -1 || excludeAutomaton.step(excludeState, '.') == -1) {
|
||||
// the exclude has no chances to match inner properties
|
||||
filtered.put(key, value);
|
||||
continue;
|
||||
} else {
|
||||
for (String include : includes) {
|
||||
// check for prefix matches as well to see if we need to zero in, something like: obj1.arr1.* or *.field
|
||||
// note, this does not work well with middle matches, like obj1.*.obj3
|
||||
if (include.charAt(0) == '*') {
|
||||
if (Regex.simpleMatch(include, path)) {
|
||||
exactIncludeMatch = true;
|
||||
break;
|
||||
// the object matched, so consider that the include matches every inner property
|
||||
// we only care about excludes now
|
||||
subIncludeAutomaton = matchAllAutomaton;
|
||||
subIncludeState = includeAutomaton.getInitialState();
|
||||
}
|
||||
pathIsPrefixOfAnInclude = true;
|
||||
}
|
||||
|
||||
if (value instanceof Map) {
|
||||
|
||||
subIncludeState = subIncludeAutomaton.step(subIncludeState, '.');
|
||||
if (subIncludeState == -1) {
|
||||
continue;
|
||||
}
|
||||
if (include.startsWith(path)) {
|
||||
if (include.length() == path.length()) {
|
||||
exactIncludeMatch = true;
|
||||
break;
|
||||
} else if (include.length() > path.length() && include.charAt(path.length()) == '.') {
|
||||
// include might may match deeper paths. Dive deeper.
|
||||
pathIsPrefixOfAnInclude = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (Regex.simpleMatch(include, path)) {
|
||||
exactIncludeMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (excludeState != -1) {
|
||||
excludeState = excludeAutomaton.step(excludeState, '.');
|
||||
}
|
||||
|
||||
if (!(pathIsPrefixOfAnInclude || exactIncludeMatch)) {
|
||||
// skip subkeys, not interesting.
|
||||
sb.setLength(mark);
|
||||
continue;
|
||||
Map<String, Object> valueAsMap = (Map<String, Object>) value;
|
||||
Map<String, Object> filteredValue = filter(valueAsMap,
|
||||
subIncludeAutomaton, subIncludeState, excludeAutomaton, excludeState, matchAllAutomaton);
|
||||
if (includeAutomaton.isAccept(includeState) || filteredValue.isEmpty() == false) {
|
||||
filtered.put(key, filteredValue);
|
||||
}
|
||||
|
||||
} else if (value instanceof Iterable) {
|
||||
|
||||
if (entry.getValue() instanceof Map) {
|
||||
Map<String, Object> innerInto = new HashMap<>();
|
||||
// if we had an exact match, we want give deeper excludes their chance
|
||||
filter((Map<String, Object>) entry.getValue(), innerInto, exactIncludeMatch ? Strings.EMPTY_ARRAY : includes, excludes, sb);
|
||||
if (exactIncludeMatch || !innerInto.isEmpty()) {
|
||||
into.put(entry.getKey(), innerInto);
|
||||
}
|
||||
} else if (entry.getValue() instanceof List) {
|
||||
List<Object> list = (List<Object>) entry.getValue();
|
||||
List<Object> innerInto = new ArrayList<>(list.size());
|
||||
// if we had an exact match, we want give deeper excludes their chance
|
||||
filter(list, innerInto, exactIncludeMatch ? Strings.EMPTY_ARRAY : includes, excludes, sb);
|
||||
into.put(entry.getKey(), innerInto);
|
||||
} else if (exactIncludeMatch) {
|
||||
into.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
sb.setLength(mark);
|
||||
}
|
||||
List<Object> filteredValue = filter((Iterable<?>) value,
|
||||
subIncludeAutomaton, subIncludeState, excludeAutomaton, excludeState, matchAllAutomaton);
|
||||
if (includeAutomaton.isAccept(includeState) || filteredValue.isEmpty() == false) {
|
||||
filtered.put(key, filteredValue);
|
||||
}
|
||||
|
||||
private static void filter(List<Object> from, List<Object> to, String[] includes, String[] excludes, StringBuilder sb) {
|
||||
if (includes.length == 0 && excludes.length == 0) {
|
||||
to.addAll(from);
|
||||
return;
|
||||
} else {
|
||||
|
||||
// leaf property
|
||||
if (includeAutomaton.isAccept(includeState)
|
||||
&& (excludeState == -1 || excludeAutomaton.isAccept(excludeState) == false)) {
|
||||
filtered.put(key, value);
|
||||
}
|
||||
|
||||
for (Object o : from) {
|
||||
if (o instanceof Map) {
|
||||
Map<String, Object> innerInto = new HashMap<>();
|
||||
filter((Map<String, Object>) o, innerInto, includes, excludes, sb);
|
||||
if (!innerInto.isEmpty()) {
|
||||
to.add(innerInto);
|
||||
}
|
||||
} else if (o instanceof List) {
|
||||
List<Object> innerInto = new ArrayList<>();
|
||||
filter((List<Object>) o, innerInto, includes, excludes, sb);
|
||||
if (!innerInto.isEmpty()) {
|
||||
to.add(innerInto);
|
||||
|
||||
}
|
||||
return filtered;
|
||||
}
|
||||
|
||||
private static List<Object> filter(Iterable<?> iterable,
|
||||
CharacterRunAutomaton includeAutomaton, int initialIncludeState,
|
||||
CharacterRunAutomaton excludeAutomaton, int initialExcludeState,
|
||||
CharacterRunAutomaton matchAllAutomaton) {
|
||||
List<Object> filtered = new ArrayList<>();
|
||||
for (Object value : iterable) {
|
||||
if (value instanceof Map) {
|
||||
int includeState = includeAutomaton.step(initialIncludeState, '.');
|
||||
int excludeState = initialExcludeState;
|
||||
if (excludeState != -1) {
|
||||
excludeState = excludeAutomaton.step(excludeState, '.');
|
||||
}
|
||||
Map<String, Object> filteredValue = filter((Map<String, ?>)value,
|
||||
includeAutomaton, includeState, excludeAutomaton, excludeState, matchAllAutomaton);
|
||||
if (filteredValue.isEmpty() == false) {
|
||||
filtered.add(filteredValue);
|
||||
}
|
||||
} else if (value instanceof Iterable) {
|
||||
List<Object> filteredValue = filter((Iterable<?>) value,
|
||||
includeAutomaton, initialIncludeState, excludeAutomaton, initialExcludeState, matchAllAutomaton);
|
||||
if (filteredValue.isEmpty() == false) {
|
||||
filtered.add(filteredValue);
|
||||
}
|
||||
} else {
|
||||
to.add(o);
|
||||
// TODO: we have tests relying on this behavior on arrays even
|
||||
// if the path does not match, but this looks like a bug?
|
||||
filtered.add(value);
|
||||
}
|
||||
}
|
||||
return filtered;
|
||||
}
|
||||
|
||||
public static boolean isObject(Object node) {
|
||||
|
|
|
@ -551,4 +551,43 @@ public class XContentMapValuesTests extends ESTestCase {
|
|||
parser.list());
|
||||
}
|
||||
}
|
||||
|
||||
public void testDotsInFieldNames() {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put("foo.bar", 2);
|
||||
Map<String, Object> sub = new HashMap<>();
|
||||
sub.put("baz", 3);
|
||||
map.put("foo", sub);
|
||||
map.put("quux", 5);
|
||||
|
||||
// dots in field names in includes
|
||||
Map<String, Object> filtered = XContentMapValues.filter(map, new String[] {"foo"}, new String[0]);
|
||||
Map<String, Object> expected = new HashMap<>(map);
|
||||
expected.remove("quux");
|
||||
assertEquals(expected, filtered);
|
||||
|
||||
// dots in field names in excludes
|
||||
filtered = XContentMapValues.filter(map, new String[0], new String[] {"foo"});
|
||||
expected = new HashMap<>(map);
|
||||
expected.keySet().retainAll(Collections.singleton("quux"));
|
||||
assertEquals(expected, filtered);
|
||||
}
|
||||
|
||||
public void testSupplementaryCharactersInPaths() {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put("搜索", 2);
|
||||
map.put("指数", 3);
|
||||
|
||||
assertEquals(Collections.singletonMap("搜索", 2), XContentMapValues.filter(map, new String[] {"搜索"}, new String[0]));
|
||||
assertEquals(Collections.singletonMap("指数", 3), XContentMapValues.filter(map, new String[0], new String[] {"搜索"}));
|
||||
}
|
||||
|
||||
public void testSharedPrefixes() {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put("foobar", 2);
|
||||
map.put("foobaz", 3);
|
||||
|
||||
assertEquals(Collections.singletonMap("foobar", 2), XContentMapValues.filter(map, new String[] {"foobar"}, new String[0]));
|
||||
assertEquals(Collections.singletonMap("foobaz", 3), XContentMapValues.filter(map, new String[0], new String[] {"foobar"}));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue