Terms Facets: Allow providing a regex to control which terms should be included, closes #277.
This commit is contained in:
parent
03cd2f3772
commit
65284ba2ba
|
@ -30,30 +30,65 @@ import java.util.regex.Pattern;
|
|||
public class Regex {
|
||||
|
||||
public static Pattern compile(String regex, String flags) {
|
||||
int pFlags = 0;
|
||||
if (flags == null) {
|
||||
for (String s : Strings.delimitedListToStringArray(flags, "|")) {
|
||||
if ("CASE_INSENSITIVE".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.CASE_INSENSITIVE;
|
||||
} else if ("MULTILINE".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.MULTILINE;
|
||||
} else if ("DOTALL".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.DOTALL;
|
||||
} else if ("UNICODE_CASE".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.UNICODE_CASE;
|
||||
} else if ("CANON_EQ".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.CANON_EQ;
|
||||
} else if ("UNIX_LINES".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.UNIX_LINES;
|
||||
} else if ("LITERAL".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.LITERAL;
|
||||
} else if ("COMMENTS".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.COMMENTS;
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + s + "] to compile [" + regex + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
int pFlags = flags == null ? 0 : flagsFromString(flags);
|
||||
return Pattern.compile(regex, pFlags);
|
||||
}
|
||||
|
||||
public static int flagsFromString(String flags) {
|
||||
int pFlags = 0;
|
||||
for (String s : Strings.delimitedListToStringArray(flags, "|")) {
|
||||
if (s.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
if ("CASE_INSENSITIVE".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.CASE_INSENSITIVE;
|
||||
} else if ("MULTILINE".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.MULTILINE;
|
||||
} else if ("DOTALL".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.DOTALL;
|
||||
} else if ("UNICODE_CASE".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.UNICODE_CASE;
|
||||
} else if ("CANON_EQ".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.CANON_EQ;
|
||||
} else if ("UNIX_LINES".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.UNIX_LINES;
|
||||
} else if ("LITERAL".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.LITERAL;
|
||||
} else if ("COMMENTS".equalsIgnoreCase(s)) {
|
||||
pFlags |= Pattern.COMMENTS;
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + s + "]");
|
||||
}
|
||||
}
|
||||
return pFlags;
|
||||
}
|
||||
|
||||
public static String flagsToString(int flags) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if ((flags & Pattern.CASE_INSENSITIVE) != 0) {
|
||||
sb.append("CASE_INSENSITIVE|");
|
||||
}
|
||||
if ((flags & Pattern.MULTILINE) != 0) {
|
||||
sb.append("MULTILINE|");
|
||||
}
|
||||
if ((flags & Pattern.DOTALL) != 0) {
|
||||
sb.append("DOTALL|");
|
||||
}
|
||||
if ((flags & Pattern.UNICODE_CASE) != 0) {
|
||||
sb.append("UNICODE_CASE|");
|
||||
}
|
||||
if ((flags & Pattern.CANON_EQ) != 0) {
|
||||
sb.append("CANON_EQ|");
|
||||
}
|
||||
if ((flags & Pattern.UNIX_LINES) != 0) {
|
||||
sb.append("UNIX_LINES|");
|
||||
}
|
||||
if ((flags & Pattern.LITERAL) != 0) {
|
||||
sb.append("LITERAL|");
|
||||
}
|
||||
if ((flags & Pattern.COMMENTS) != 0) {
|
||||
sb.append("COMMENTS|");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,5 +52,5 @@ public interface Facets extends Iterable<Facet> {
|
|||
/**
|
||||
* A facet of the specified name.
|
||||
*/
|
||||
Facet facet(String name);
|
||||
<T extends Facet> T facet(String name);
|
||||
}
|
||||
|
|
|
@ -100,15 +100,15 @@ public class InternalFacets implements Facets, Streamable, ToXContent, Iterable<
|
|||
/**
|
||||
* Returns the facet by name, already cast to the specified type.
|
||||
*/
|
||||
public <T extends Facet> T facet(Class<T> facetType, String name) {
|
||||
@Override public <T extends Facet> T facet(Class<T> facetType, String name) {
|
||||
return facetType.cast(facet(name));
|
||||
}
|
||||
|
||||
/**
|
||||
* A facet of the specified name.
|
||||
*/
|
||||
public Facet facet(String name) {
|
||||
return facetsAsMap().get(name);
|
||||
@SuppressWarnings({"unchecked"}) @Override public <T extends Facet> T facet(String name) {
|
||||
return (T) facetsAsMap().get(name);
|
||||
}
|
||||
|
||||
@Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.elasticsearch.search.facets.terms;
|
||||
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.xcontent.builder.XContentBuilder;
|
||||
import org.elasticsearch.index.query.xcontent.XContentFilterBuilder;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilderException;
|
||||
|
@ -33,6 +34,8 @@ public class TermsFacetBuilder extends AbstractFacetBuilder {
|
|||
private String fieldName;
|
||||
private int size = 10;
|
||||
private String[] exclude;
|
||||
private String regex;
|
||||
private int regexFlags = 0;
|
||||
|
||||
public TermsFacetBuilder(String name) {
|
||||
super(name);
|
||||
|
@ -63,6 +66,16 @@ public class TermsFacetBuilder extends AbstractFacetBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
public TermsFacetBuilder regex(String regex) {
|
||||
return regex(regex, 0);
|
||||
}
|
||||
|
||||
public TermsFacetBuilder regex(String regex, int flags) {
|
||||
this.regex = regex;
|
||||
this.regexFlags = flags;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
if (fieldName == null) {
|
||||
throw new SearchSourceBuilderException("field must be set on terms facet for facet [" + name + "]");
|
||||
|
@ -79,6 +92,12 @@ public class TermsFacetBuilder extends AbstractFacetBuilder {
|
|||
}
|
||||
builder.endArray();
|
||||
}
|
||||
if (regex != null) {
|
||||
builder.field("regex", regex);
|
||||
if (regexFlags != 0) {
|
||||
builder.field("regex_flags", Regex.flagsToString(regexFlags));
|
||||
}
|
||||
}
|
||||
builder.endObject();
|
||||
|
||||
if (filter != null) {
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.elasticsearch.search.facets.support.AbstractFacetCollector;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Deque;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
|
@ -67,12 +69,15 @@ public class TermsFacetCollector extends AbstractFacetCollector {
|
|||
|
||||
private final ImmutableSet<String> excluded;
|
||||
|
||||
public TermsFacetCollector(String facetName, String fieldName, int size, int numberOfShards, FieldDataCache fieldDataCache, MapperService mapperService, ImmutableSet<String> excluded) {
|
||||
private final Pattern pattern;
|
||||
|
||||
public TermsFacetCollector(String facetName, String fieldName, int size, int numberOfShards, FieldDataCache fieldDataCache, MapperService mapperService, ImmutableSet<String> excluded, Pattern pattern) {
|
||||
super(facetName);
|
||||
this.fieldDataCache = fieldDataCache;
|
||||
this.size = size;
|
||||
this.numberOfShards = numberOfShards;
|
||||
this.excluded = excluded;
|
||||
this.pattern = pattern;
|
||||
|
||||
FieldMapper mapper = mapperService.smartNameFieldMapper(fieldName);
|
||||
this.fieldName = fieldName;
|
||||
|
@ -83,10 +88,10 @@ public class TermsFacetCollector extends AbstractFacetCollector {
|
|||
this.indexFieldName = fieldName;
|
||||
this.fieldDataType = FieldData.Type.STRING;
|
||||
}
|
||||
if (excluded.isEmpty()) {
|
||||
if (excluded.isEmpty() && pattern == null) {
|
||||
aggregator = new StaticAggregatorValueProc(popFacets());
|
||||
} else {
|
||||
aggregator = new AggregatorValueProc(popFacets());
|
||||
aggregator = new AggregatorValueProc(popFacets(), excluded, pattern);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,12 +140,21 @@ public class TermsFacetCollector extends AbstractFacetCollector {
|
|||
|
||||
public class AggregatorValueProc extends StaticAggregatorValueProc {
|
||||
|
||||
public AggregatorValueProc(TObjectIntHashMap<String> facets) {
|
||||
private final ImmutableSet<String> excluded;
|
||||
|
||||
private final Matcher matcher;
|
||||
|
||||
public AggregatorValueProc(TObjectIntHashMap<String> facets, ImmutableSet<String> excluded, Pattern pattern) {
|
||||
super(facets);
|
||||
this.excluded = excluded;
|
||||
this.matcher = pattern != null ? pattern.matcher("") : null;
|
||||
}
|
||||
|
||||
@Override public void onValue(int docId, String value) {
|
||||
if (excluded.contains(value)) {
|
||||
if (excluded != null && excluded.contains(value)) {
|
||||
return;
|
||||
}
|
||||
if (matcher != null && !matcher.reset(value).matches()) {
|
||||
return;
|
||||
}
|
||||
super.onValue(docId, value);
|
||||
|
|
|
@ -20,12 +20,14 @@
|
|||
package org.elasticsearch.search.facets.terms;
|
||||
|
||||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.search.facets.collector.FacetCollector;
|
||||
import org.elasticsearch.search.facets.collector.FacetCollectorParser;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
|
@ -45,6 +47,8 @@ public class TermsFacetCollectorParser implements FacetCollectorParser {
|
|||
String fieldName = null;
|
||||
XContentParser.Token token;
|
||||
ImmutableSet<String> excluded = ImmutableSet.of();
|
||||
String regex = null;
|
||||
String regexFlags = null;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
fieldName = parser.currentName();
|
||||
|
@ -61,9 +65,17 @@ public class TermsFacetCollectorParser implements FacetCollectorParser {
|
|||
field = parser.text();
|
||||
} else if ("size".equals(fieldName)) {
|
||||
size = parser.intValue();
|
||||
} else if ("regex".equals(fieldName)) {
|
||||
regex = parser.text();
|
||||
} else if ("regex_flags".equals(fieldName) || "regexFlags".equals(fieldName)) {
|
||||
regexFlags = parser.text();
|
||||
}
|
||||
}
|
||||
}
|
||||
return new TermsFacetCollector(facetName, field, size, context.numberOfShards(), context.fieldDataCache(), context.mapperService(), excluded);
|
||||
Pattern pattern = null;
|
||||
if (regex != null) {
|
||||
pattern = Regex.compile(regex, regexFlags);
|
||||
}
|
||||
return new TermsFacetCollector(facetName, field, size, context.numberOfShards(), context.fieldDataCache(), context.mapperService(), excluded, pattern);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,7 +92,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
|
||||
assertThat(searchResponse.hits().hits().length, equalTo(0));
|
||||
|
||||
TermsFacet facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
TermsFacet facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(1));
|
||||
assertThat(facet.entries().get(0).term(), equalTo("111"));
|
||||
|
@ -108,7 +108,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
|
||||
assertThat(searchResponse.hits().hits().length, equalTo(0));
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(1));
|
||||
assertThat(facet.entries().get(0).term(), equalTo("111"));
|
||||
|
@ -143,13 +143,13 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet2").field("tag").size(10))
|
||||
.execute().actionGet();
|
||||
|
||||
TermsFacet facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
TermsFacet facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(1));
|
||||
assertThat(facet.entries().get(0).term(), equalTo("111"));
|
||||
assertThat(facet.entries().get(0).count(), equalTo(2));
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet2");
|
||||
facet = searchResponse.facets().facet("facet2");
|
||||
assertThat(facet.name(), equalTo("facet2"));
|
||||
assertThat(facet.entries().size(), equalTo(3));
|
||||
assertThat(facet.entries().get(0).term(), equalTo("yyy"));
|
||||
|
@ -160,7 +160,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet1").field("stag").size(10).filter(termFilter("tag", "xxx")))
|
||||
.execute().actionGet();
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(1));
|
||||
assertThat(facet.entries().get(0).term(), equalTo("111"));
|
||||
|
@ -171,7 +171,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet1").field("tag").size(10))
|
||||
.execute().actionGet();
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(3));
|
||||
assertThat(facet.entries().get(0).term(), equalTo("yyy"));
|
||||
|
@ -186,7 +186,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet1").field("tag").size(2))
|
||||
.execute().actionGet();
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(2));
|
||||
assertThat(facet.entries().get(0).term(), equalTo("yyy"));
|
||||
|
@ -199,7 +199,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet1").field("tag").size(10).exclude("yyy"))
|
||||
.execute().actionGet();
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(2));
|
||||
assertThat(facet.entries().get(0).term(), anyOf(equalTo("xxx"), equalTo("zzz")));
|
||||
|
@ -243,7 +243,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet1").field("text").size(3))
|
||||
.execute().actionGet();
|
||||
|
||||
TermsFacet facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
TermsFacet facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(3));
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
@ -256,7 +256,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet1").field("text").size(2))
|
||||
.execute().actionGet();
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(2));
|
||||
for (int i = 0; i < 2; i++) {
|
||||
|
@ -269,7 +269,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
.addFacet(termsFacet("facet1").field("text").size(1))
|
||||
.execute().actionGet();
|
||||
|
||||
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
|
||||
facet = searchResponse.facets().facet("facet1");
|
||||
assertThat(facet.name(), equalTo("facet1"));
|
||||
assertThat(facet.entries().size(), equalTo(1));
|
||||
for (int i = 0; i < 1; i++) {
|
||||
|
@ -314,7 +314,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
}
|
||||
assertThat(searchResponse.failedShards(), equalTo(0));
|
||||
|
||||
StatisticalFacet facet = searchResponse.facets().facet(StatisticalFacet.class, "stats1");
|
||||
StatisticalFacet facet = searchResponse.facets().facet("stats1");
|
||||
assertThat(facet.name(), equalTo(facet.name()));
|
||||
assertThat(facet.count(), equalTo(2l));
|
||||
assertThat(facet.total(), equalTo(3d));
|
||||
|
@ -323,7 +323,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
assertThat(facet.mean(), equalTo(1.5d));
|
||||
assertThat(facet.sumOfSquares(), equalTo(5d));
|
||||
|
||||
facet = searchResponse.facets().facet(StatisticalFacet.class, "stats2");
|
||||
facet = searchResponse.facets().facet("stats2");
|
||||
assertThat(facet.name(), equalTo(facet.name()));
|
||||
assertThat(facet.count(), equalTo(4l));
|
||||
assertThat(facet.total(), equalTo(10d));
|
||||
|
@ -331,7 +331,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
assertThat(facet.max(), equalTo(4d));
|
||||
assertThat(facet.mean(), equalTo(2.5d));
|
||||
|
||||
facet = searchResponse.facets().facet(StatisticalFacet.class, "stats3");
|
||||
facet = searchResponse.facets().facet("stats3");
|
||||
assertThat(facet.name(), equalTo(facet.name()));
|
||||
assertThat(facet.count(), equalTo(2l));
|
||||
assertThat(facet.total(), equalTo(6d));
|
||||
|
@ -387,7 +387,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
}
|
||||
assertThat(searchResponse.failedShards(), equalTo(0));
|
||||
|
||||
HistogramFacet facet = searchResponse.facets().facet(HistogramFacet.class, "stats1");
|
||||
HistogramFacet facet = searchResponse.facets().facet("stats1");
|
||||
assertThat(facet.name(), equalTo("stats1"));
|
||||
assertThat(facet.entries().size(), equalTo(2));
|
||||
assertThat(facet.entries().get(0).key(), equalTo(1000l));
|
||||
|
@ -399,7 +399,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
assertThat(facet.entries().get(1).total(), equalTo(1175d));
|
||||
assertThat(facet.entries().get(1).mean(), equalTo(1175d));
|
||||
|
||||
facet = searchResponse.facets().facet(HistogramFacet.class, "stats2");
|
||||
facet = searchResponse.facets().facet("stats2");
|
||||
assertThat(facet.name(), equalTo("stats2"));
|
||||
assertThat(facet.entries().size(), equalTo(3));
|
||||
assertThat(facet.entries().get(0).key(), equalTo(10l));
|
||||
|
@ -415,7 +415,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
assertThat(facet.entries().get(2).total(), equalTo(31d));
|
||||
assertThat(facet.entries().get(2).mean(), equalTo(31d));
|
||||
|
||||
facet = searchResponse.facets().facet(HistogramFacet.class, "stats3");
|
||||
facet = searchResponse.facets().facet("stats3");
|
||||
assertThat(facet.name(), equalTo("stats3"));
|
||||
assertThat(facet.entries().size(), equalTo(2));
|
||||
assertThat(facet.entries().get(0).key(), equalTo(1000l));
|
||||
|
@ -427,7 +427,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
|
|||
assertThat(facet.entries().get(1).total(), equalTo(42d));
|
||||
assertThat(facet.entries().get(1).mean(), equalTo(21d));
|
||||
|
||||
facet = searchResponse.facets().facet(HistogramFacet.class, "stats4");
|
||||
facet = searchResponse.facets().facet("stats4");
|
||||
assertThat(facet.name(), equalTo("stats4"));
|
||||
assertThat(facet.entries().size(), equalTo(2));
|
||||
assertThat(facet.entries().get(0).key(), equalTo(0l));
|
||||
|
|
Loading…
Reference in New Issue