Terms Facets: Allow providing a regex controlling which terms should be included, closes #277.

This commit is contained in:
kimchy 2010-07-26 12:23:03 +03:00
parent 03cd2f3772
commit 65284ba2ba
7 changed files with 132 additions and 52 deletions

View File

@ -30,30 +30,65 @@ import java.util.regex.Pattern;
public class Regex {
/**
 * Compiles a regular expression whose flags are supplied as text.
 *
 * @param regex the regular expression to compile
 * @param flags {@code |}-separated {@link Pattern} flag names, for example
 *              {@code "CASE_INSENSITIVE|DOTALL"}, or {@code null} for no flags
 * @return the compiled pattern
 * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid expression
 */
public static Pattern compile(String regex, String flags) {
    // Single source of truth for flag parsing: a null flags string means "no flags",
    // anything else is handed to flagsFromString (which rejects unknown names).
    int pFlags = flags == null ? 0 : flagsFromString(flags);
    return Pattern.compile(regex, pFlags);
}
/**
 * Converts a {@code |}-separated list of {@link Pattern} flag names into the
 * corresponding bit mask. Names are matched ignoring case and empty tokens are skipped.
 *
 * @param flags the {@code |}-separated flag names
 * @return the bitwise OR of every named flag
 * @throws ElasticSearchIllegalArgumentException if a non-empty token is not a known flag name
 */
public static int flagsFromString(String flags) {
    String[] names = Strings.delimitedListToStringArray(flags, "|");
    int mask = 0;
    for (int i = 0; i < names.length; i++) {
        String name = names[i];
        if (!name.isEmpty()) {
            mask |= flagFromName(name);
        }
    }
    return mask;
}

/**
 * Maps one flag name (case-insensitive) to its {@link Pattern} constant.
 */
private static int flagFromName(String name) {
    if ("CASE_INSENSITIVE".equalsIgnoreCase(name)) {
        return Pattern.CASE_INSENSITIVE;
    }
    if ("MULTILINE".equalsIgnoreCase(name)) {
        return Pattern.MULTILINE;
    }
    if ("DOTALL".equalsIgnoreCase(name)) {
        return Pattern.DOTALL;
    }
    if ("UNICODE_CASE".equalsIgnoreCase(name)) {
        return Pattern.UNICODE_CASE;
    }
    if ("CANON_EQ".equalsIgnoreCase(name)) {
        return Pattern.CANON_EQ;
    }
    if ("UNIX_LINES".equalsIgnoreCase(name)) {
        return Pattern.UNIX_LINES;
    }
    if ("LITERAL".equalsIgnoreCase(name)) {
        return Pattern.LITERAL;
    }
    if ("COMMENTS".equalsIgnoreCase(name)) {
        return Pattern.COMMENTS;
    }
    throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + name + "]");
}
/**
 * Renders a {@link Pattern} flag bit mask as a {@code |}-separated list of flag names.
 * Names appear in a fixed order and the separator is written only between names, so the
 * result never ends with a dangling {@code |}. Bits that do not correspond to one of the
 * eight names handled here are ignored.
 *
 * @param flags bitwise OR of {@link Pattern} flag constants
 * @return the flag names joined by {@code |}, or an empty string when none are set
 */
public static String flagsToString(int flags) {
    StringBuilder sb = new StringBuilder();
    appendFlag(sb, flags, Pattern.CASE_INSENSITIVE, "CASE_INSENSITIVE");
    appendFlag(sb, flags, Pattern.MULTILINE, "MULTILINE");
    appendFlag(sb, flags, Pattern.DOTALL, "DOTALL");
    appendFlag(sb, flags, Pattern.UNICODE_CASE, "UNICODE_CASE");
    appendFlag(sb, flags, Pattern.CANON_EQ, "CANON_EQ");
    appendFlag(sb, flags, Pattern.UNIX_LINES, "UNIX_LINES");
    appendFlag(sb, flags, Pattern.LITERAL, "LITERAL");
    appendFlag(sb, flags, Pattern.COMMENTS, "COMMENTS");
    return sb.toString();
}

/**
 * Appends {@code name} to {@code sb} when {@code mask} is set in {@code flags},
 * inserting the {@code |} separator first if something was already written.
 */
private static void appendFlag(StringBuilder sb, int flags, int mask, String name) {
    if ((flags & mask) == 0) {
        return;
    }
    if (sb.length() > 0) {
        sb.append('|');
    }
    sb.append(name);
}
}

View File

@ -52,5 +52,5 @@ public interface Facets extends Iterable<Facet> {
/**
* A facet of the specified name.
*/
Facet facet(String name);
<T extends Facet> T facet(String name);
}

View File

@ -100,15 +100,15 @@ public class InternalFacets implements Facets, Streamable, ToXContent, Iterable<
/**
* Returns the facet by name already casted to the specified type.
*/
public <T extends Facet> T facet(Class<T> facetType, String name) {
@Override public <T extends Facet> T facet(Class<T> facetType, String name) {
return facetType.cast(facet(name));
}
/**
* A facet of the specified name.
*/
public Facet facet(String name) {
return facetsAsMap().get(name);
@SuppressWarnings({"unchecked"}) @Override public <T extends Facet> T facet(String name) {
return (T) facetsAsMap().get(name);
}
@Override public void toXContent(XContentBuilder builder, Params params) throws IOException {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.facets.terms;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.builder.XContentBuilder;
import org.elasticsearch.index.query.xcontent.XContentFilterBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilderException;
@ -33,6 +34,8 @@ public class TermsFacetBuilder extends AbstractFacetBuilder {
private String fieldName;
private int size = 10;
private String[] exclude;
private String regex;
private int regexFlags = 0;
public TermsFacetBuilder(String name) {
super(name);
@ -63,6 +66,16 @@ public class TermsFacetBuilder extends AbstractFacetBuilder {
return this;
}
/**
 * Sets the regular expression for this terms facet with no regex flags.
 * Delegates to {@link #regex(String, int)} with a flags value of {@code 0}.
 *
 * @param regex the regular expression, written out under the {@code regex} field
 * @return this builder
 */
public TermsFacetBuilder regex(String regex) {
    final int noFlags = 0;
    return regex(regex, noFlags);
}
/**
 * Sets the regular expression for this terms facet together with its flags.
 *
 * @param regex the regular expression, written out under the {@code regex} field
 * @param flags regex flag bit mask; a non-zero value is written out under
 *              {@code regex_flags} via {@code Regex.flagsToString}
 * @return this builder
 */
public TermsFacetBuilder regex(String regex, int flags) {
    this.regexFlags = flags;
    this.regex = regex;
    return this;
}
@Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
if (fieldName == null) {
throw new SearchSourceBuilderException("field must be set on terms facet for facet [" + name + "]");
@ -79,6 +92,12 @@ public class TermsFacetBuilder extends AbstractFacetBuilder {
}
builder.endArray();
}
if (regex != null) {
builder.field("regex", regex);
if (regexFlags != 0) {
builder.field("regex_flags", Regex.flagsToString(regexFlags));
}
}
builder.endObject();
if (filter != null) {

View File

@ -36,6 +36,8 @@ import org.elasticsearch.search.facets.support.AbstractFacetCollector;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author kimchy (shay.banon)
@ -67,12 +69,15 @@ public class TermsFacetCollector extends AbstractFacetCollector {
private final ImmutableSet<String> excluded;
public TermsFacetCollector(String facetName, String fieldName, int size, int numberOfShards, FieldDataCache fieldDataCache, MapperService mapperService, ImmutableSet<String> excluded) {
private final Pattern pattern;
public TermsFacetCollector(String facetName, String fieldName, int size, int numberOfShards, FieldDataCache fieldDataCache, MapperService mapperService, ImmutableSet<String> excluded, Pattern pattern) {
super(facetName);
this.fieldDataCache = fieldDataCache;
this.size = size;
this.numberOfShards = numberOfShards;
this.excluded = excluded;
this.pattern = pattern;
FieldMapper mapper = mapperService.smartNameFieldMapper(fieldName);
this.fieldName = fieldName;
@ -83,10 +88,10 @@ public class TermsFacetCollector extends AbstractFacetCollector {
this.indexFieldName = fieldName;
this.fieldDataType = FieldData.Type.STRING;
}
if (excluded.isEmpty()) {
if (excluded.isEmpty() && pattern == null) {
aggregator = new StaticAggregatorValueProc(popFacets());
} else {
aggregator = new AggregatorValueProc(popFacets());
aggregator = new AggregatorValueProc(popFacets(), excluded, pattern);
}
}
@ -135,12 +140,21 @@ public class TermsFacetCollector extends AbstractFacetCollector {
public class AggregatorValueProc extends StaticAggregatorValueProc {
public AggregatorValueProc(TObjectIntHashMap<String> facets) {
private final ImmutableSet<String> excluded;
private final Matcher matcher;
public AggregatorValueProc(TObjectIntHashMap<String> facets, ImmutableSet<String> excluded, Pattern pattern) {
super(facets);
this.excluded = excluded;
this.matcher = pattern != null ? pattern.matcher("") : null;
}
@Override public void onValue(int docId, String value) {
if (excluded.contains(value)) {
if (excluded != null && excluded.contains(value)) {
return;
}
if (matcher != null && !matcher.reset(value).matches()) {
return;
}
super.onValue(docId, value);

View File

@ -20,12 +20,14 @@
package org.elasticsearch.search.facets.terms;
import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.facets.collector.FacetCollector;
import org.elasticsearch.search.facets.collector.FacetCollectorParser;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.regex.Pattern;
/**
* @author kimchy (shay.banon)
@ -45,6 +47,8 @@ public class TermsFacetCollectorParser implements FacetCollectorParser {
String fieldName = null;
XContentParser.Token token;
ImmutableSet<String> excluded = ImmutableSet.of();
String regex = null;
String regexFlags = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
fieldName = parser.currentName();
@ -61,9 +65,17 @@ public class TermsFacetCollectorParser implements FacetCollectorParser {
field = parser.text();
} else if ("size".equals(fieldName)) {
size = parser.intValue();
} else if ("regex".equals(fieldName)) {
regex = parser.text();
} else if ("regex_flags".equals(fieldName) || "regexFlags".equals(fieldName)) {
regexFlags = parser.text();
}
}
}
return new TermsFacetCollector(facetName, field, size, context.numberOfShards(), context.fieldDataCache(), context.mapperService(), excluded);
Pattern pattern = null;
if (regex != null) {
pattern = Regex.compile(regex, regexFlags);
}
return new TermsFacetCollector(facetName, field, size, context.numberOfShards(), context.fieldDataCache(), context.mapperService(), excluded, pattern);
}
}

View File

@ -92,7 +92,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(searchResponse.hits().hits().length, equalTo(0));
TermsFacet facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
TermsFacet facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term(), equalTo("111"));
@ -108,7 +108,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(searchResponse.hits().hits().length, equalTo(0));
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term(), equalTo("111"));
@ -143,13 +143,13 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet2").field("tag").size(10))
.execute().actionGet();
TermsFacet facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
TermsFacet facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term(), equalTo("111"));
assertThat(facet.entries().get(0).count(), equalTo(2));
facet = searchResponse.facets().facet(TermsFacet.class, "facet2");
facet = searchResponse.facets().facet("facet2");
assertThat(facet.name(), equalTo("facet2"));
assertThat(facet.entries().size(), equalTo(3));
assertThat(facet.entries().get(0).term(), equalTo("yyy"));
@ -160,7 +160,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet1").field("stag").size(10).filter(termFilter("tag", "xxx")))
.execute().actionGet();
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
assertThat(facet.entries().get(0).term(), equalTo("111"));
@ -171,7 +171,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet1").field("tag").size(10))
.execute().actionGet();
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(3));
assertThat(facet.entries().get(0).term(), equalTo("yyy"));
@ -186,7 +186,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet1").field("tag").size(2))
.execute().actionGet();
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(2));
assertThat(facet.entries().get(0).term(), equalTo("yyy"));
@ -199,7 +199,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet1").field("tag").size(10).exclude("yyy"))
.execute().actionGet();
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(2));
assertThat(facet.entries().get(0).term(), anyOf(equalTo("xxx"), equalTo("zzz")));
@ -243,7 +243,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet1").field("text").size(3))
.execute().actionGet();
TermsFacet facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
TermsFacet facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(3));
for (int i = 0; i < 3; i++) {
@ -256,7 +256,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet1").field("text").size(2))
.execute().actionGet();
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(2));
for (int i = 0; i < 2; i++) {
@ -269,7 +269,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
.addFacet(termsFacet("facet1").field("text").size(1))
.execute().actionGet();
facet = searchResponse.facets().facet(TermsFacet.class, "facet1");
facet = searchResponse.facets().facet("facet1");
assertThat(facet.name(), equalTo("facet1"));
assertThat(facet.entries().size(), equalTo(1));
for (int i = 0; i < 1; i++) {
@ -314,7 +314,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
}
assertThat(searchResponse.failedShards(), equalTo(0));
StatisticalFacet facet = searchResponse.facets().facet(StatisticalFacet.class, "stats1");
StatisticalFacet facet = searchResponse.facets().facet("stats1");
assertThat(facet.name(), equalTo(facet.name()));
assertThat(facet.count(), equalTo(2l));
assertThat(facet.total(), equalTo(3d));
@ -323,7 +323,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.mean(), equalTo(1.5d));
assertThat(facet.sumOfSquares(), equalTo(5d));
facet = searchResponse.facets().facet(StatisticalFacet.class, "stats2");
facet = searchResponse.facets().facet("stats2");
assertThat(facet.name(), equalTo(facet.name()));
assertThat(facet.count(), equalTo(4l));
assertThat(facet.total(), equalTo(10d));
@ -331,7 +331,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.max(), equalTo(4d));
assertThat(facet.mean(), equalTo(2.5d));
facet = searchResponse.facets().facet(StatisticalFacet.class, "stats3");
facet = searchResponse.facets().facet("stats3");
assertThat(facet.name(), equalTo(facet.name()));
assertThat(facet.count(), equalTo(2l));
assertThat(facet.total(), equalTo(6d));
@ -387,7 +387,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
}
assertThat(searchResponse.failedShards(), equalTo(0));
HistogramFacet facet = searchResponse.facets().facet(HistogramFacet.class, "stats1");
HistogramFacet facet = searchResponse.facets().facet("stats1");
assertThat(facet.name(), equalTo("stats1"));
assertThat(facet.entries().size(), equalTo(2));
assertThat(facet.entries().get(0).key(), equalTo(1000l));
@ -399,7 +399,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.entries().get(1).total(), equalTo(1175d));
assertThat(facet.entries().get(1).mean(), equalTo(1175d));
facet = searchResponse.facets().facet(HistogramFacet.class, "stats2");
facet = searchResponse.facets().facet("stats2");
assertThat(facet.name(), equalTo("stats2"));
assertThat(facet.entries().size(), equalTo(3));
assertThat(facet.entries().get(0).key(), equalTo(10l));
@ -415,7 +415,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.entries().get(2).total(), equalTo(31d));
assertThat(facet.entries().get(2).mean(), equalTo(31d));
facet = searchResponse.facets().facet(HistogramFacet.class, "stats3");
facet = searchResponse.facets().facet("stats3");
assertThat(facet.name(), equalTo("stats3"));
assertThat(facet.entries().size(), equalTo(2));
assertThat(facet.entries().get(0).key(), equalTo(1000l));
@ -427,7 +427,7 @@ public class SimpleFacetsTests extends AbstractNodesTests {
assertThat(facet.entries().get(1).total(), equalTo(42d));
assertThat(facet.entries().get(1).mean(), equalTo(21d));
facet = searchResponse.facets().facet(HistogramFacet.class, "stats4");
facet = searchResponse.facets().facet("stats4");
assertThat(facet.name(), equalTo("stats4"));
assertThat(facet.entries().size(), equalTo(2));
assertThat(facet.entries().get(0).key(), equalTo(0l));