Added an extended test for the terms facet that indexes a decent number of documents / field values and randomly exercises various options. Also fixed an issue where `regex` and `excludes` were ignored when `all_terms` was used.

This commit is contained in:
Martijn van Groningen 2013-04-05 10:26:53 +02:00
parent 4b1ec037f8
commit 224faffead
3 changed files with 343 additions and 4 deletions

View File

@ -18,9 +18,7 @@
*/
package org.elasticsearch.search.facet.terms.strings;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CharsRef;
@ -28,7 +26,8 @@ import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.script.SearchScript;
import com.google.common.collect.ImmutableSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public final class HashedScriptAggregator extends HashedAggregator {
@ -51,6 +50,22 @@ public final class HashedScriptAggregator extends HashedAggregator {
this.convert = script != null || matcher != null;
}
/**
 * Registers a term with the aggregator unless it is filtered out.
 * <p>
 * A term is dropped when it is contained in {@code excluded}, or when a
 * {@code matcher} is configured and the term does not match it in full.
 * Every other term is handed to the superclass implementation.
 *
 * @param value    the term bytes
 * @param hashCode the hash code passed along with the term
 */
@Override
public void addValue(BytesRef value, int hashCode) {
    final boolean isExcluded = excluded != null && excluded.contains(value);
    if (isExcluded) {
        return;
    }
    // Decode the UTF-8 term into `spare`; the matcher below runs on that copy.
    UnicodeUtil.UTF8toUTF16(value, spare);
    boolean accepted = true;
    if (matcher != null) {
        assert convert : "regexp: [convert == false] but should be true";
        assert value.utf8ToString().equals(spare.toString()) : "not converted";
        accepted = matcher.reset(spare).matches();
    }
    if (accepted) {
        super.addValue(value, hashCode);
    }
}
@Override
protected void onValue(int docId, BytesRef value, int hashCode, BytesValues values) {
if (excluded != null && excluded.contains(value)) {

View File

@ -0,0 +1,303 @@
package org.elasticsearch.test.integration.search.facet;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.RandomStringGenerator;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.facet.FacetBuilders;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacetBuilder;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.*;
import java.util.regex.Pattern;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
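/**
 * Randomized integration test for the terms facet on string fields.
 * <p>
 * Documents with random field values are indexed, and the facet results for
 * randomly chosen options (order, size, excludes, regex, all_terms, execution
 * hint) are compared against an in-memory control data set. Subclasses can
 * override {@link #numberOfShards()}, {@link #numberOfNodes()} and
 * {@link #numDocs()} to run the same test against a different setup.
 */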
public class ExtendedFacetsTests extends AbstractNodesTests {
private Client client;
/**
 * Starts {@link #numberOfNodes()} nodes named {@code node0}, {@code node1}, ...
 * with zero replicas and {@link #numberOfShards()} shards per index, then
 * stores the client returned by {@link #getClient()}.
 */
@BeforeClass
public void createNodes() throws Exception {
    final Settings nodeSettings = ImmutableSettings.settingsBuilder()
            .put("index.number_of_shards", numberOfShards())
            .put("index.number_of_replicas", 0)
            .build();
    int nodeIndex = 0;
    while (nodeIndex < numberOfNodes()) {
        startNode("node" + nodeIndex, nodeSettings);
        nodeIndex++;
    }
    client = getClient();
}
/**
 * Value used for the {@code index.number_of_shards} setting of the started nodes.
 * Overridable so subclasses can run the test with a different shard count.
 */
protected int numberOfShards() {
return 1;
}
/**
 * Number of nodes started by {@code createNodes()}.
 * Overridable so subclasses can run the test on a larger cluster.
 */
protected int numberOfNodes() {
return 1;
}
/**
 * Number of documents the test indexes before running the facet requests.
 * Overridable so subclasses can scale the data set.
 */
protected int numDocs() {
return 2500;
}
/**
 * Closes the client and shuts down all nodes started for this test class.
 * <p>
 * The nodes are stopped even when closing the client fails, so a failure
 * during teardown does not leave nodes running.
 */
@AfterClass
public void closeNodes() {
    try {
        // client is still null if createNodes() failed before assigning it
        if (client != null) {
            client.close();
        }
    } finally {
        closeAllNodes();
    }
}
/**
 * Returns the client looked up for {@code "node0"}, the name given to the
 * first node started by {@code createNodes()}.
 */
protected Client getClient() {
return client("node0");
}
/**
 * Indexes {@code numDocs()} documents whose facet value is stored in three
 * copies of the same string field (one per fielddata format), then runs a
 * terms facet with randomized options for every field copy and every query
 * value, comparing each response with the expected entries computed by
 * {@code getControlFacetEntries}.
 * <p>
 * The random seed is logged when anything fails.
 */
@Test
public void testTermFacet_stringFields() throws Throwable {
// prepareDelete() is called without index names - presumably this removes every
// existing index so the test starts clean; confirm against the admin client API.
client.admin().indices().prepareDelete().execute().actionGet();
// Three not_analyzed copies of the faceted value, differing only in fielddata format
// (concrete_bytes, paged_bytes, fst), plus field2 which is used to select documents.
client.admin().indices().prepareCreate("test")
.addMapping("type1", jsonBuilder().startObject()
.startObject("type1")
.startObject("properties")
.startObject("field1_concrete")
.field("type", "string")
.field("index", "not_analyzed")
.startObject("fielddata")
.field("format", "concrete_bytes")
.endObject()
.endObject()
.startObject("field1_paged")
.field("type", "string")
.field("index", "not_analyzed")
.startObject("fielddata")
.field("format", "paged_bytes")
.endObject()
.endObject()
.startObject("field1_fst")
.field("type", "string")
.field("index", "not_analyzed")
.startObject("fielddata")
.field("format", "fst")
.endObject()
.endObject()
.startObject("field2")
.field("type", "string")
.field("index", "not_analyzed")
.endObject()
.endObject()
.endObject().endObject()
)
.execute().actionGet();
// Time-based seed; it is only reported in the catch block at the end of the method.
long seed = System.currentTimeMillis(); // LuceneTestCase...
try {
Random random = new Random(seed);
// Pool of 200 candidate facet values, each 10 characters long.
// NOTE(review): the two `true` flags presumably select letters and digits - confirm
// against RandomStringGenerator.
int numOfValuesField1 = 200;
String[] field1Values = new String[numOfValuesField1];
for (int i = 0; i < numOfValuesField1; i++) {
field1Values[i] = RandomStringGenerator.random(10, 0, 0, true, true, null, random);
}
// Pool of 50 values (5 characters each) for field2, used as term query input later on.
int numOfQueryValues = 50;
String[] queryValues = new String[numOfQueryValues];
for (int i = 0; i < numOfQueryValues; i++) {
queryValues[i] = RandomStringGenerator.random(5, 0, 0, true, true, null, random);
}
// Control data: field2 value -> (field1 value -> number of indexed docs with that pair).
Map<String, Map<String, Integer>> controlDataSet = new HashMap<String, Map<String, Integer>>();
for (int i = 1; i <= numDocs(); i++) {
String field1Val = field1Values[random.nextInt(numOfValuesField1)];
String queryVal = queryValues[random.nextInt(numOfQueryValues)];
// All three field1_* copies get the same value, so every format must yield the same facet.
client.prepareIndex("test", "type1", Integer.toString(i))
.setSource(jsonBuilder().startObject()
.field("field1_concrete", field1Val)
.field("field1_paged", field1Val)
.field("field1_fst", field1Val)
.field("field2", queryVal)
.endObject())
.execute().actionGet();
// Mirror the indexed document in the control data set.
Map<String, Integer> controlField1Facets = controlDataSet.get(queryVal);
if (controlField1Facets == null) {
controlField1Facets = new HashMap<String, Integer>();
controlDataSet.put(queryVal, controlField1Facets);
}
Integer controlCount = controlField1Facets.get(field1Val);
if (controlCount == null) {
controlCount = 0;
}
controlField1Facets.put(field1Val, ++controlCount);
}
// Refresh before searching so the documents indexed above are searchable.
client.admin().indices().prepareRefresh().execute().actionGet();
String[] facetFields = new String[]{"field1_concrete", "field1_paged", "field1_fst"};
TermsFacet.ComparatorType[] compTypes = TermsFacet.ComparatorType.values();
// One facet request per (field copy, query value) pair, each with freshly drawn options.
for (String facetField : facetFields) {
for (String queryVal : controlDataSet.keySet()) {
TermsFacet.ComparatorType compType = compTypes[random.nextInt(compTypes.length)];
int size;
if (compType == TermsFacet.ComparatorType.COUNT || compType == TermsFacet.ComparatorType.REVERSE_COUNT) {
// Should always equal to number of unique values b/c of the top n terms problem in case sorting by facet count.
size = numOfValuesField1;
} else {
size = random.nextInt(numOfValuesField1);
}
Map<String, Integer> controlFacets = controlDataSet.get(queryVal);
TermsFacetBuilder termsFacetBuilder = FacetBuilders.termsFacet("facet1").field(facetField)
.order(compType).size(size);
// Randomly request the "map" execution hint.
if (random.nextBoolean()) {
termsFacetBuilder.executionHint("map");
}
// Randomly exclude 1 to 5 terms picked from the terms present for this query value
// (the same term may be picked more than once).
List<String> excludes = new ArrayList<String>();
if (random.nextBoolean()) {
int numExludes = random.nextInt(5) + 1;
List<String> facetValues = new ArrayList<String>(controlFacets.keySet());
for (int i = 0; i < numExludes; i++) {
excludes.add(facetValues.get(random.nextInt(facetValues.size())));
}
termsFacetBuilder.exclude(excludes.toArray());
}
// Randomly add a regex built from the first half of an existing term: "^<prefix>.*".
String regex = null;
if (random.nextBoolean()) {
List<String> facetValues = new ArrayList<String>(controlFacets.keySet());
regex = facetValues.get(random.nextInt(facetValues.size()));
regex = "^" + regex.substring(0, regex.length() / 2) + ".*";
termsFacetBuilder.regex(regex);
}
// all_terms is enabled in roughly one out of ten requests.
boolean allTerms = random.nextInt(10) == 3;
termsFacetBuilder.allTerms(allTerms);
SearchResponse response = client.prepareSearch("test")
.setQuery(QueryBuilders.termQuery("field2", queryVal))
.addFacet(termsFacetBuilder)
.execute().actionGet();
TermsFacet termsFacet = response.getFacets().facet("facet1");
List<Tuple<Text, Integer>> controlFacetEntries = getControlFacetEntries(field1Values, controlFacets, size, compType, excludes, regex, allTerms);
// The assertion message carries every randomized option of this request.
String reason = String.format("query: %s field: %s size: %d order: %s all_terms: %s regex: %s excludes: %s", queryVal, facetField, size, compType, allTerms, regex, excludes);
assertThat(reason, termsFacet.getEntries().size(), equalTo(controlFacetEntries.size()));
// Entries must match the control list position by position, in both term and count.
for (int i = 0; i < controlFacetEntries.size(); i++) {
assertThat(reason, termsFacet.getEntries().get(i).getTerm(), equalTo(controlFacetEntries.get(i).v1()));
assertThat(reason, termsFacet.getEntries().get(i).getCount(), equalTo(controlFacetEntries.get(i).v2()));
}
}
}
} catch (Throwable t) {
// Log the seed before rethrowing so the failing run can be identified.
logger.error("Failed with seed:" + seed);
throw t;
}
}
/**
 * Computes the facet entries expected for one query value from the control data.
 *
 * @param field1Values  every value generated for the faceted field; only consulted when {@code allTerms} is set
 * @param controlFacets term -> number of documents matching the query that carry the term
 * @param size          maximum number of entries to return
 * @param sort          ordering applied before the list is cut to {@code size}
 * @param excludes      terms that must not show up in the result
 * @param regex         pattern a term has to match in full, or {@code null} for no regex filtering
 * @param allTerms      whether generated values without a matching document are reported with count 0
 * @return the expected entries, sorted according to {@code sort} and holding at most {@code size} elements
 */
private List<Tuple<Text, Integer>> getControlFacetEntries(String[] field1Values, Map<String, Integer> controlFacets, int size, TermsFacet.ComparatorType sort, List<String> excludes, String regex, boolean allTerms) {
    Pattern pattern = null;
    if (regex != null) {
        pattern = Regex.compile(regex, null);
    }

    List<Tuple<Text, Integer>> entries = new ArrayList<Tuple<Text, Integer>>();
    for (Map.Entry<String, Integer> e : controlFacets.entrySet()) {
        if (isFilteredOut(e.getKey(), excludes, pattern)) {
            continue;
        }
        entries.add(new Tuple<Text, Integer>(new StringText(e.getKey()), e.getValue()));
    }

    if (allTerms) {
        // field1Values is randomly generated and may hold the same value more than once;
        // each zero-count term must still be expected only once.
        Set<String> zeroCountTerms = new HashSet<String>();
        for (String field1Value : field1Values) {
            if (controlFacets.containsKey(field1Value) || !zeroCountTerms.add(field1Value)) {
                continue;
            }
            if (isFilteredOut(field1Value, excludes, pattern)) {
                continue;
            }
            entries.add(new Tuple<Text, Integer>(new StringText(field1Value), 0));
        }
    }

    switch (sort) {
        case COUNT:
            Collections.sort(entries, count);
            break;
        case REVERSE_COUNT:
            Collections.sort(entries, count_reverse);
            break;
        case TERM:
            Collections.sort(entries, term);
            break;
        case REVERSE_TERM:
            Collections.sort(entries, term_reverse);
            break;
    }
    return size >= entries.size() ? entries : entries.subList(0, size);
}

/** Tells whether a term is listed in {@code excludes} or fails to match {@code pattern} (when one is given) in full. */
private static boolean isFilteredOut(String termValue, List<String> excludes, Pattern pattern) {
    if (excludes.contains(termValue)) {
        return true;
    }
    return pattern != null && !pattern.matcher(termValue).matches();
}
// Shared comparator instances: getControlFacetEntries sorts with them, and the two
// reverse comparators delegate to `count` and `term` respectively.
private final static COUNT count = new COUNT();
private final static COUNT_REVERSE count_reverse = new COUNT_REVERSE();
private final static TERM term = new TERM();
private final static TERM_REVERSE term_reverse = new TERM_REVERSE();
/**
 * Orders control entries by count, highest first. Entries with the same count are
 * ordered by term in reverse of the term's natural ordering, and as a last resort
 * by identity hash code (larger hash code first).
 */
private static class COUNT implements Comparator<Tuple<Text, Integer>> {

    @Override
    public int compare(Tuple<Text, Integer> o1, Tuple<Text, Integer> o2) {
        // Integer.compareTo cannot overflow, unlike subtracting one count from the other.
        int cmp = o2.v2().compareTo(o1.v2());
        if (cmp != 0) {
            return cmp;
        }
        cmp = o2.v1().compareTo(o1.v1());
        if (cmp != 0) {
            return cmp;
        }
        // Identity hash codes may be any int, so compare them explicitly instead of
        // subtracting, which could wrap around and report the wrong sign.
        final int hash1 = System.identityHashCode(o1);
        final int hash2 = System.identityHashCode(o2);
        return hash2 < hash1 ? -1 : (hash2 == hash1 ? 0 : 1);
    }
}
/**
 * Exact reverse of the ordering defined by the shared {@code count} comparator.
 */
private static class COUNT_REVERSE implements Comparator<Tuple<Text, Integer>> {

    @Override
    public int compare(Tuple<Text, Integer> o1, Tuple<Text, Integer> o2) {
        // Swap the arguments rather than negating the result: negating
        // Integer.MIN_VALUE yields Integer.MIN_VALUE again, i.e. the wrong sign.
        return count.compare(o2, o1);
    }
}
/**
 * Orders control entries by their term, using the term's own {@code compareTo}.
 */
private static class TERM implements Comparator<Tuple<Text, Integer>> {

    @Override
    public int compare(Tuple<Text, Integer> o1, Tuple<Text, Integer> o2) {
        final Text leftTerm = o1.v1();
        final Text rightTerm = o2.v1();
        return leftTerm.compareTo(rightTerm);
    }
}
/**
 * Exact reverse of the ordering defined by the shared {@code term} comparator.
 */
private static class TERM_REVERSE implements Comparator<Tuple<Text, Integer>> {

    @Override
    public int compare(Tuple<Text, Integer> o1, Tuple<Text, Integer> o2) {
        // Swap the arguments rather than negating the result: negating
        // Integer.MIN_VALUE yields Integer.MIN_VALUE again, i.e. the wrong sign.
        return term.compare(o2, o1);
    }
}
}

View File

@ -0,0 +1,21 @@
package org.elasticsearch.test.integration.search.facet;
/**
 * Runs the tests inherited from {@link ExtendedFacetsTests} with a larger setup:
 * 8 shards, 4 nodes and 50000 documents.
 */
public class ExtendedFacetsTestsMultiShardMultiNodeTests extends ExtendedFacetsTests {
// Shard count reported to the base class.
@Override
protected int numberOfShards() {
return 8;
}
// Node count reported to the base class.
@Override
protected int numberOfNodes() {
return 4;
}
// Document count reported to the base class.
@Override
protected int numDocs() {
return 50000;
}
}