mirror of https://github.com/apache/lucene.git
SOLR-1387: Move contains() method to SimpleFacets
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1672106 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3b8ea7b2e7
commit
3c0ff1de91
|
@ -22,7 +22,6 @@ import java.math.BigInteger;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
|
@ -134,35 +133,7 @@ public abstract class StringHelper {
|
|||
public static boolean endsWith(BytesRef ref, BytesRef suffix) {
|
||||
return sliceEquals(ref, suffix, ref.length - suffix.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> iff the ref contains the given slice. Otherwise
|
||||
* <code>false</code>.
|
||||
*
|
||||
* @param ref
|
||||
* the {@link BytesRef} to test
|
||||
* @param slice
|
||||
* the slice to look for
|
||||
* @param ignoreCase
|
||||
* whether the comparison should be case-insensitive
|
||||
* @return Returns <code>true</code> iff the ref contains the given slice.
|
||||
* Otherwise <code>false</code>.
|
||||
*/
|
||||
public static boolean contains(BytesRef ref, BytesRef slice, boolean ignoreCase) {
|
||||
if (ignoreCase) {
|
||||
String s1 = ref.utf8ToString();
|
||||
String s2 = slice.utf8ToString();
|
||||
return s1.toLowerCase(Locale.ENGLISH).contains(s2.toLowerCase(Locale.ENGLISH));
|
||||
} else {
|
||||
for (int pos = 0; pos <= ref.length - slice.length; ++pos) {
|
||||
if (sliceEquals(ref, slice, pos)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
private static boolean sliceEquals(BytesRef sliceToTest, BytesRef other, int pos) {
|
||||
if (pos < 0 || sliceToTest.length - pos < other.length) {
|
||||
return false;
|
||||
|
|
|
@ -56,36 +56,6 @@ public class TestStringHelper extends LuceneTestCase {
|
|||
assertTrue(StringHelper.endsWith(ref, slice));
|
||||
}
|
||||
|
||||
public void testContainsAtStart() {
|
||||
BytesRef ref = new BytesRef("foobar");
|
||||
BytesRef slice = new BytesRef("foo");
|
||||
assertTrue(StringHelper.contains(ref, slice, false));
|
||||
}
|
||||
|
||||
public void testContains() {
|
||||
BytesRef ref = new BytesRef("foobar");
|
||||
BytesRef slice = new BytesRef("ooba");
|
||||
assertTrue(StringHelper.contains(ref, slice, false));
|
||||
}
|
||||
|
||||
public void testContainsAtEnd() {
|
||||
BytesRef ref = new BytesRef("foobar");
|
||||
BytesRef slice = new BytesRef("bar");
|
||||
assertTrue(StringHelper.contains(ref, slice, false));
|
||||
}
|
||||
|
||||
public void testContainsWhole() {
|
||||
BytesRef ref = new BytesRef("foobar");
|
||||
BytesRef slice = new BytesRef("foobar");
|
||||
assertTrue(StringHelper.contains(ref, slice, false));
|
||||
}
|
||||
|
||||
public void testContainsIgnoreCase() {
|
||||
BytesRef ref = new BytesRef("FooBar");
|
||||
BytesRef slice = new BytesRef("bar");
|
||||
assertTrue(StringHelper.contains(ref, slice, true));
|
||||
}
|
||||
|
||||
public void testMurmurHash3() throws Exception {
|
||||
// Hashes computed using murmur3_32 from https://code.google.com/p/pyfasthash
|
||||
assertEquals(0xf6a5c420, StringHelper.murmurhash3_x86_32(new BytesRef("foo"), 0));
|
||||
|
|
|
@ -17,12 +17,12 @@ package org.apache.lucene.demo.facet;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class TestSimpleFacetsExample extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
|
@ -54,4 +54,5 @@ public class TestSimpleFacetsExample extends LuceneTestCase {
|
|||
assertEquals("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", result.get(0).toString());
|
||||
assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", result.get(1).toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,11 +17,8 @@ package org.apache.solr.request;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
|
||||
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
|
@ -34,7 +31,6 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -44,6 +40,9 @@ import org.apache.solr.search.DocSet;
|
|||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.LongPriorityQueue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Computes term facets for docvalues field (single or multivalued).
|
||||
* <p>
|
||||
|
@ -99,8 +98,6 @@ public class DocValuesFacets {
|
|||
prefixRef.copyChars(prefix);
|
||||
}
|
||||
|
||||
final BytesRef containsBR = contains != null ? new BytesRef(contains) : null;
|
||||
|
||||
int startTermIndex, endTermIndex;
|
||||
if (prefix!=null) {
|
||||
startTermIndex = (int) si.lookupTerm(prefixRef.get());
|
||||
|
@ -173,9 +170,9 @@ public class DocValuesFacets {
|
|||
int min=mincount-1; // the smallest value in the top 'N' values
|
||||
for (int i=(startTermIndex==-1)?1:0; i<nTerms; i++) {
|
||||
int c = counts[i];
|
||||
if (containsBR != null) {
|
||||
if (contains != null) {
|
||||
final BytesRef term = si.lookupOrd(startTermIndex+i);
|
||||
if (!StringHelper.contains(term, containsBR, ignoreCase)) {
|
||||
if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -212,7 +209,7 @@ public class DocValuesFacets {
|
|||
} else {
|
||||
// add results in index order
|
||||
int i=(startTermIndex==-1)?1:0;
|
||||
if (mincount<=0 && containsBR == null) {
|
||||
if (mincount<=0 && contains == null) {
|
||||
// if mincount<=0 and we're not examining the values for contains, then
|
||||
// we won't discard any terms and we know exactly where to start.
|
||||
i+=off;
|
||||
|
@ -223,9 +220,9 @@ public class DocValuesFacets {
|
|||
int c = counts[i];
|
||||
if (c<mincount) continue;
|
||||
BytesRef term = null;
|
||||
if (containsBR != null) {
|
||||
if (contains != null) {
|
||||
term = si.lookupOrd(startTermIndex+i);
|
||||
if (!StringHelper.contains(term, containsBR, ignoreCase)) {
|
||||
if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,12 +17,8 @@
|
|||
|
||||
package org.apache.solr.request;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
|
@ -32,7 +28,6 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
|
@ -42,6 +37,16 @@ import org.apache.solr.search.DocSet;
|
|||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.BoundedTreeSet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CompletionService;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.ExecutorCompletionService;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
|
||||
class PerSegmentSingleValuedFaceting {
|
||||
|
||||
|
@ -55,7 +60,7 @@ class PerSegmentSingleValuedFaceting {
|
|||
boolean missing;
|
||||
String sort;
|
||||
String prefix;
|
||||
BytesRef containsBR;
|
||||
String contains;
|
||||
boolean ignoreCase;
|
||||
|
||||
Filter baseSet;
|
||||
|
@ -72,7 +77,7 @@ class PerSegmentSingleValuedFaceting {
|
|||
this.missing = missing;
|
||||
this.sort = sort;
|
||||
this.prefix = prefix;
|
||||
this.containsBR = contains != null ? new BytesRef(contains) : null;
|
||||
this.contains = contains;
|
||||
this.ignoreCase = ignoreCase;
|
||||
}
|
||||
|
||||
|
@ -180,7 +185,7 @@ class PerSegmentSingleValuedFaceting {
|
|||
SegFacet seg = queue.top();
|
||||
|
||||
// if facet.contains specified, only actually collect the count if substring contained
|
||||
boolean collect = containsBR == null || StringHelper.contains(seg.tempBR, containsBR, ignoreCase);
|
||||
boolean collect = contains == null || SimpleFacets.contains(seg.tempBR.utf8ToString(), contains, ignoreCase);
|
||||
|
||||
// we will normally end up advancing the term enum for this segment
|
||||
// while still using "val", so we need to make a copy since the BytesRef
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.solr.request;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -143,6 +144,25 @@ public class SimpleFacets {
|
|||
this.rb = rb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> if a String contains the given substring. Otherwise
|
||||
* <code>false</code>.
|
||||
*
|
||||
* @param ref
|
||||
* the {@link String} to test
|
||||
* @param substring
|
||||
* the substring to look for
|
||||
* @param ignoreCase
|
||||
* whether the comparison should be case-insensitive
|
||||
* @return Returns <code>true</code> iff the String contains the given substring.
|
||||
* Otherwise <code>false</code>.
|
||||
*/
|
||||
public static boolean contains(String ref, String substring, boolean ignoreCase) {
|
||||
if (ignoreCase)
|
||||
return StringUtils.containsIgnoreCase(ref, substring);
|
||||
return StringUtils.contains(ref, substring);
|
||||
}
|
||||
|
||||
|
||||
protected void parseParams(String type, String param) throws SyntaxError, IOException {
|
||||
localParams = QueryParsing.getLocalParams(param, req.getParams());
|
||||
|
@ -494,7 +514,6 @@ public class SimpleFacets {
|
|||
}
|
||||
|
||||
BytesRef prefixBytesRef = prefix != null ? new BytesRef(prefix) : null;
|
||||
BytesRef containsRef = contains != null ? new BytesRef(contains) : null;
|
||||
final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytesRef, 128);
|
||||
|
||||
SchemaField sf = searcher.getSchema().getFieldOrNull(groupField);
|
||||
|
@ -526,7 +545,7 @@ public class SimpleFacets {
|
|||
= result.getFacetEntries(offset, limit < 0 ? Integer.MAX_VALUE : limit);
|
||||
for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) {
|
||||
//:TODO:can we do contains earlier than this to make it more efficient?
|
||||
if (containsRef != null && !StringHelper.contains(facetEntry.getValue(), containsRef, ignoreCase)) {
|
||||
if (contains != null && !contains(facetEntry.getValue().utf8ToString(), contains, ignoreCase)) {
|
||||
continue;
|
||||
}
|
||||
facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef);
|
||||
|
@ -730,12 +749,6 @@ public class SimpleFacets {
|
|||
String indexedPrefix = ft.toInternal(prefix);
|
||||
prefixTermBytes = new BytesRef(indexedPrefix);
|
||||
}
|
||||
|
||||
BytesRef containsTermBytes = null;
|
||||
if (contains != null) {
|
||||
String indexedContains = ft.toInternal(contains);
|
||||
containsTermBytes = new BytesRef(indexedContains);
|
||||
}
|
||||
|
||||
Fields fields = r.fields();
|
||||
Terms terms = fields==null ? null : fields.terms(field);
|
||||
|
@ -769,7 +782,7 @@ public class SimpleFacets {
|
|||
if (prefixTermBytes != null && !StringHelper.startsWith(term, prefixTermBytes))
|
||||
break;
|
||||
|
||||
if (containsTermBytes == null || StringHelper.contains(term, containsTermBytes, ignoreCase)) {
|
||||
if (contains == null || contains(term.utf8ToString(), contains, ignoreCase)) {
|
||||
int df = termsEnum.docFreq();
|
||||
|
||||
// If we are sorting, we can use df>min (rather than >=) since we
|
||||
|
|
|
@ -2258,4 +2258,24 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
|
|||
400);
|
||||
}
|
||||
|
||||
public void testContainsAtStart() {
|
||||
assertTrue(SimpleFacets.contains("foobar", "foo", false));
|
||||
}
|
||||
|
||||
public void testContains() {
|
||||
assertTrue(SimpleFacets.contains("foobar", "ooba", false));
|
||||
}
|
||||
|
||||
public void testContainsAtEnd() {
|
||||
assertTrue(SimpleFacets.contains("foobar", "bar", false));
|
||||
}
|
||||
|
||||
public void testContainsWhole() {
|
||||
assertTrue(SimpleFacets.contains("foobar", "foobar", false));
|
||||
}
|
||||
|
||||
public void testContainsIgnoreCase() {
|
||||
assertTrue(SimpleFacets.contains("FooBar", "bar", true));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue