Uwe Schindler 2016-12-29 22:57:39 +01:00
commit b584f9c56c
9 changed files with 352 additions and 57 deletions

View File

@@ -53,6 +53,8 @@ public class TestDoubleValuesSource extends LuceneTestCase {
document.add(new NumericDocValuesField("long", random().nextLong())); document.add(new NumericDocValuesField("long", random().nextLong()));
document.add(new FloatDocValuesField("float", random().nextFloat())); document.add(new FloatDocValuesField("float", random().nextFloat()));
document.add(new DoubleDocValuesField("double", random().nextDouble())); document.add(new DoubleDocValuesField("double", random().nextDouble()));
if (i == 545)
document.add(new DoubleDocValuesField("onefield", 45.72));
iw.addDocument(document); iw.addDocument(document);
} }
reader = iw.getReader(); reader = iw.getReader();
@@ -67,6 +69,13 @@ public class TestDoubleValuesSource extends LuceneTestCase {
    super.tearDown();
  }

  public void testSortMissing() throws Exception {
    DoubleValuesSource onefield = DoubleValuesSource.fromDoubleField("onefield");
    TopDocs results = searcher.search(new MatchAllDocsQuery(), 1, new Sort(onefield.getSortField(true)));
    FieldDoc first = (FieldDoc) results.scoreDocs[0];
    assertEquals(45.72, first.fields[0]);
  }

  public void testSimpleFieldEquivalences() throws Exception {
    checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("int", SortField.Type.INT, random().nextBoolean())));
    checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("long", SortField.Type.LONG, random().nextBoolean())));
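
This testSortMissing (and the matching long-valued test in the next file) leans on Lucene's default missing-value handling: documents that lack the doc-values field sort as 0, so a reverse (descending) sort surfaces the single document that actually carries the field. A minimal sketch of that reasoning, reusing the names from the test above:

  // Only doc 545 has "onefield"; every other document sorts as the
  // default missing value (0.0), so descending order puts 45.72 first.
  DoubleValuesSource onefield = DoubleValuesSource.fromDoubleField("onefield");
  Sort sort = new Sort(onefield.getSortField(true)); // true = reverse (descending)
  TopDocs top = searcher.search(new MatchAllDocsQuery(), 1, sort);
  FieldDoc first = (FieldDoc) top.scoreDocs[0];      // first.fields[0] == 45.72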

View File

@@ -49,6 +49,8 @@ public class TestLongValuesSource extends LuceneTestCase {
      document.add(newTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
      document.add(new NumericDocValuesField("int", random().nextInt()));
      document.add(new NumericDocValuesField("long", random().nextLong()));
      if (i == 545)
        document.add(new NumericDocValuesField("onefield", 45));
      iw.addDocument(document);
    }
    reader = iw.getReader();
@@ -63,6 +65,13 @@ public class TestLongValuesSource extends LuceneTestCase {
    super.tearDown();
  }

  public void testSortMissing() throws Exception {
    LongValuesSource onefield = LongValuesSource.fromLongField("onefield");
    TopDocs results = searcher.search(new MatchAllDocsQuery(), 1, new Sort(onefield.getSortField(true)));
    FieldDoc first = (FieldDoc) results.scoreDocs[0];
    assertEquals(45L, first.fields[0]);
  }

  public void testSimpleFieldEquivalences() throws Exception {
    checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("int", SortField.Type.INT, random().nextBoolean())));
    checkSorts(new MatchAllDocsQuery(), new Sort(new SortField("long", SortField.Type.LONG, random().nextBoolean())));

View File

@@ -136,6 +136,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
  /** {@link IndexSearcher} used for lookups. */
  protected SearcherManager searcherMgr;

  /** Used to manage concurrent access to searcherMgr */
  protected final Object searcherMgrLock = new Object();

  /** Default minimum number of leading characters before
   *  PrefixQuery is used (4). */
@@ -275,53 +278,55 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
  @Override
  public void build(InputIterator iter) throws IOException {
    synchronized (searcherMgrLock) {
      if (searcherMgr != null) {
        searcherMgr.close();
        searcherMgr = null;
      }

      if (writer != null) {
        writer.close();
        writer = null;
      }

      boolean success = false;
      try {
        // First pass: build a temporary normal Lucene index,
        // just indexing the suggestions as they iterate:
        writer = new IndexWriter(dir,
                                 getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
        //long t0 = System.nanoTime();

        // TODO: use threads?
        BytesRef text;
        while ((text = iter.next()) != null) {
          BytesRef payload;
          if (iter.hasPayloads()) {
            payload = iter.payload();
          } else {
            payload = null;
          }

          add(text, iter.contexts(), iter.weight(), payload);
        }

        //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
        if (commitOnBuild || closeIndexWriterOnBuild) {
          commit();
        }
        searcherMgr = new SearcherManager(writer, null);
        success = true;
      } finally {
        if (success) {
          if (closeIndexWriterOnBuild) {
            writer.close();
            writer = null;
          }
        } else {  // failure
          if (writer != null) {
            writer.rollback();
            writer = null;
          }
        }
      }
    }
  }
@@ -369,10 +374,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
      } else {
        writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
      }
      synchronized (searcherMgrLock) {
        SearcherManager oldSearcherMgr = searcherMgr;
        searcherMgr = new SearcherManager(writer, null);
        if (oldSearcherMgr != null) {
          oldSearcherMgr.close();
        }
      }
    }
  }
@@ -642,7 +649,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    List<LookupResult> results = null;
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
      mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference
      searcher = mgr.acquire();
    }
    try {
      //System.out.println("got searcher=" + searcher);
      searcher.search(finalQuery, c2);
@@ -653,7 +665,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
      // hits = searcher.search(query, null, num, SORT);
      results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
      mgr.release(searcher);
    }

    //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
@@ -853,7 +865,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    long mem = RamUsageEstimator.shallowSizeOf(this);
    try {
      if (searcherMgr != null) {
        SearcherManager mgr;
        IndexSearcher searcher;
        synchronized (searcherMgrLock) {
          mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference
          searcher = mgr.acquire();
        }
        try {
          for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
            LeafReader reader = FilterLeafReader.unwrap(context.reader());
@@ -862,7 +879,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
            }
          }
        } finally {
          mgr.release(searcher);
        }
      }
      return mem;
@@ -876,7 +893,12 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    List<Accountable> resources = new ArrayList<>();
    try {
      if (searcherMgr != null) {
        SearcherManager mgr;
        IndexSearcher searcher;
        synchronized (searcherMgrLock) {
          mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference
          searcher = mgr.acquire();
        }
        try {
          for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
            LeafReader reader = FilterLeafReader.unwrap(context.reader());
@@ -885,7 +907,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
            }
          }
        } finally {
          mgr.release(searcher);
        }
      }
      return Collections.unmodifiableList(resources);
@@ -899,11 +921,16 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    if (searcherMgr == null) {
      return 0;
    }
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
      mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference
      searcher = mgr.acquire();
    }
    try {
      return searcher.getIndexReader().numDocs();
    } finally {
      mgr.release(searcher);
    }
  }
}
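
The pattern repeated throughout this file is worth spelling out: build() and the index-opening path replace searcherMgr under searcherMgrLock, so every reader first snapshots the reference inside the lock and then acquires and releases against that snapshot. A minimal sketch of the idiom, using the names from the diff (surrounding class assumed):

  // Snapshot the shared SearcherManager under the lock, then acquire and
  // release on the local copy, so both calls hit the same manager even if
  // searcherMgr is replaced concurrently by build() or an index swap.
  SearcherManager mgr;
  IndexSearcher searcher;
  synchronized (searcherMgrLock) {
    mgr = searcherMgr;
    searcher = mgr.acquire();
  }
  try {
    // ... use searcher ...
  } finally {
    mgr.release(searcher); // never searcherMgr.release(...), which may be a different manager
  }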

View File

@@ -202,6 +202,8 @@ New Features
* SOLR-9897: Add hl.requireFieldMatch toggle support when using the UnifiedHighlighter. Defaults to false like the
  other highlighters that support this. (David Smiley)

* SOLR-9905: Add NullStream to isolate the performance of the ExportWriter (Joel Bernstein)

Optimizations
----------------------

* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
@@ -296,6 +298,8 @@ Bug Fixes
* SOLR-9901: Implement move in HdfsDirectoryFactory. (Mark Miller)

* SOLR-9900: fix false positives on range queries with ReversedWildcardFilterFactory (Yonik Seeley via Mikhail Khludnev)

Other Changes
----------------------

View File

@@ -139,7 +139,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("classify", ClassifyStream.class) .withFunctionName("classify", ClassifyStream.class)
.withFunctionName("fetch", FetchStream.class) .withFunctionName("fetch", FetchStream.class)
.withFunctionName("executor", ExecutorStream.class) .withFunctionName("executor", ExecutorStream.class)
.withFunctionName("null", NullStream.class)
// metrics // metrics
.withFunctionName("min", MinMetric.class) .withFunctionName("min", MinMetric.class)
.withFunctionName("max", MaxMetric.class) .withFunctionName("max", MaxMetric.class)

View File

@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
@@ -894,6 +895,19 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
  protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
    checkNullField(field);
    SchemaField sf = schema.getField(field);

    if (part1 == null) {
      ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType());
      if (factory != null) {
        // There will be reversed tokens starting with \u0001 that we want to exclude, so
        // let's start at \u0002 inclusive instead.
        char[] buf = new char[1];
        buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1;
        part1 = new String(buf);
        startInclusive = true;
      }
    }

    return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
  }
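
For context on the lower-bound tweak: ReverseStringFilter marks reversed tokens with a leading START_OF_HEADING_MARKER (\u0001), and that marker sorts below every ordinary term, so an open lower bound such as [* TO a] would otherwise sweep the reversed tokens in. A small illustrative snippet (the example strings are assumptions, not from the commit):

  // A reversed token carries the \u0001 marker prefix and sorts below "a",
  // so it would match [* TO a]; that is the false positive being fixed.
  String reversedToken = ReverseStringFilter.START_OF_HEADING_MARKER + "olleh"; // reversed "hello"
  assert reversedToken.compareTo("a") < 0;

  // Starting the range at \u0002 inclusive excludes all marker-prefixed tokens.
  String lowerBound = String.valueOf((char) (ReverseStringFilter.START_OF_HEADING_MARKER + 1));
  assert lowerBound.compareTo(reversedToken) > 0;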

View File

@@ -182,6 +182,18 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
assertQ("false positive", assertQ("false positive",
req("+id:1 +one:*zemog*"), req("+id:1 +one:*zemog*"),
"//result[@numFound=0]"); "//result[@numFound=0]");
assertQ("no reverse, no false positive",
req("q", "+id:1 +three:[* TO a]",
"debugQuery", "true"),
"//result[@numFound=0]");
{
String reverseField = random().nextBoolean() ? "one":"two";
assertQ("false positive",
req("q", "+id:1 +"+reverseField+":[* TO a]",
"debugQuery", "true"),
"//result[@numFound=0]");
}
assertQ("false positive", assertQ("false positive",
req("+id:1 +two:*zemog*"), req("+id:1 +two:*zemog*"),
"//result[@numFound=0]"); "//result[@numFound=0]");

View File

@@ -0,0 +1,155 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.stream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Date;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.StreamComparator;
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * The NullStream iterates over a TupleStream and eats the tuples. It returns the tuple count in the EOF tuple.
 * Because the NullStream eats all the tuples it sees, it can be used as a simple tool for performance analysis of
 * underlying streams.
 **/
public class NullStream extends TupleStream implements Expressible {

  private static final long serialVersionUID = 1;

  private TupleStream stream;
  private long count;
  private long start;
  private Tuple eof;

  public NullStream(TupleStream tupleStream) throws IOException {
    init(tupleStream);
  }

  public NullStream(StreamExpression expression, StreamFactory factory) throws IOException {
    // grab all parameters out
    List<StreamExpression> streamExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, TupleStream.class);
    TupleStream stream = factory.constructStream(streamExpressions.get(0));
    init(stream);
  }

  private void init(TupleStream tupleStream) throws IOException {
    this.stream = tupleStream;
  }
  @Override
  public StreamExpression toExpression(StreamFactory factory) throws IOException {
    return toExpression(factory, true);
  }

  private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException {
    // function name
    StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass()));

    if (includeStreams) {
      // stream
      if (stream instanceof Expressible) {
        expression.addParameter(((Expressible) stream).toExpression(factory));
      } else {
        throw new IOException("This NullStream contains a non-expressible TupleStream - it cannot be converted to an expression");
      }
    } else {
      expression.addParameter("<stream>");
    }

    return expression;
  }
  @Override
  public Explanation toExplanation(StreamFactory factory) throws IOException {
    return new StreamExplanation(getStreamNodeId().toString())
        .withChildren(new Explanation[]{
            stream.toExplanation(factory)
        })
        .withFunctionName(factory.getFunctionName(this.getClass()))
        .withImplementingClass(this.getClass().getName())
        .withExpressionType(ExpressionType.STREAM_DECORATOR)
        .withExpression(toExpression(factory, false).toString());
  }

  public void setStreamContext(StreamContext context) {
    this.stream.setStreamContext(context);
  }

  public List<TupleStream> children() {
    List<TupleStream> l = new ArrayList<TupleStream>();
    l.add(stream);
    return l;
  }
  public void open() throws IOException {
    start = new Date().getTime();
    count = 0;
    stream.open();
  }

  public void close() throws IOException {
    stream.close();
  }

  public Tuple read() throws IOException {
    if (eof != null) {
      return eof;
    }

    while (true) {
      Tuple tuple = stream.read();
      if (tuple.EOF) {
        eof = tuple;
        long end = new Date().getTime();
        Tuple t = new Tuple(new HashMap());
        t.put("nullCount", count);
        t.put("timer", end - start);
        return t;
      } else {
        ++count;
      }
    }
  }
  /** Return the stream sort, i.e. the order in which records are returned. */
  public StreamComparator getStreamSort() {
    return stream.getStreamSort();
  }

  public int getCost() {
    return 0;
  }
}
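
As a usage note, the class above is driven through stream expressions of the form null(<stream>), which consume the wrapped stream and emit one summary tuple. A hedged SolrJ sketch (the collection name and zkHost are placeholders, not from the commit):

  // Wrap a search stream in null() to time how fast tuples can be pulled;
  // "collection1" and "localhost:9983" are placeholder values.
  StreamFactory factory = new StreamFactory()
      .withCollectionZkHost("collection1", "localhost:9983")
      .withFunctionName("search", CloudSolrStream.class)
      .withFunctionName("null", NullStream.class);

  TupleStream stream = factory.constructStream(
      "null(search(collection1, q=*:*, fl=\"id\", sort=\"id asc\"))");
  stream.open();
  try {
    Tuple summary = stream.read();                 // single non-EOF summary tuple
    long consumed = summary.getLong("nullCount");  // tuples eaten
    long millis = summary.getLong("timer");        // elapsed wall time in ms
  } finally {
    stream.close();
  }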

View File

@@ -373,6 +373,71 @@ public class StreamExpressionTest extends SolrCloudTestCase {
  }
  @Test
  public void testNullStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("null", NullStream.class);

    // Basic test
    stream = factory.constructStream("null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc\")");
    tuples = getTuples(stream);
    assertTrue(tuples.size() == 1);
    assertTrue(tuples.get(0).getLong("nullCount") == 6);
  }
  @Test
  public void testParallelNullStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("null", NullStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    // Basic test
    stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"nullCount desc\", null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), by=\"a_i asc\"))");
    tuples = getTuples(stream);
    assertTrue(tuples.size() == 2);
    long nullCount = 0;
    for (Tuple t : tuples) {
      nullCount += t.getLong("nullCount");
    }
    assertEquals(nullCount, 6L);
  }
  @Test
  public void testNulls() throws Exception {