LUCENE-5666: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5666@1595228 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-05-16 15:04:20 +00:00
commit 3c78c32c37
33 changed files with 2258 additions and 71 deletions

View File

@@ -200,10 +200,19 @@ Bug fixes
* LUCENE-5668: Fix off-by-one in TieredMergePolicy (Mike McCandless)
* LUCENE-5673: MMapDirectory: Work around a "bug" in the JDK that throws
a confusing OutOfMemoryError wrapped inside IOException if the FileChannel
mapping failed because of lack of virtual address space. The IOException is
rethrown with more useful information about the problem, omitting the
incorrect OutOfMemoryError. (Robert Muir, Uwe Schindler)
Test Framework
* LUCENE-5622: Fail tests if they print over the given limit of bytes to
System.out or System.err. (Robert Muir, Dawid Weiss)
* LUCENE-5619: Added backwards compatibility tests to ensure we can update existing
indexes with doc-values updates. (Shai Erera, Robert Muir)
======================= Lucene 4.8.0 =======================

View File

@@ -28,10 +28,10 @@ import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
import com.ibm.icu.lang.UScript;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
public class TestICUTokenizer extends BaseTokenStreamTestCase {
@@ -270,4 +270,43 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
ts.end();
}
}
/** test for bugs like http://bugs.icu-project.org/trac/ticket/10767 */
public void testICUConcurrency() throws Exception {
int numThreads = 8;
final CountDownLatch startingGun = new CountDownLatch(1);
Thread threads[] = new Thread[numThreads];
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
long tokenCount = 0;
final String contents = "英 เบียร์ ビール ເບຍ abc";
for (int i = 0; i < 1000; i++) {
try (Tokenizer tokenizer = new ICUTokenizer()) {
tokenizer.setReader(new StringReader(contents));
tokenizer.reset();
while (tokenizer.incrementToken()) {
tokenCount++;
}
tokenizer.end();
}
}
if (VERBOSE) {
System.out.println(tokenCount);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
threads[i].start();
}
startingGun.countDown();
for (int i = 0; i < threads.length; i++) {
threads[i].join();
}
}
}

View File

@@ -56,17 +56,14 @@ public class StreamUtils {
try {
return csfType==null ? in : new CompressorStreamFactory().createCompressorInputStream(csfType, in);
} catch (CompressorException e) {
IOException ioe = new IOException(e.getMessage());
ioe.initCause(e);
throw ioe;
throw new IOException(e.getMessage(), e);
}
}
private OutputStream outputStream(OutputStream os) throws IOException {
try {
return csfType==null ? os : new CompressorStreamFactory().createCompressorOutputStream(csfType, os);
} catch (CompressorException e) {
IOException ioe = new IOException(e.getMessage());
ioe.initCause(e);
throw ioe;
throw new IOException(e.getMessage(), e);
}
}
}

View File

@@ -168,9 +168,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
try {
bd = (BigDecimal) decoder.parse(scratch.utf8ToString());
} catch (ParseException pe) {
CorruptIndexException e = new CorruptIndexException("failed to parse BigDecimal value (resource=" + in + ")");
e.initCause(pe);
throw e;
throw new CorruptIndexException("failed to parse BigDecimal value (resource=" + in + ")", pe);
}
SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
@@ -231,9 +229,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
try {
len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
} catch (ParseException pe) {
CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
e.initCause(pe);
throw e;
throw new CorruptIndexException("failed to parse int length (resource=" + in + ")", pe);
}
result.bytes = new byte[len];
result.offset = 0;
@@ -263,9 +259,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
try {
len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
} catch (ParseException pe) {
CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
e.initCause(pe);
throw e;
throw new CorruptIndexException("failed to parse int length (resource=" + in + ")", pe);
}
// skip past bytes
byte bytes[] = new byte[len];
@@ -310,9 +304,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
try {
return (int) ordDecoder.parse(scratch.utf8ToString()).longValue()-1;
} catch (ParseException pe) {
CorruptIndexException e = new CorruptIndexException("failed to parse ord (resource=" + in + ")");
e.initCause(pe);
throw e;
throw new CorruptIndexException("failed to parse ord (resource=" + in + ")", pe);
}
} catch (IOException ioe) {
throw new RuntimeException(ioe);
@@ -332,9 +324,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
try {
len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
} catch (ParseException pe) {
CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
e.initCause(pe);
throw e;
throw new CorruptIndexException("failed to parse int length (resource=" + in + ")", pe);
}
result.bytes = new byte[len];
result.offset = 0;
@@ -410,9 +400,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
try {
len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
} catch (ParseException pe) {
CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
e.initCause(pe);
throw e;
throw new CorruptIndexException("failed to parse int length (resource=" + in + ")", pe);
}
result.bytes = new byte[len];
result.offset = 0;

View File

@@ -24,8 +24,13 @@ import java.io.IOException;
* an inconsistency in the index.
*/
public class CorruptIndexException extends IOException {
/** Sole constructor. */
/** Create exception with a message only */
public CorruptIndexException(String message) {
super(message);
}
/** Create exception with message and root cause. */
public CorruptIndexException(String message, Throwable cause) {
super(message, cause);
}
}
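Why the new constructor matters, as a minimal caller-side sketch (class and values hypothetical): the root cause is chained in a single statement instead of the three-line initCause() pattern removed throughout this commit, and it remains available via getCause().

import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;

// Hypothetical example class, not part of this commit.
class ChainedCauseExample {
  static long decode(String raw, Object resource) throws IOException {
    try {
      return Long.parseLong(raw);
    } catch (NumberFormatException nfe) {
      // one statement, and nfe survives as getCause():
      throw new CorruptIndexException("failed to parse value (resource=" + resource + ")", nfe);
    }
  }
}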

View File

@@ -1708,11 +1708,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
for(int i=0;i<size;i++) {
final MergePolicy.OneMerge merge = mergeExceptions.get(i);
if (merge.maxNumSegments != -1) {
IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
final Throwable t = merge.getException();
if (t != null)
err.initCause(t);
throw err;
throw new IOException("background merge hit exception: " + merge.segString(directory), merge.getException());
}
}
}
@@ -1808,12 +1804,7 @@
if (pendingMerges.contains(merge) || runningMerges.contains(merge)) {
running = true;
}
Throwable t = merge.getException();
if (t != null) {
IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
ioe.initCause(t);
throw ioe;
}
throw new IOException("background merge hit exception: " + merge.segString(directory), merge.getException());
}
// If any of our merges are still running, wait:

View File

@@ -38,8 +38,7 @@ public final class TwoPhaseCommitTool {
/** Sole constructor. */
public PrepareCommitFailException(Throwable cause, TwoPhaseCommit obj) {
super("prepareCommit() failed on " + obj);
initCause(cause);
super("prepareCommit() failed on " + obj, cause);
}
}
@@ -51,8 +50,7 @@
/** Sole constructor. */
public CommitFailException(Throwable cause, TwoPhaseCommit obj) {
super("commit() failed on " + obj);
initCause(cause);
super("commit() failed on " + obj, cause);
}
}

View File

@@ -86,11 +86,7 @@ public abstract class Lock implements Closeable {
if (failureReason != null) {
reason += ": " + failureReason;
}
LockObtainFailedException e = new LockObtainFailedException(reason);
if (failureReason != null) {
e.initCause(failureReason);
}
throw e;
throw new LockObtainFailedException(reason, failureReason);
}
try {
Thread.sleep(LOCK_POLL_INTERVAL);

View File

@@ -30,4 +30,8 @@ public class LockObtainFailedException extends IOException {
public LockObtainFailedException(String message) {
super(message);
}
public LockObtainFailedException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@@ -28,6 +28,7 @@ import java.nio.file.StandardOpenOption;
import java.security.AccessController;
import java.security.PrivilegedExceptionAction;
import java.security.PrivilegedActionException;
import java.util.Locale;
import java.lang.reflect.Method;
import org.apache.lucene.util.Constants;
@@ -75,6 +76,7 @@ import org.apache.lucene.util.Constants;
* blocked on IO. The channel will remain closed and subsequent access
* to {@link MMapDirectory} will throw a {@link ClosedChannelException}.
* </p>
* @see <a href="http://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html">Blog post about MMapDirectory</a>
*/
public class MMapDirectory extends FSDirectory {
private boolean useUnmapHack = UNMAP_SUPPORTED;
@@ -216,7 +218,7 @@ public class MMapDirectory extends FSDirectory {
private final boolean useUnmapHack;
MMapIndexInput(String resourceDescription, FileChannel fc) throws IOException {
super(resourceDescription, map(fc, 0, fc.size()), fc.size(), chunkSizePower, getUseUnmap());
super(resourceDescription, map(resourceDescription, fc, 0, fc.size()), fc.size(), chunkSizePower, getUseUnmap());
this.useUnmapHack = getUseUnmap();
}
@@ -244,18 +246,16 @@ }
}
});
} catch (PrivilegedActionException e) {
final IOException ioe = new IOException("unable to unmap the mapped buffer");
ioe.initCause(e.getCause());
throw ioe;
throw new IOException("Unable to unmap the mapped buffer: " + toString(), e.getCause());
}
}
}
}
/** Maps a file into a set of buffers */
ByteBuffer[] map(FileChannel fc, long offset, long length) throws IOException {
final ByteBuffer[] map(String resourceDescription, FileChannel fc, long offset, long length) throws IOException {
if ((length >>> chunkSizePower) >= Integer.MAX_VALUE)
throw new IllegalArgumentException("RandomAccessFile too big for chunk size: " + fc.toString());
throw new IllegalArgumentException("RandomAccessFile too big for chunk size: " + resourceDescription);
final long chunkSize = 1L << chunkSizePower;
@@ -270,10 +270,45 @@ public class MMapDirectory extends FSDirectory {
? chunkSize
: (length - bufferStart)
);
buffers[bufNr] = fc.map(MapMode.READ_ONLY, offset + bufferStart, bufSize);
try {
buffers[bufNr] = fc.map(MapMode.READ_ONLY, offset + bufferStart, bufSize);
} catch (IOException ioe) {
throw convertMapFailedIOException(ioe, resourceDescription, bufSize);
}
bufferStart += bufSize;
}
return buffers;
}
private IOException convertMapFailedIOException(IOException ioe, String resourceDescription, int bufSize) {
final String originalMessage;
final Throwable originalCause;
if (ioe.getCause() instanceof OutOfMemoryError) {
// nested OOM confuses users, because it's "incorrect", just print a plain message:
originalMessage = "Map failed";
originalCause = null;
} else {
originalMessage = ioe.getMessage();
originalCause = ioe.getCause();
}
final String moreInfo;
if (!Constants.JRE_IS_64BIT) {
moreInfo = "MMapDirectory should only be used on 64bit platforms, because the address space on 32bit operating systems is too small. ";
} else if (Constants.WINDOWS) {
moreInfo = "Windows is unfortunately very limited on virtual address space. If your index size is several hundred Gigabytes, consider changing to Linux. ";
} else if (Constants.LINUX) {
moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'), and 'sysctl vm.max_map_count'. ";
} else {
moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'). ";
}
final IOException newIoe = new IOException(String.format(Locale.ENGLISH,
"%s: %s [this may be caused by lack of enough unfragmented virtual address space "+
"or too restrictive virtual memory limits enforced by the operating system, "+
"preventing us to map a chunk of %d bytes. %sMore information: "+
"http://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html]",
originalMessage, resourceDescription, bufSize, moreInfo), originalCause);
newIoe.setStackTrace(ioe.getStackTrace());
return newIoe;
}
}
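What a caller sees after this change, as a minimal sketch (the index path and file name are hypothetical): a failed mmap now surfaces as a plain IOException whose message names the resource and carries the platform advice built above, instead of a nested OutOfMemoryError.

import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MMapDirectory;

// Hypothetical usage sketch, not part of this commit.
public class MMapFailureExample {
  public static void main(String[] args) throws IOException {
    MMapDirectory dir = new MMapDirectory(new File("/path/to/index")); // hypothetical path
    try {
      IndexInput in = dir.openInput("_0.cfs", IOContext.READONCE);     // hypothetical file
      in.close();
    } catch (IOException ioe) {
      // on mapping failure the message now includes the resource description and hints
      System.err.println(ioe.getMessage());
    } finally {
      dir.close();
    }
  }
}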

View File

@@ -162,6 +162,64 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
*/
private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value) throws IOException {
writer.updateNumericDocValue(new Term("id", id), f, value);
writer.updateNumericDocValue(new Term("id", id), cf, value*2);
}
private void updateBinary(IndexWriter writer, String id, String f, String cf, long value) throws IOException {
writer.updateBinaryDocValue(new Term("id", id), f, TestBinaryDocValuesUpdates.toBytes(value));
writer.updateBinaryDocValue(new Term("id", id), cf, TestBinaryDocValuesUpdates.toBytes(value*2));
}
/* // Creates an index with DocValues updates
public void testCreateIndexWithDocValuesUpdates() throws Exception {
// we use a real directory name that is not cleaned up,
// because this method is only used to create backwards
// indexes:
File indexDir = new File("/tmp/idx/dvupdates");
TestUtil.rm(indexDir);
Directory dir = newFSDirectory(indexDir);
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setUseCompoundFile(false).setMergePolicy(NoMergePolicy.INSTANCE);
IndexWriter writer = new IndexWriter(dir, conf);
// create an index w/ few doc-values fields, some with updates and some without
for (int i = 0; i < 30; i++) {
Document doc = new Document();
doc.add(new StringField("id", "" + i, Store.NO));
doc.add(new NumericDocValuesField("ndv1", i));
doc.add(new NumericDocValuesField("ndv1_c", i*2));
doc.add(new NumericDocValuesField("ndv2", i*3));
doc.add(new NumericDocValuesField("ndv2_c", i*6));
doc.add(new BinaryDocValuesField("bdv1", TestBinaryDocValuesUpdates.toBytes(i)));
doc.add(new BinaryDocValuesField("bdv1_c", TestBinaryDocValuesUpdates.toBytes(i*2)));
doc.add(new BinaryDocValuesField("bdv2", TestBinaryDocValuesUpdates.toBytes(i*3)));
doc.add(new BinaryDocValuesField("bdv2_c", TestBinaryDocValuesUpdates.toBytes(i*6)));
writer.addDocument(doc);
if ((i+1) % 10 == 0) {
writer.commit(); // flush every 10 docs
}
}
// first segment: no updates
// second segment: update two fields, same gen
updateNumeric(writer, "10", "ndv1", "ndv1_c", 100L);
updateBinary(writer, "11", "bdv1", "bdv1_c", 100L);
writer.commit();
// third segment: update few fields, different gens, few docs
updateNumeric(writer, "20", "ndv1", "ndv1_c", 100L);
updateBinary(writer, "21", "bdv1", "bdv1_c", 100L);
writer.commit();
updateNumeric(writer, "22", "ndv1", "ndv1_c", 200L); // update the field again
writer.commit();
writer.shutdown();
dir.close();
}*/
final static String[] oldNames = {"40.cfs",
"40.nocfs",
"41.cfs",
@@ -983,4 +1041,62 @@
TestUtil.checkIndex(dir);
dir.close();
}
public static final String dvUpdatesIndex = "dvupdates.48.zip";
private void assertNumericDocValues(AtomicReader r, String f, String cf) throws IOException {
NumericDocValues ndvf = r.getNumericDocValues(f);
NumericDocValues ndvcf = r.getNumericDocValues(cf);
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(ndvcf.get(i), ndvf.get(i)*2);
}
}
private void assertBinaryDocValues(AtomicReader r, String f, String cf) throws IOException {
BinaryDocValues bdvf = r.getBinaryDocValues(f);
BinaryDocValues bdvcf = r.getBinaryDocValues(cf);
BytesRef scratch = new BytesRef();
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(TestBinaryDocValuesUpdates.getValue(bdvcf, i, scratch ), TestBinaryDocValuesUpdates.getValue(bdvf, i, scratch)*2);
}
}
private void verifyDocValues(Directory dir) throws IOException {
DirectoryReader reader = DirectoryReader.open(dir);
for (AtomicReaderContext context : reader.leaves()) {
AtomicReader r = context.reader();
assertNumericDocValues(r, "ndv1", "ndv1_c");
assertNumericDocValues(r, "ndv2", "ndv2_c");
assertBinaryDocValues(r, "bdv1", "bdv1_c");
assertBinaryDocValues(r, "bdv2", "bdv2_c");
}
reader.close();
}
public void testDocValuesUpdates() throws Exception {
File oldIndexDir = createTempDir("dvupdates");
TestUtil.unzip(getDataFile(dvUpdatesIndex), oldIndexDir);
Directory dir = newFSDirectory(oldIndexDir);
verifyDocValues(dir);
// update fields and verify index
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(dir, conf);
updateNumeric(writer, "1", "ndv1", "ndv1_c", 300L);
updateNumeric(writer, "1", "ndv2", "ndv2_c", 300L);
updateBinary(writer, "1", "bdv1", "bdv1_c", 300L);
updateBinary(writer, "1", "bdv2", "bdv2_c", 300L);
writer.commit();
verifyDocValues(dir);
// merge all segments
writer.forceMerge(1);
writer.commit();
verifyDocValues(dir);
writer.shutdown();
dir.close();
}
}

View File

@@ -27,6 +27,7 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@@ -884,4 +885,155 @@ public class TestTermsEnum extends LuceneTestCase {
r.close();
dir.close();
}
// LUCENE-5667
public void testCommonPrefixTerms() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
Set<String> terms = new HashSet<String>();
//String prefix = TestUtil.randomSimpleString(random(), 1, 20);
String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
int numTerms = atLeast(1000);
if (VERBOSE) {
System.out.println("TEST: " + numTerms + " terms; prefix=" + prefix);
}
while (terms.size() < numTerms) {
//terms.add(prefix + TestUtil.randomSimpleString(random(), 1, 20));
terms.add(prefix + TestUtil.randomRealisticUnicodeString(random(), 1, 20));
}
for(String term : terms) {
Document doc = new Document();
doc.add(newStringField("id", term, Field.Store.YES));
w.addDocument(doc);
}
IndexReader r = w.getReader();
if (VERBOSE) {
System.out.println("\nTEST: reader=" + r);
}
TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator(null);
DocsEnum docsEnum = null;
PerThreadPKLookup pkLookup = new PerThreadPKLookup(r, "id");
int iters = atLeast(numTerms*3);
List<String> termsList = new ArrayList<>(terms);
for(int iter=0;iter<iters;iter++) {
String term;
boolean shouldExist;
if (random().nextBoolean()) {
term = termsList.get(random().nextInt(terms.size()));
shouldExist = true;
} else {
term = prefix + TestUtil.randomSimpleString(random(), 1, 20);
shouldExist = terms.contains(term);
}
if (VERBOSE) {
System.out.println("\nTEST: try term=" + term);
System.out.println(" shouldExist?=" + shouldExist);
}
BytesRef termBytesRef = new BytesRef(term);
boolean actualResult = termsEnum.seekExact(termBytesRef);
assertEquals(shouldExist, actualResult);
if (shouldExist) {
docsEnum = termsEnum.docs(null, docsEnum, 0);
int docID = docsEnum.nextDoc();
assertTrue(docID != DocsEnum.NO_MORE_DOCS);
assertEquals(docID, pkLookup.lookup(termBytesRef));
StoredDocument doc = r.document(docID);
assertEquals(term, doc.get("id"));
if (random().nextInt(7) == 1) {
termsEnum.next();
}
} else {
assertEquals(-1, pkLookup.lookup(termBytesRef));
}
if (random().nextInt(7) == 1) {
TermsEnum.SeekStatus status = termsEnum.seekCeil(termBytesRef);
if (shouldExist) {
assertEquals(TermsEnum.SeekStatus.FOUND, status);
} else {
assertNotSame(TermsEnum.SeekStatus.FOUND, status);
}
}
}
r.close();
w.close();
d.close();
}
/** Utility class to do efficient primary-key (only 1 doc contains the
* given term) lookups by segment, re-using the enums. This class is
* not thread safe, so it is the caller's job to create and use one
* instance of this per thread. Do not use this if a term may appear
* in more than one document! It will only return the first one it
* finds. */
static class PerThreadPKLookup {
private final TermsEnum[] termsEnums;
private final DocsEnum[] docsEnums;
private final Bits[] liveDocs;
private final int[] docBases;
private final int numSegs;
private final boolean hasDeletions;
public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {
List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
// Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
@Override
public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
return c2.reader().numDocs() - c1.reader().numDocs();
}
});
termsEnums = new TermsEnum[leaves.size()];
docsEnums = new DocsEnum[leaves.size()];
liveDocs = new Bits[leaves.size()];
docBases = new int[leaves.size()];
int numSegs = 0;
boolean hasDeletions = false;
for(int i=0;i<leaves.size();i++) {
Fields fields = leaves.get(i).reader().fields();
if (fields != null) {
Terms terms = fields.terms(idFieldName);
if (terms != null) {
termsEnums[numSegs] = terms.iterator(null);
assert termsEnums[numSegs] != null;
docBases[numSegs] = leaves.get(i).docBase;
liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
hasDeletions |= leaves.get(i).reader().hasDeletions();
numSegs++;
}
}
}
this.numSegs = numSegs;
this.hasDeletions = hasDeletions;
}
/** Returns docID if found, else -1. */
public int lookup(BytesRef id) throws IOException {
for(int seg=0;seg<numSegs;seg++) {
if (termsEnums[seg].seekExact(id)) {
docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
int docID = docsEnums[seg].nextDoc();
if (docID != DocsEnum.NO_MORE_DOCS) {
return docBases[seg] + docID;
}
assert hasDeletions;
}
}
return -1;
}
// TODO: add reopen method to carry over re-used enums...?
}
}
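A short usage sketch for the PerThreadPKLookup helper above (directory and id value hypothetical): build one instance per thread over a reader and resolve a primary key to a global docID.

// Hypothetical usage, assuming a Directory "dir" whose docs carry a unique "id" field:
DirectoryReader reader = DirectoryReader.open(dir);
PerThreadPKLookup pk = new PerThreadPKLookup(reader, "id");
int docID = pk.lookup(new BytesRef("doc-42"));   // -1 if absent or deleted
if (docID != -1) {
  StoredDocument doc = reader.document(docID);   // exactly one live doc has this id
}
reader.close();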

View File

@@ -36,7 +36,7 @@ com.google.inject.guice.version = 3.0
/com.googlecode.concurrentlinkedhashmap/concurrentlinkedhashmap-lru = 1.2
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
/com.googlecode.mp4parser/isoparser = 1.0-RC-1
/com.ibm.icu/icu4j = 52.1
/com.ibm.icu/icu4j = 53.1
/com.spatial4j/spatial4j = 0.4.1
com.sun.jersey.version = 1.8

View File

@@ -1 +0,0 @@
7dbc327670673acd14b487d120f05747d712c1c0

View File

@@ -0,0 +1 @@
786d9055d4ca8c1aab4a7d4ac8283f973fd7e41f

View File

@@ -113,6 +113,9 @@ New Features
* SOLR-6043: Add ability to set http headers in solr response
(Tomás Fernández Löbbe via Ryan Ernst)
* SOLR-5973: Pluggable Ranking Collectors and Merge Strategies
(Joel Bernstein)
Bug Fixes
----------------------

View File

@@ -288,7 +288,6 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
}
private void prioritizeOverseerNodes() throws KeeperException, InterruptedException {
log.info("prioritizing overseer nodes at {}", LeaderElector.getNodeName(myId));
SolrZkClient zk = zkStateReader.getZkClient();
if(!zk.exists(ZkStateReader.ROLES,true))return;
Map m = (Map) ZkStateReader.fromJSON(zk.getData(ZkStateReader.ROLES, null, new Stat(), true));
@@ -296,6 +295,7 @@
List overseerDesignates = (List) m.get("overseer");
if(overseerDesignates==null || overseerDesignates.isEmpty()) return;
if(overseerDesignates.size() == 1 && overseerDesignates.contains(getLeaderNode(zk))) return;
log.info("prioritizing overseer nodes at {}", LeaderElector.getNodeName(myId));
log.info("overseer designates {}", overseerDesignates);
List<String> nodeNames = getSortedOverseerNodeNames(zk);

View File

@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import org.apache.solr.search.SolrIndexSearcher;
import java.util.Comparator;
import java.io.IOException;
/**
* The MergeStrategy interface defines custom merge logic for distributed searches.
**/
public interface MergeStrategy {
/**
* merge defines the merging behavior of results that are collected from the
* shards during a distributed search.
*
**/
public void merge(ResponseBuilder rb, ShardRequest sreq);
/**
* mergesIds must return true if the merge method merges document ids from the shards.
* If it merges other output from the shards it must return false.
* */
public boolean mergesIds();
/**
* handlesMergeFields must return true if the MergeStrategy
* implements a custom handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher)
* */
public boolean handlesMergeFields();
/**
* Implement handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) if
* your merge strategy needs more complex data than the sort fields provide.
* */
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException;
/**
* Defines the order that the mergeStrategies are applied. Lower costs are applied first.
* */
public int getCost();
public static final Comparator MERGE_COMP = new Comparator() {
public int compare(Object o1, Object o2) {
MergeStrategy m1 = (MergeStrategy)o1;
MergeStrategy m2 = (MergeStrategy)o2;
return m1.getCost()-m2.getCost();
}
};
}
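For orientation, a minimal concrete implementation sketch (class name hypothetical): it opts out of id-merging and merge-field handling, so only merge() and getCost() carry behavior.

import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.search.SolrIndexSearcher;

// Hypothetical example, not part of this commit.
public class NoOpMergeStrategy implements MergeStrategy {
  public void merge(ResponseBuilder rb, ShardRequest sreq) {
    // inspect or rewrite the collected shard responses here
  }
  public boolean mergesIds() { return false; }          // QueryComponent still merges ids
  public boolean handlesMergeFields() { return false; } // doFieldSortValues() runs as usual
  public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {}
  public int getCost() { return 10; }                   // lower cost is applied first
}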

View File

@@ -77,6 +77,7 @@ import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.RankQuery;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.grouping.CommandHandler;
import org.apache.solr.search.grouping.GroupingSpecification;
@@ -98,6 +99,8 @@ import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTran
import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer;
import org.apache.solr.util.SolrPluginUtils;
import java.util.Collections;
import java.util.Comparator;
/**
* TODO!
@@ -147,6 +150,17 @@ public class QueryComponent extends SearchComponent
// normalize a null query to a query that matches nothing
q = new BooleanQuery();
}
if(q instanceof RankQuery) {
MergeStrategy mergeStrategy = ((RankQuery)q).getMergeStrategy();
if(mergeStrategy != null) {
rb.addMergeStrategy(mergeStrategy);
if(mergeStrategy.handlesMergeFields()) {
rb.mergeFieldHandler = mergeStrategy;
}
}
}
rb.setQuery( q );
rb.setSortSpec( parser.getSort(true) );
rb.setQparser(parser);
@@ -473,7 +487,13 @@
rb.getNextCursorMark().getSerializedTotem());
}
}
doFieldSortValues(rb, searcher);
if(rb.mergeFieldHandler != null) {
rb.mergeFieldHandler.handleMergeFields(rb, searcher);
} else {
doFieldSortValues(rb, searcher);
}
doPrefetch(rb);
}
@@ -821,6 +841,22 @@
private void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
List<MergeStrategy> mergeStrategies = rb.getMergeStrategies();
if(mergeStrategies != null) {
Collections.sort(mergeStrategies, MergeStrategy.MERGE_COMP);
boolean idsMerged = false;
for(MergeStrategy mergeStrategy : mergeStrategies) {
mergeStrategy.merge(rb, sreq);
if(mergeStrategy.mergesIds()) {
idsMerged = true;
}
}
if(idsMerged) {
return; //ids were merged above so return.
}
}
SortSpec ss = rb.getSortSpec();
Sort sort = ss.getSort();

View File

@@ -40,6 +40,7 @@ import org.apache.solr.search.grouping.distributed.command.QueryCommandResult;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
@@ -58,6 +59,7 @@ public class ResponseBuilder
public boolean doExpand;
public boolean doStats;
public boolean doTerms;
public MergeStrategy mergeFieldHandler;
private boolean needDocList = false;
private boolean needDocSet = false;
@@ -74,6 +76,9 @@
private CursorMark cursorMark;
private CursorMark nextCursorMark;
private List<MergeStrategy> mergeStrategies;
private DocListAndSet results = null;
private NamedList<Object> debugInfo = null;
private RTimer timer = null;
@@ -230,7 +235,23 @@
debugResults = dbg;
debugTrack = dbg;
}
public void addMergeStrategy(MergeStrategy mergeStrategy) {
if(mergeStrategies == null) {
mergeStrategies = new ArrayList();
}
mergeStrategies.add(mergeStrategy);
}
public List<MergeStrategy> getMergeStrategies() {
return this.mergeStrategies;
}
public void setResponseDocs(SolrDocumentList _responseDocs) {
this._responseDocs = _responseDocs;
}
public boolean isDebugTrack() {
return debugTrack;
}

View File

@@ -35,7 +35,7 @@ public class ShardDoc extends FieldDoc {
public String shard;
public String shardAddress; // TODO
int orderInShard;
public int orderInShard;
// the position of this doc within the shard... this can be used
// to short-circuit comparisons if the shard is equal, and can
// also be used to break ties within the same shard.

View File

@@ -108,9 +108,7 @@ public class XSLTResponseWriter implements QueryResponseWriter {
try {
t.transform(source, result);
} catch(TransformerException te) {
final IOException ioe = new IOException("XSLT transformation error");
ioe.initCause(te);
throw ioe;
throw new IOException("XSLT transformation error", te);
}
}

View File

@@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.Query;
import org.apache.solr.handler.component.MergeStrategy;
public abstract class RankQuery extends Query {
public abstract TopDocsCollector getTopDocsCollector(int len, SolrIndexSearcher.QueryCommand cmd);
public abstract MergeStrategy getMergeStrategy();
}
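A hypothetical subclass sketch showing the contract: the query supplies its own TopDocsCollector (consumed by SolrIndexSearcher.buildTopDocsCollector below) and may return null from getMergeStrategy() when no custom distributed merge is needed.

import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.search.RankQuery;
import org.apache.solr.search.SolrIndexSearcher;

// Hypothetical example, not part of this commit.
public class ScoreRankQuery extends RankQuery {
  @Override
  public TopDocsCollector getTopDocsCollector(int len, SolrIndexSearcher.QueryCommand cmd) {
    // a real implementation would re-rank here; this just keeps plain score order
    return TopScoreDocCollector.create(len, true);
  }
  @Override
  public MergeStrategy getMergeStrategy() {
    return null; // distributed merging falls back to the default id merge
  }
  @Override
  public String toString(String field) { // required by org.apache.lucene.search.Query
    return "scorerank";
  }
}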

View File

@@ -1499,7 +1499,13 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
* TopDocsCollector to use.
*/
private TopDocsCollector buildTopDocsCollector(int len, QueryCommand cmd) throws IOException {
Query q = cmd.getQuery();
if(q instanceof RankQuery) {
RankQuery rq = (RankQuery)q;
return rq.getTopDocsCollector(len, cmd);
}
if (null == cmd.getSort()) {
assert null == cmd.getCursorMark() : "have cursor but no sort";
return TopScoreDocCollector.create(len, true);

View File

@@ -144,7 +144,7 @@ public final class SystemIdResolver implements EntityResolver, EntityResolver2 {
return is;
} catch (RuntimeException re) {
// unfortunately XInclude fallback only works with IOException, but openResource() never throws that one
throw (IOException) (new IOException(re.getMessage()).initCause(re));
throw new IOException(re.getMessage(), re);
}
} else {
// resolve all other URIs using the standard resolver

View File

@@ -83,9 +83,7 @@ public class TransformerProvider {
result = lastTemplates.newTransformer();
} catch(TransformerConfigurationException tce) {
log.error(getClass().getName(), "getTransformer", tce);
final IOException ioe = new IOException("newTransformer fails ( " + lastFilename + ")");
ioe.initCause(tce);
throw ioe;
throw new IOException("newTransformer fails ( " + lastFilename + ")", tce);
}
return result;
@@ -114,9 +112,7 @@ }
}
} catch (Exception e) {
log.error(getClass().getName(), "newTemplates", e);
final IOException ioe = new IOException("Unable to initialize Templates '" + filename + "'");
ioe.initCause(e);
throw ioe;
throw new IOException("Unable to initialize Templates '" + filename + "'", e);
}
lastFilename = filename;

View File

@@ -0,0 +1,579 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- This is a "kitchen sink" config file that tests can use.
When writing a new test, feel free to add *new* items (plugins,
config options, etc...) as long as they don't break any existing
tests. If you need to test something esoteric please add a new
"solrconfig-your-esoteric-purpose.xml" config file.
Note in particular that this test is used by MinimalSchemaTest so
anything added to this file needs to work correctly even if there
is no uniqueKey or defaultSearchField.
-->
<config>
<jmx />
<!-- Used to specify an alternate directory to hold all index data.
It defaults to "index" if not present, and should probably
not be changed if replication is in use. -->
<dataDir>${solr.data.dir:}</dataDir>
<!-- The DirectoryFactory to use for indexes.
solr.StandardDirectoryFactory, the default, is filesystem based.
solr.RAMDirectoryFactory is memory based and not persistent. -->
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}">
<double name="maxWriteMBPerSecDefault">1000000</double>
<double name="maxWriteMBPerSecFlush">2000000</double>
<double name="maxWriteMBPerSecMerge">3000000</double>
<double name="maxWriteMBPerSecRead">4000000</double>
</directoryFactory>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<updateHandler class="solr.DirectUpdateHandler2">
<!-- autocommit pending docs if certain criteria are met
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>3600000</maxTime>
</autoCommit>
-->
<!-- represents a lower bound on the frequency that commits may
occur (in seconds). NOTE: not yet implemented
<commitIntervalLowerBound>0</commitIntervalLowerBound>
-->
<!-- The RunExecutableListener executes an external command.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">/var/opt/resin3/__PORT__/scripts/solr/snapshooter</str>
<str name="dir">/var/opt/resin3/__PORT__</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
<updateLog enable="${enable.update.log:true}">
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
<commitWithin>
<softCommit>${solr.commitwithin.softcommit:true}</softCommit>
</commitWithin>
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or wildcard queries that expand to big boolean
queries. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
<filterCache
class="solr.search.FastLRUCache"
size="512"
initialSize="512"
autowarmCount="2"/>
<queryResultCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="2"/>
<documentCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<cache name="perSegFilter"
class="solr.search.LRUCache"
size="10"
initialSize="0"
autowarmCount="10" />
<!-- If true, stored fields that are not requested will be loaded lazily.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!--
<cache name="myUserCache"
class="solr.search.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="MyRegenerator"
/>
-->
<!--
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<queryResultWindowSize>10</queryResultWindowSize>
<!-- set maxSize artificially low to exercise both types of sets -->
<HashDocSet maxSize="3" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index)
-->
<boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain prewarming data from. -->
<!--
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
</query>
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the 'qt' param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler">
<bool name="httpCaching">true</bool>
</requestHandler>
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
</lst>
</requestHandler>
<requestHandler name="dismax" class="solr.SearchHandler" >
<lst name="defaults">
<str name="defType">dismax</str>
<str name="q.alt">*:*</str>
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</lst>
</requestHandler>
<requestHandler name="mock" class="org.apache.solr.core.MockQuerySenderListenerReqHandler"/>
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
<!-- test query parameter defaults -->
<requestHandler name="defaults" class="solr.StandardRequestHandler">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<!-- test query parameter defaults -->
<requestHandler name="lazy" class="solr.StandardRequestHandler" startup="lazy">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<requestHandler name="/update" class="solr.UpdateRequestHandler" />
<searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent">
<!-- This is slightly different from the field value so we can test dealing with token offset changes -->
<str name="queryAnalyzerFieldType">lowerpunctfilt</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellchecker1</str>
<str name="buildOnCommit">false</str>
</lst>
<lst name="spellchecker">
<str name="name">direct</str>
<str name="classname">DirectSolrSpellChecker</str>
<str name="field">lowerfilt</str>
<int name="minQueryLength">3</int>
</lst>
<lst name="spellchecker">
<str name="name">wordbreak</str>
<str name="classname">solr.WordBreakSolrSpellChecker</str>
<str name="field">lowerfilt</str>
<str name="combineWords">true</str>
<str name="breakWords">true</str>
<int name="maxChanges">10</int>
</lst>
<lst name="spellchecker">
<str name="name">multipleFields</str>
<str name="field">lowerfilt1and2</str>
<str name="spellcheckIndexDir">spellcheckerMultipleFields</str>
<str name="buildOnCommit">false</str>
</lst>
<!-- Example of using different distance measure -->
<lst name="spellchecker">
<str name="name">jarowinkler</str>
<str name="field">lowerfilt</str>
<!-- Use a different Distance Measure -->
<str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
<str name="spellcheckIndexDir">spellchecker2</str>
</lst>
<lst name="spellchecker">
<str name="classname">solr.FileBasedSpellChecker</str>
<str name="name">external</str>
<str name="sourceLocation">spellings.txt</str>
<str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">spellchecker3</str>
</lst>
<!-- Comparator -->
<lst name="spellchecker">
<str name="name">freq</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFreq</str>
<!-- comparatorClass can be one of:
1. score (default)
2. freq (Frequency first, then score)
3. A fully qualified class name
-->
<str name="comparatorClass">freq</str>
<str name="buildOnCommit">false</str>
</lst>
<lst name="spellchecker">
<str name="name">fqcn</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFQCN</str>
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
<str name="buildOnCommit">false</str>
</lst>
<lst name="spellchecker">
<str name="name">perDict</str>
<str name="classname">org.apache.solr.handler.component.DummyCustomParamSpellChecker</str>
<str name="field">lowerfilt</str>
</lst>
</searchComponent>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
<arr name="components">
<str>termsComp</str>
</arr>
</requestHandler>
<!--
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
-->
<queryConverter name="queryConverter" class="org.apache.solr.spelling.SpellingQueryConverter"/>
<requestHandler name="spellCheckCompRH" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<!-- omp = Only More Popular -->
<str name="spellcheck.onlyMorePopular">false</str>
<!-- exr = Extended Results -->
<str name="spellcheck.extendedResults">false</str>
<!-- The number of suggestions to return -->
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckCompRH_Direct" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="spellcheck.dictionary">direct</str>
<str name="spellcheck.onlyMorePopular">false</str>
<str name="spellcheck.extendedResults">false</str>
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckWithWordbreak" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="spellcheck.dictionary">default</str>
<str name="spellcheck.dictionary">wordbreak</str>
<str name="spellcheck.count">20</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckWithWordbreak_Direct" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="spellcheck.dictionary">direct</str>
<str name="spellcheck.dictionary">wordbreak</str>
<str name="spellcheck.count">20</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckCompRH1" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
<str name="qf">lowerfilt1^1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="mltrh" class="org.apache.solr.handler.component.SearchHandler">
</requestHandler>
<searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
<requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
</lst>
<arr name="last-components">
<str>tvComponent</str>
</arr>
</requestHandler>
<!-- test elevation -->
<searchComponent name="elevate" class="org.apache.solr.handler.component.QueryElevationComponent" >
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent>
<requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<arr name="last-components">
<str>elevate</str>
</arr>
</requestHandler>
<requestHandler name="/mlt" class="solr.MoreLikeThisHandler">
</requestHandler>
<searchComponent class="solr.HighlightComponent" name="highlight">
<highlighting>
<!-- Configure the standard fragmenter -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>
<!-- Configure the standard fragListBuilder -->
<fragListBuilder name="simple" class="org.apache.solr.highlight.SimpleFragListBuilder" default="true"/>
<!-- Configure the standard fragmentsBuilder -->
<fragmentsBuilder name="simple" class="org.apache.solr.highlight.SimpleFragmentsBuilder" default="true"/>
<fragmentsBuilder name="scoreOrder" class="org.apache.solr.highlight.ScoreOrderFragmentsBuilder"/>
<boundaryScanner name="simple" class="solr.highlight.SimpleBoundaryScanner" default="true">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
</lst>
</boundaryScanner>
<boundaryScanner name="breakIterator" class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.type">WORD</str>
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>
<!-- enable streaming for testing... -->
<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr" never304="false">
<cacheControl>max-age=30, public</cacheControl>
</httpCaching>
</requestDispatcher>
<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<admin>
<defaultQuery>solr</defaultQuery>
<gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles>
</admin>
<!-- test getting system property -->
<propTest attr1="${solr.test.sys.prop1}-$${literal}"
attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
<queryParser name="rank" class="org.apache.solr.search.TestRankQueryPlugin"/>
<updateRequestProcessorChain name="dedupe">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">false</bool>
<bool name="overwriteDupes">true</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="dedupe-allfields">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">false</bool>
<bool name="overwriteDupes">false</bool>
<str name="signatureField">id</str>
<str name="fields"></str>
<str name="signatureClass">org.apache.solr.update.processor.Lookup3Signature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="stored_sig">
<!-- this chain is valid even though the signature field is not
indexed, because we are not asking for dups to be overwritten
-->
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<str name="signatureField">non_indexed_signature_sS</str>
<bool name="overwriteDupes">false</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="uniq-fields">
<processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
<arr name="fieldName">
<str>uniq</str>
<str>uniq2</str>
<str>uniq3</str>
</arr>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="distrib-dup-test-chain-explicit">
<!-- explicit test using processors before and after distrib -->
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_A_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.DistributedUpdateProcessorFactory" />
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_B_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="distrib-dup-test-chain-implicit">
<!-- implicit test w/o distrib declared-->
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_A_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_B_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
</config>

View File

@@ -0,0 +1,181 @@
package org.apache.solr.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.junit.BeforeClass;
import java.util.Arrays;
/**
* Test for QueryComponent's distributed querying
*
* @see org.apache.solr.handler.component.QueryComponent
*/
public class MergeStrategyTest extends BaseDistributedSearchTestCase {
public MergeStrategyTest() {
fixShardCount = true;
shardCount = 3;
stress = 0;
}
@BeforeClass
public static void setUpBeforeClass() throws Exception {
initCore("solrconfig-plugcollector.xml", "schema15.xml");
}
@Override
public void doTest() throws Exception {
del("*:*");
index_specific(0,"id","1", "sort_i", "5");
index_specific(0,"id","2", "sort_i", "50");
index_specific(1,"id","5", "sort_i", "4");
index_specific(1,"id","6", "sort_i", "10");
index_specific(0,"id","7", "sort_i", "1");
index_specific(1,"id","8", "sort_i", "2");
index_specific(2,"id","9", "sort_i", "1000");
index_specific(2,"id","10", "sort_i", "1500");
index_specific(2,"id","11", "sort_i", "1300");
index_specific(1,"id","12", "sort_i", "15");
index_specific(1,"id","13", "sort_i", "16");
commit();
handle.put("explain", SKIPVAL);
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("score", SKIPVAL);
handle.put("wt", SKIP);
handle.put("distrib", SKIP);
handle.put("shards.qt", SKIP);
handle.put("shards", SKIP);
handle.put("q", SKIP);
handle.put("maxScore", SKIPVAL);
handle.put("_version_", SKIP);
//Test mergeStrategy that uses score
query("q", "{!rank q=$qq}", "qq", "*:*", "rows","12", "sort", "sort_i asc", "fl","*,score");
//Test without mergeStrategy
query("q", "*:*", "rows","12", "sort", "sort_i asc");
//Test mergeStrategy1 that uses a sort field.
query("q", "{!rank mergeStrategy=1 q=$qq}", "qq", "*:*", "rows","12", "sort", "sort_i asc");
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("qq", "*:*");
params.add("rows", "12");
params.add("q", "{!rank q=$qq}");
params.add("sort", "sort_i asc");
params.add("fl","*,score");
setDistributedParams(params);
QueryResponse rsp = queryServer(params);
assertOrder(rsp,"10","11","9","2","13","12","6","1","5","8","7");
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("rows", "12");
params.add("sort", "sort_i asc");
params.add("fl","*,score");
setDistributedParams(params);
rsp = queryServer(params);
assertOrder(rsp,"7","8","5","1","6","12","13","2","9","11","10");
MergeStrategy m1 = new MergeStrategy() {
@Override
public void merge(ResponseBuilder rb, ShardRequest sreq) {
}
public boolean mergesIds() {
return true;
}
public boolean handlesMergeFields() { return false;}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {}
@Override
public int getCost() {
return 1;
}
};
MergeStrategy m2 = new MergeStrategy() {
@Override
public void merge(ResponseBuilder rb, ShardRequest sreq) {
}
public boolean mergesIds() {
return true;
}
public boolean handlesMergeFields() { return false;}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {}
@Override
public int getCost() {
return 100;
}
};
MergeStrategy m3 = new MergeStrategy() {
@Override
public void merge(ResponseBuilder rb, ShardRequest sreq) {
}
public boolean mergesIds() {
return false;
}
public boolean handlesMergeFields() { return false;}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {}
@Override
public int getCost() {
return 50;
}
};
MergeStrategy[] merges = {m1,m2,m3};
Arrays.sort(merges, MergeStrategy.MERGE_COMP);
    assertEquals(1, merges[0].getCost());
    assertEquals(50, merges[1].getCost());
    assertEquals(100, merges[2].getCost());
}
private void assertOrder(QueryResponse rsp, String ... docs) throws Exception {
SolrDocumentList list = rsp.getResults();
for(int i=0; i<docs.length; i++) {
SolrDocument doc = list.get(i);
Object o = doc.getFieldValue("id");
if(!docs[i].equals(o)) {
        throw new Exception("Order is not correct: " + o + " != " + docs[i]);
}
}
}
}


@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
public class RankQueryTest extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-plugcollector.xml", "schema15.xml");
}
@Override
@Before
public void setUp() throws Exception {
    // if you override setUp or tearDown, make sure to call
    // the superclass's version
super.setUp();
clearIndex();
assertU(commit());
}
@Test
public void testPluggableCollector() throws Exception {
String[] doc = {"id","1", "sort_i", "100"};
assertU(adoc(doc));
assertU(commit());
String[] doc1 = {"id","2", "sort_i", "50"};
assertU(adoc(doc1));
String[] doc2 = {"id","3", "sort_i", "1000"};
assertU(adoc(doc2));
assertU(commit());
String[] doc3 = {"id","4", "sort_i", "2000"};
assertU(adoc(doc3));
String[] doc4 = {"id","5", "sort_i", "2"};
assertU(adoc(doc4));
assertU(commit());
String[] doc5 = {"id","6", "sort_i","11"};
assertU(adoc(doc5));
assertU(commit());
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("qq", "*:*");
params.add("q", "{!rank q=$qq}");
params.add("sort","sort_i asc");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[1]/str[@name='id'][.='4']",
"//result/doc[2]/str[@name='id'][.='3']",
"//result/doc[3]/str[@name='id'][.='1']",
"//result/doc[4]/str[@name='id'][.='2']",
"//result/doc[5]/str[@name='id'][.='6']",
"//result/doc[6]/str[@name='id'][.='5']"
);
params = new ModifiableSolrParams();
params.add("qq", "{!edismax bf=$bff}*:*");
params.add("bff", "field(sort_i)");
params.add("q", "{!rank q=$qq collector=1}");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[6]/str[@name='id'][.='4']",
"//result/doc[5]/str[@name='id'][.='3']",
"//result/doc[4]/str[@name='id'][.='1']",
"//result/doc[3]/str[@name='id'][.='2']",
"//result/doc[2]/str[@name='id'][.='6']",
"//result/doc[1]/str[@name='id'][.='5']"
);
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("sort","sort_i asc");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[6]/str[@name='id'][.='4']",
"//result/doc[5]/str[@name='id'][.='3']",
"//result/doc[4]/str[@name='id'][.='1']",
"//result/doc[3]/str[@name='id'][.='2']",
"//result/doc[2]/str[@name='id'][.='6']",
"//result/doc[1]/str[@name='id'][.='5']"
);
}
}


@ -0,0 +1,817 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardDoc;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.Ignore;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
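/**
 * QParserPlugin backing the {!rank} queries in MergeStrategyTest and
 * RankQueryTest; it is loaded via solrconfig-plugcollector.xml rather than
 * run directly, so the class is marked @Ignore to keep the test framework
 * from treating it as a test class.
 */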
@Ignore
public class TestRankQueryPlugin extends QParserPlugin {
public void init(NamedList params) {
}
public QParser createParser(String query, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
return new TestRankQueryParser(query, localParams, params, req);
}
class TestRankQueryParser extends QParser {
public TestRankQueryParser(String query, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
super(query, localParams, params, req);
}
public Query parse() throws SyntaxError {
String qs = localParams.get("q");
QParser parser = QParser.getParser(qs, null, req);
Query q = parser.getQuery();
int mergeStrategy = localParams.getInt("mergeStrategy", 0);
int collector = localParams.getInt("collector", 0);
return new TestRankQuery(collector, mergeStrategy, q);
}
}
class TestRankQuery extends RankQuery {
private int mergeStrategy;
private int collector;
private Query q;
public int hashCode() {
return collector+q.hashCode();
}
public boolean equals(Object o) {
if(o instanceof TestRankQuery) {
TestRankQuery trq = (TestRankQuery)o;
return (trq.q.equals(q) && trq.collector == collector) ;
}
return false;
}
    public Weight createWeight(IndexSearcher indexSearcher) throws IOException {
return q.createWeight(indexSearcher);
}
public void setBoost(float boost) {
q.setBoost(boost);
}
public float getBoost() {
return q.getBoost();
}
public String toString() {
return q.toString();
}
public String toString(String field) {
return q.toString(field);
}
public TestRankQuery(int collector, int mergeStrategy, Query q) {
this.q = q;
this.collector = collector;
this.mergeStrategy = mergeStrategy;
}
public TopDocsCollector getTopDocsCollector(int len, SolrIndexSearcher.QueryCommand cmd) {
if(collector == 0)
return new TestCollector(null);
else
return new TestCollector1(null);
}
public MergeStrategy getMergeStrategy() {
if(mergeStrategy == 0)
return new TestMergeStrategy();
else
return new TestMergeStrategy1();
}
}
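  /**
   * Merge strategy that merges shard responses by score only
   * (mergesIds=true, handlesMergeFields=false); the merge logic is a
   * simplified copy of QueryComponent's standard id merge.
   */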
class TestMergeStrategy implements MergeStrategy {
public int getCost() {
return 1;
}
public boolean mergesIds() {
return true;
}
public boolean handlesMergeFields() {
return false;
}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {
}
public void merge(ResponseBuilder rb, ShardRequest sreq) {
// id to shard mapping, to eliminate any accidental dups
HashMap<Object,String> uniqueDoc = new HashMap<>();
NamedList<Object> shardInfo = null;
if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
shardInfo = new SimpleOrderedMap<>();
rb.rsp.getValues().add(ShardParams.SHARDS_INFO,shardInfo);
}
IndexSchema schema = rb.req.getSchema();
SchemaField uniqueKeyField = schema.getUniqueKeyField();
long numFound = 0;
Float maxScore=null;
boolean partialResults = false;
List<ShardDoc> shardDocs = new ArrayList();
for (ShardResponse srsp : sreq.responses) {
SolrDocumentList docs = null;
if(shardInfo!=null) {
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
if (srsp.getException() != null) {
Throwable t = srsp.getException();
if(t instanceof SolrServerException) {
t = ((SolrServerException)t).getCause();
}
nl.add("error", t.toString() );
StringWriter trace = new StringWriter();
t.printStackTrace(new PrintWriter(trace));
nl.add("trace", trace.toString() );
if (srsp.getShardAddress() != null) {
nl.add("shardAddress", srsp.getShardAddress());
}
}
else {
docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
nl.add("numFound", docs.getNumFound());
nl.add("maxScore", docs.getMaxScore());
nl.add("shardAddress", srsp.getShardAddress());
}
if(srsp.getSolrResponse()!=null) {
nl.add("time", srsp.getSolrResponse().getElapsedTime());
}
shardInfo.add(srsp.getShard(), nl);
}
// now that we've added the shard info, let's only proceed if we have no error.
if (srsp.getException() != null) {
partialResults = true;
continue;
}
if (docs == null) { // could have been initialized in the shards info block above
docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
}
NamedList<?> responseHeader = (NamedList<?>)srsp.getSolrResponse().getResponse().get("responseHeader");
if (responseHeader != null && Boolean.TRUE.equals(responseHeader.get("partialResults"))) {
partialResults = true;
}
// calculate global maxScore and numDocsFound
if (docs.getMaxScore() != null) {
maxScore = maxScore==null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
}
numFound += docs.getNumFound();
for (int i=0; i<docs.size(); i++) {
SolrDocument doc = docs.get(i);
Object id = doc.getFieldValue(uniqueKeyField.getName());
String prevShard = uniqueDoc.put(id, srsp.getShard());
if (prevShard != null) {
// duplicate detected
numFound--;
// For now, just always use the first encountered since we can't currently
// remove the previous one added to the priority queue. If we switched
// to the Java5 PriorityQueue, this would be easier.
continue;
// make which duplicate is used deterministic based on shard
// if (prevShard.compareTo(srsp.shard) >= 0) {
// TODO: remove previous from priority queue
// continue;
// }
}
ShardDoc shardDoc = new ShardDoc();
shardDoc.id = id;
shardDoc.shard = srsp.getShard();
shardDoc.orderInShard = i;
Object scoreObj = doc.getFieldValue("score");
if (scoreObj != null) {
if (scoreObj instanceof String) {
shardDoc.score = Float.parseFloat((String)scoreObj);
} else {
shardDoc.score = (Float)scoreObj;
}
}
shardDocs.add(shardDoc);
} // end for-each-doc-in-response
} // end for-each-response
Collections.sort(shardDocs, new Comparator<ShardDoc>() {
@Override
public int compare(ShardDoc o1, ShardDoc o2) {
if(o1.score < o2.score) {
return 1;
} else if (o1.score > o2.score) {
return -1;
} else {
            return 0;
}
}
});
int resultSize = shardDocs.size();
Map<Object,ShardDoc> resultIds = new HashMap<>();
for (int i=0; i<shardDocs.size(); i++) {
ShardDoc shardDoc = shardDocs.get(i);
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}
// Add hits for distributed requests
// https://issues.apache.org/jira/browse/SOLR-3518
rb.rsp.addToLog("hits", numFound);
SolrDocumentList responseDocs = new SolrDocumentList();
if (maxScore!=null) responseDocs.setMaxScore(maxScore);
responseDocs.setNumFound(numFound);
responseDocs.setStart(0);
// size appropriately
for (int i=0; i<resultSize; i++) responseDocs.add(null);
// save these results in a private area so we can access them
// again when retrieving stored fields.
// TODO: use ResponseBuilder (w/ comments) or the request context?
rb.resultIds = resultIds;
rb.setResponseDocs(responseDocs);
if (partialResults) {
rb.rsp.getResponseHeader().add( "partialResults", Boolean.TRUE );
}
}
}
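  /**
   * Merge strategy that handles its own merge fields: handleMergeFields
   * re-derives each document's sort values on the shard and returns them
   * as "merge_values", which merge() then uses in place of the score.
   */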
class TestMergeStrategy1 implements MergeStrategy {
public int getCost() {
return 1;
}
public boolean mergesIds() {
return true;
}
public boolean handlesMergeFields() {
return true;
}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException {
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
// The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
// currently have an option to return sort field values. Because of this, we
// take the documents given and re-derive the sort values.
//
// TODO: See SOLR-5595
boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false);
if(fsv){
NamedList<Object[]> sortVals = new NamedList<>(); // order is important for the sort fields
IndexReaderContext topReaderContext = searcher.getTopReaderContext();
List<AtomicReaderContext> leaves = topReaderContext.leaves();
AtomicReaderContext currentLeaf = null;
if (leaves.size()==1) {
// if there is a single segment, use that subReader and avoid looking up each time
currentLeaf = leaves.get(0);
leaves=null;
}
DocList docList = rb.getResults().docList;
// sort ids from lowest to highest so we can access them in order
int nDocs = docList.size();
final long[] sortedIds = new long[nDocs];
final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
DocList docs = rb.getResults().docList;
DocIterator it = docs.iterator();
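      // pack the lucene docid into the high 32 bits and the doc's original
      // position in the response into the low 32 bits, so sorting the longs
      // orders docs by docid while preserving each doc's response position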
for (int i=0; i<nDocs; i++) {
sortedIds[i] = (((long)it.nextDoc()) << 32) | i;
scores[i] = docs.hasScores() ? it.score() : Float.NaN;
}
// sort ids and scores together
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
long tmpId = sortedIds[i];
float tmpScore = scores[i];
sortedIds[i] = sortedIds[j];
scores[i] = scores[j];
sortedIds[j] = tmpId;
scores[j] = tmpScore;
}
@Override
protected int compare(int i, int j) {
return Long.compare(sortedIds[i], sortedIds[j]);
}
}.sort(0, sortedIds.length);
SortSpec sortSpec = rb.getSortSpec();
Sort sort = searcher.weightSort(sortSpec.getSort());
SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort();
List<SchemaField> schemaFields = sortSpec.getSchemaFields();
for (int fld = 0; fld < schemaFields.size(); fld++) {
SchemaField schemaField = schemaFields.get(fld);
FieldType ft = null == schemaField? null : schemaField.getType();
SortField sortField = sortFields[fld];
SortField.Type type = sortField.getType();
// :TODO: would be simpler to always serialize every position of SortField[]
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;
FieldComparator comparator = null;
Object[] vals = new Object[nDocs];
int lastIdx = -1;
int idx = 0;
for (int i = 0; i < sortedIds.length; ++i) {
long idAndPos = sortedIds[i];
float score = scores[i];
int doc = (int)(idAndPos >>> 32);
int position = (int)idAndPos;
if (leaves != null) {
idx = ReaderUtil.subIndex(doc, leaves);
currentLeaf = leaves.get(idx);
if (idx != lastIdx) {
// we switched segments. invalidate comparator.
comparator = null;
}
}
if (comparator == null) {
comparator = sortField.getComparator(1,0);
comparator = comparator.setNextReader(currentLeaf);
}
doc -= currentLeaf.docBase; // adjust for what segment this is in
comparator.setScorer(new FakeScorer(doc, score));
comparator.copy(0, doc);
Object val = comparator.value(0);
if (null != ft) val = ft.marshalSortValue(val);
vals[position] = val;
}
sortVals.add(sortField.getField(), vals);
}
rsp.add("merge_values", sortVals);
}
}
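    /**
     * Minimal Scorer stub that reports a fixed docid and score; used only
     * to feed FieldComparator.setScorer while extracting sort values.
     */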
private class FakeScorer extends Scorer {
final int docid;
final float score;
FakeScorer(int docid, float score) {
super(null);
this.docid = docid;
this.score = score;
}
@Override
public int docID() {
return docid;
}
@Override
public float score() throws IOException {
return score;
}
@Override
public int freq() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return 1;
}
@Override
public Weight getWeight() {
throw new UnsupportedOperationException();
}
@Override
public Collection<ChildScorer> getChildren() {
throw new UnsupportedOperationException();
}
}
public void merge(ResponseBuilder rb, ShardRequest sreq) {
// id to shard mapping, to eliminate any accidental dups
HashMap<Object,String> uniqueDoc = new HashMap<>();
NamedList<Object> shardInfo = null;
if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
shardInfo = new SimpleOrderedMap<>();
rb.rsp.getValues().add(ShardParams.SHARDS_INFO,shardInfo);
}
IndexSchema schema = rb.req.getSchema();
SchemaField uniqueKeyField = schema.getUniqueKeyField();
long numFound = 0;
Float maxScore=null;
boolean partialResults = false;
List<ShardDoc> shardDocs = new ArrayList();
for (ShardResponse srsp : sreq.responses) {
SolrDocumentList docs = null;
if(shardInfo!=null) {
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
if (srsp.getException() != null) {
Throwable t = srsp.getException();
if(t instanceof SolrServerException) {
t = ((SolrServerException)t).getCause();
}
nl.add("error", t.toString() );
StringWriter trace = new StringWriter();
t.printStackTrace(new PrintWriter(trace));
nl.add("trace", trace.toString() );
if (srsp.getShardAddress() != null) {
nl.add("shardAddress", srsp.getShardAddress());
}
}
else {
docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
nl.add("numFound", docs.getNumFound());
nl.add("maxScore", docs.getMaxScore());
nl.add("shardAddress", srsp.getShardAddress());
}
if(srsp.getSolrResponse()!=null) {
nl.add("time", srsp.getSolrResponse().getElapsedTime());
}
shardInfo.add(srsp.getShard(), nl);
}
// now that we've added the shard info, let's only proceed if we have no error.
if (srsp.getException() != null) {
partialResults = true;
continue;
}
if (docs == null) { // could have been initialized in the shards info block above
docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
}
NamedList<?> responseHeader = (NamedList<?>)srsp.getSolrResponse().getResponse().get("responseHeader");
if (responseHeader != null && Boolean.TRUE.equals(responseHeader.get("partialResults"))) {
partialResults = true;
}
// calculate global maxScore and numDocsFound
if (docs.getMaxScore() != null) {
maxScore = maxScore==null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
}
numFound += docs.getNumFound();
SortSpec ss = rb.getSortSpec();
Sort sort = ss.getSort();
NamedList sortFieldValues = (NamedList)(srsp.getSolrResponse().getResponse().get("merge_values"));
NamedList unmarshalledSortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema);
List lst = (List)unmarshalledSortFieldValues.getVal(0);
for (int i=0; i<docs.size(); i++) {
SolrDocument doc = docs.get(i);
Object id = doc.getFieldValue(uniqueKeyField.getName());
String prevShard = uniqueDoc.put(id, srsp.getShard());
if (prevShard != null) {
// duplicate detected
numFound--;
// For now, just always use the first encountered since we can't currently
// remove the previous one added to the priority queue. If we switched
// to the Java5 PriorityQueue, this would be easier.
continue;
// make which duplicate is used deterministic based on shard
// if (prevShard.compareTo(srsp.shard) >= 0) {
// TODO: remove previous from priority queue
// continue;
// }
}
ShardDoc shardDoc = new ShardDoc();
shardDoc.id = id;
shardDoc.shard = srsp.getShard();
shardDoc.orderInShard = i;
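          // the unmarshalled sort value (sort_i) from "merge_values"
          // becomes this doc's merge score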
Object scoreObj = lst.get(i);
if (scoreObj != null) {
shardDoc.score = ((Integer)scoreObj).floatValue();
}
shardDocs.add(shardDoc);
} // end for-each-doc-in-response
} // end for-each-response
Collections.sort(shardDocs, new Comparator<ShardDoc>() {
@Override
public int compare(ShardDoc o1, ShardDoc o2) {
if(o1.score < o2.score) {
return 1;
} else if (o1.score > o2.score) {
return -1;
} else {
            return 0;
}
}
});
int resultSize = shardDocs.size();
Map<Object,ShardDoc> resultIds = new HashMap<>();
for (int i=0; i<shardDocs.size(); i++) {
ShardDoc shardDoc = shardDocs.get(i);
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}
// Add hits for distributed requests
// https://issues.apache.org/jira/browse/SOLR-3518
rb.rsp.addToLog("hits", numFound);
SolrDocumentList responseDocs = new SolrDocumentList();
if (maxScore!=null) responseDocs.setMaxScore(maxScore);
responseDocs.setNumFound(numFound);
responseDocs.setStart(0);
// size appropriately
for (int i=0; i<resultSize; i++) responseDocs.add(null);
// save these results in a private area so we can access them
// again when retrieving stored fields.
// TODO: use ResponseBuilder (w/ comments) or the request context?
rb.resultIds = resultIds;
rb.setResponseDocs(responseDocs);
if (partialResults) {
rb.rsp.getResponseHeader().add( "partialResults", Boolean.TRUE );
}
}
private NamedList unmarshalSortValues(SortSpec sortSpec,
NamedList sortFieldValues,
IndexSchema schema) {
NamedList unmarshalledSortValsPerField = new NamedList();
if (0 == sortFieldValues.size()) return unmarshalledSortValsPerField;
List<SchemaField> schemaFields = sortSpec.getSchemaFields();
SortField[] sortFields = sortSpec.getSort().getSort();
int marshalledFieldNum = 0;
for (int sortFieldNum = 0; sortFieldNum < sortFields.length; sortFieldNum++) {
final SortField sortField = sortFields[sortFieldNum];
final SortField.Type type = sortField.getType();
// :TODO: would be simpler to always serialize every position of SortField[]
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;
final String sortFieldName = sortField.getField();
final String valueFieldName = sortFieldValues.getName(marshalledFieldNum);
assert sortFieldName.equals(valueFieldName)
: "sortFieldValues name key does not match expected SortField.getField";
List sortVals = (List)sortFieldValues.getVal(marshalledFieldNum);
final SchemaField schemaField = schemaFields.get(sortFieldNum);
if (null == schemaField) {
unmarshalledSortValsPerField.add(sortField.getField(), sortVals);
} else {
FieldType fieldType = schemaField.getType();
List unmarshalledSortVals = new ArrayList();
for (Object sortVal : sortVals) {
unmarshalledSortVals.add(fieldType.unmarshalSortValue(sortVal));
}
unmarshalledSortValsPerField.add(sortField.getField(), unmarshalledSortVals);
}
marshalledFieldNum++;
}
return unmarshalledSortValsPerField;
}
}
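  /**
   * Collector for collector=0: uses each document's sort_i numeric
   * docvalue as its score, so topDocs() returns docs in descending
   * sort_i order.
   */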
class TestCollector extends TopDocsCollector {
private List<ScoreDoc> list = new ArrayList();
private NumericDocValues values;
private int base;
public TestCollector(PriorityQueue pq) {
super(pq);
}
public boolean acceptsDocsOutOfOrder() {
return false;
}
public void doSetNextReader(AtomicReaderContext context) throws IOException {
values = DocValues.getNumeric(context.reader(), "sort_i");
base = context.docBase;
}
public void collect(int doc) {
list.add(new ScoreDoc(doc+base, (float)values.get(doc)));
}
public int topDocsSize() {
return list.size();
}
public TopDocs topDocs() {
      Collections.sort(list, new Comparator<ScoreDoc>() {
        @Override
        public int compare(ScoreDoc s1, ScoreDoc s2) {
          if (s1.score == s2.score) {
            return 0;
          } else if (s1.score < s2.score) {
            return 1;
          } else {
            return -1;
          }
        }
      });
ScoreDoc[] scoreDocs = list.toArray(new ScoreDoc[list.size()]);
return new TopDocs(list.size(), scoreDocs, 0.0f);
}
public TopDocs topDocs(int start, int len) {
return topDocs();
}
public int getTotalHits() {
return list.size();
}
}
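  /**
   * Collector for collector=1: collects the real query score but
   * topDocs() returns docs in ascending score order.
   */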
class TestCollector1 extends TopDocsCollector {
private List<ScoreDoc> list = new ArrayList();
private int base;
private Scorer scorer;
public TestCollector1(PriorityQueue pq) {
super(pq);
}
public boolean acceptsDocsOutOfOrder() {
return false;
}
public void doSetNextReader(AtomicReaderContext context) throws IOException {
base = context.docBase;
}
public void setScorer(Scorer scorer) {
this.scorer = scorer;
}
public void collect(int doc) throws IOException {
list.add(new ScoreDoc(doc+base, scorer.score()));
}
public int topDocsSize() {
return list.size();
}
public TopDocs topDocs() {
      Collections.sort(list, new Comparator<ScoreDoc>() {
        @Override
        public int compare(ScoreDoc s1, ScoreDoc s2) {
          if (s1.score == s2.score) {
            return 0;
          } else if (s1.score > s2.score) {
            return 1;
          } else {
            return -1;
          }
        }
      });
ScoreDoc[] scoreDocs = list.toArray(new ScoreDoc[list.size()]);
return new TopDocs(list.size(), scoreDocs, 0.0f);
}
public TopDocs topDocs(int start, int len) {
return topDocs();
}
public int getTotalHits() {
return list.size();
}
}
}


@ -1 +0,0 @@
7dbc327670673acd14b487d120f05747d712c1c0


@ -0,0 +1 @@
786d9055d4ca8c1aab4a7d4ac8283f973fd7e41f