mirror of https://github.com/apache/lucene.git
LUCENE-5487: merge trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1575397 13f79535-47bb-0310-9956-ffa450edef68
commit d784980654
|
@ -16,6 +16,7 @@
|
|||
<orderEntry type="library" scope="TEST" name="HSQLDB" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Derby" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Solr DIH test library" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Solr example library" level="project" />
|
||||
<orderEntry type="library" name="Solr core library" level="project" />
|
||||
<orderEntry type="library" name="Solrj library" level="project" />
|
||||
<orderEntry type="library" name="Solr DIH library" level="project" />
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
<orderEntry type="library" name="Solr morphlines core library" level="project" />
|
||||
<orderEntry type="library" name="Solr morphlines cell library" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Solr morphlines core test library" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Solr example library" level="project" />
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
|
||||
<orderEntry type="module" module-name="solr-core" />
|
||||
|
|
|
@ -212,7 +212,7 @@ def checkClassSummaries(fullPath):
|
|||
if inThing:
|
||||
if lineLower.find('</tr>') != -1:
|
||||
if not hasDesc:
|
||||
missing.append((lastCaption, lastItem))
|
||||
missing.append((lastCaption, unEscapeURL(lastItem)))
|
||||
inThing = False
|
||||
continue
|
||||
else:
|
||||
|
@ -298,6 +298,11 @@ def checkSummary(fullPath):
|
|||
f.close()
|
||||
return anyMissing
|
||||
|
||||
def unEscapeURL(s):
|
||||
# Not exhaustive!!
|
||||
s = s.replace('%20', ' ')
|
||||
return s
|
||||
|
||||
def unescapeHTML(s):
|
||||
s = s.replace('&lt;', '<')
|
||||
s = s.replace('&gt;', '>')
|
||||
|
|
|
@ -731,7 +731,7 @@ def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs
|
|||
os.chdir('solr')
|
||||
|
||||
print(" run tests w/ Java 7 and testArgs='%s'..." % testArgs)
|
||||
run('%s; ant clean test %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
|
||||
run('%s; ant clean test -Dtests.slow=false %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
|
||||
|
||||
# test javadocs
|
||||
print(' generate javadocs w/ Java 7...')
|
||||
|
|
|
@ -68,6 +68,13 @@ Optimizations
|
|||
|
||||
======================= Lucene 4.8.0 =======================
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException
|
||||
if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous
|
||||
behavior of silently ignoring these terms, use LengthFilter in your Analyzer.
|
||||
(hossman, Mike McCandless, Varun Thacker)
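Illustrative sketch (not part of this commit): one way to restore the old silently-dropping behavior is to chain LengthFilter into the analyzer. The 255-char bound, the KeywordTokenizer choice, and the Version-taking LengthFilter constructor are assumptions for the 4.x line; note that MAX_TERM_LENGTH itself is measured in UTF-8 bytes, not chars.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.util.Version;

public class DropLongTermsAnalyzerSketch {
  public static Analyzer build() {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        // Silently drop tokens longer than 255 chars instead of letting
        // IndexWriter.addDocument throw IllegalArgumentException.
        TokenStream filtered = new LengthFilter(Version.LUCENE_CURRENT, tokenizer, 1, 255);
        return new TokenStreamComponents(tokenizer, filtered);
      }
    };
  }
}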
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
|
||||
|
@ -89,6 +96,13 @@ New Features
|
|||
|
||||
* LUCENE-5485: Add circumfix support to HunspellStemFilter. (Robert Muir)
|
||||
|
||||
* LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter.
|
||||
(Robert Muir)
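Illustrative sketch (not part of this commit) of wiring the Hunspell support into an analysis chain, using the Dictionary and HunspellStemFilter constructors exercised by the tests in this change; the .aff/.dic file paths are placeholders.

import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;

public class HunspellAnalyzerSketch {
  public static Analyzer build() throws Exception {
    final Dictionary dict;
    // Placeholder paths; any affix file declaring ICONV/OCONV/IGNORE/CIRCUMFIX
    // is handled transparently by Dictionary.
    try (InputStream affix = new FileInputStream("en_US.aff");
         InputStream words = new FileInputStream("en_US.dic")) {
      dict = new Dictionary(affix, words);
    }
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dict));
      }
    };
  }
}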
|
||||
|
||||
* LUCENE-5493: SortingMergePolicy, and EarlyTerminatingSortingCollector
|
||||
support arbitrary Sort specifications.
|
||||
(Robert Muir, Mike McCandless, Adrien Grand)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
|
||||
|
@ -96,6 +110,12 @@ API Changes
|
|||
|
||||
* LUCENE-5468: Move offline Sort (from suggest module) to OfflineSort. (Robert Muir)
|
||||
|
||||
* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector take
|
||||
Sort instead of Sorter. BlockJoinSorter is removed, replaced with
|
||||
BlockJoinComparatorSource, which can take a Sort for ordering of parents
|
||||
and a separate Sort for ordering of children within a block.
|
||||
(Robert Muir, Mike McCandless, Adrien Grand)
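Illustrative sketch (not part of this commit): sorting an index by blocks with the classes described above. The field names and values are invented for the example, and the SortingMergePolicy(MergePolicy, Sort) constructor is assumed from the entry's description.

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.sorter.BlockJoinComparatorSource;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;

public class BlockSortSketch {
  public static SortingMergePolicy build() {
    // Parent documents are marked with a hypothetical docType=parent field.
    Filter parents = new QueryWrapperFilter(new TermQuery(new Term("docType", "parent")));
    // Order whole blocks by the parent's price; children keep index order within a block.
    Sort parentSort = new Sort(new SortField("price", SortField.Type.LONG));
    Sort blockSort = new Sort(new SortField("block", new BlockJoinComparatorSource(parents, parentSort)));
    return new SortingMergePolicy(new TieredMergePolicy(), blockSort);
  }
}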
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-5468: HunspellStemFilter uses 10 to 100x less RAM. It also loads
|
||||
|
|
|
@ -21,14 +21,17 @@ import org.apache.lucene.store.ByteArrayDataOutput;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.CharSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.IntSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.Outputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
|
@ -67,6 +70,9 @@ public class Dictionary {
|
|||
private static final String FLAG_KEY = "FLAG";
|
||||
private static final String COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
|
||||
private static final String CIRCUMFIX_KEY = "CIRCUMFIX";
|
||||
private static final String IGNORE_KEY = "IGNORE";
|
||||
private static final String ICONV_KEY = "ICONV";
|
||||
private static final String OCONV_KEY = "OCONV";
|
||||
|
||||
private static final String NUM_FLAG_TYPE = "num";
|
||||
private static final String UTF8_FLAG_TYPE = "UTF-8";
|
||||
|
@ -110,6 +116,16 @@ public class Dictionary {
|
|||
|
||||
int circumfix = -1; // circumfix flag, or -1 if one is not defined
|
||||
|
||||
// ignored characters (dictionary, affix, inputs)
|
||||
private char[] ignore;
|
||||
|
||||
// FSTs used for ICONV/OCONV, output ord pointing to replacement text
|
||||
FST<CharsRef> iconv;
|
||||
FST<CharsRef> oconv;
|
||||
|
||||
boolean needsInputCleaning;
|
||||
boolean needsOutputCleaning;
|
||||
|
||||
/**
|
||||
* Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
|
||||
* and dictionary files.
|
||||
|
@ -136,9 +152,13 @@ public class Dictionary {
|
|||
*/
|
||||
public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
|
||||
this.ignoreCase = ignoreCase;
|
||||
// hungarian has thousands of AF before the SET, so a 32k buffer is needed
|
||||
BufferedInputStream buffered = new BufferedInputStream(affix, 32768);
|
||||
buffered.mark(32768);
|
||||
this.needsInputCleaning = ignoreCase;
|
||||
this.needsOutputCleaning = false; // set if we have an OCONV
|
||||
// TODO: we really need to probably buffer this on disk since so many newer dictionaries
|
||||
// (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
|
||||
// their encoding... but for now this large buffer is a workaround
|
||||
BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
|
||||
buffered.mark(65536);
|
||||
String encoding = getDictionaryEncoding(buffered);
|
||||
buffered.reset();
|
||||
CharsetDecoder decoder = getJavaEncoding(encoding);
|
||||
|
@ -249,6 +269,29 @@ public class Dictionary {
|
|||
throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber());
|
||||
}
|
||||
circumfix = flagParsingStrategy.parseFlag(parts[1]);
|
||||
} else if (line.startsWith(IGNORE_KEY)) {
|
||||
String parts[] = line.split("\\s+");
|
||||
if (parts.length != 2) {
|
||||
throw new ParseException("Illegal IGNORE declaration", reader.getLineNumber());
|
||||
}
|
||||
ignore = parts[1].toCharArray();
|
||||
Arrays.sort(ignore);
|
||||
needsInputCleaning = true;
|
||||
} else if (line.startsWith(ICONV_KEY) || line.startsWith(OCONV_KEY)) {
|
||||
String parts[] = line.split("\\s+");
|
||||
String type = parts[0];
|
||||
if (parts.length != 2) {
|
||||
throw new ParseException("Illegal " + type + " declaration", reader.getLineNumber());
|
||||
}
|
||||
int num = Integer.parseInt(parts[1]);
|
||||
FST<CharsRef> res = parseConversions(reader, num);
|
||||
if (type.equals("ICONV")) {
|
||||
iconv = res;
|
||||
needsInputCleaning |= iconv != null;
|
||||
} else {
|
||||
oconv = res;
|
||||
needsOutputCleaning |= oconv != null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -291,6 +334,7 @@ public class Dictionary {
|
|||
Map<String,Integer> seenPatterns) throws IOException, ParseException {
|
||||
|
||||
BytesRef scratch = new BytesRef();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String args[] = header.split("\\s+");
|
||||
|
||||
boolean crossProduct = args[2].equals("Y");
|
||||
|
@ -300,9 +344,6 @@ public class Dictionary {
|
|||
ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
|
||||
|
||||
for (int i = 0; i < numLines; i++) {
|
||||
if (currentAffix > Short.MAX_VALUE) {
|
||||
throw new UnsupportedOperationException("Too many affixes, please report this to dev@lucene.apache.org");
|
||||
}
|
||||
assert affixWriter.getPosition() == currentAffix << 3;
|
||||
String line = reader.readLine();
|
||||
String ruleArgs[] = line.split("\\s+");
|
||||
|
@ -345,6 +386,9 @@ public class Dictionary {
|
|||
Integer patternIndex = seenPatterns.get(regex);
|
||||
if (patternIndex == null) {
|
||||
patternIndex = patterns.size();
|
||||
if (patternIndex > Short.MAX_VALUE) {
|
||||
throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org");
|
||||
}
|
||||
seenPatterns.put(regex, patternIndex);
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
patterns.add(pattern);
|
||||
|
@ -355,6 +399,8 @@ public class Dictionary {
|
|||
if (stripOrd < 0) {
|
||||
// already exists in our hash
|
||||
stripOrd = (-stripOrd)-1;
|
||||
} else if (stripOrd > Character.MAX_VALUE) {
|
||||
throw new UnsupportedOperationException("Too many unique strips, please report this to dev@lucene.apache.org");
|
||||
}
|
||||
|
||||
if (appendFlags == null) {
|
||||
|
@ -368,7 +414,7 @@ public class Dictionary {
|
|||
appendFlagsOrd = (-appendFlagsOrd)-1;
|
||||
} else if (appendFlagsOrd > Short.MAX_VALUE) {
|
||||
// this limit is probably flexible, but it's a good sanity check too
|
||||
throw new UnsupportedOperationException("Too many unique flags, please report this to dev@lucene.apache.org");
|
||||
throw new UnsupportedOperationException("Too many unique append flags, please report this to dev@lucene.apache.org");
|
||||
}
|
||||
|
||||
affixWriter.writeShort((short)flag);
|
||||
|
@ -378,6 +424,11 @@ public class Dictionary {
|
|||
affixWriter.writeShort((short)patternOrd);
|
||||
affixWriter.writeShort((short)appendFlagsOrd);
|
||||
|
||||
if (needsInputCleaning) {
|
||||
CharSequence cleaned = cleanInput(affixArg, sb);
|
||||
affixArg = cleaned.toString();
|
||||
}
|
||||
|
||||
List<Character> list = affixes.get(affixArg);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Character>();
|
||||
|
@ -389,6 +440,31 @@ public class Dictionary {
|
|||
}
|
||||
}
|
||||
|
||||
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
|
||||
Map<String,String> mappings = new TreeMap<>();
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
String line = reader.readLine();
|
||||
String parts[] = line.split("\\s+");
|
||||
if (parts.length != 3) {
|
||||
throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
|
||||
}
|
||||
if (mappings.put(parts[1], parts[2]) != null) {
|
||||
throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
|
||||
}
|
||||
}
|
||||
|
||||
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
|
||||
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
|
||||
IntsRef scratchInts = new IntsRef();
|
||||
for (Map.Entry<String,String> entry : mappings.entrySet()) {
|
||||
Util.toUTF16(entry.getKey(), scratchInts);
|
||||
builder.add(scratchInts, new CharsRef(entry.getValue()));
|
||||
}
|
||||
|
||||
return builder.finish();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the encoding specified in the affix file readable through the provided InputStream
|
||||
*
|
||||
|
@ -485,6 +561,8 @@ public class Dictionary {
|
|||
BytesRef flagsScratch = new BytesRef();
|
||||
IntsRef scratchInts = new IntsRef();
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
File unsorted = File.createTempFile("unsorted", "dat", tempDir);
|
||||
try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
|
||||
for (InputStream dictionary : dictionaries) {
|
||||
|
@ -492,16 +570,19 @@ public class Dictionary {
|
|||
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
|
||||
|
||||
while ((line = lines.readLine()) != null) {
|
||||
if (ignoreCase) {
|
||||
if (needsInputCleaning) {
|
||||
int flagSep = line.lastIndexOf('/');
|
||||
if (flagSep == -1) {
|
||||
writer.write(line.toLowerCase(Locale.ROOT).getBytes(IOUtils.CHARSET_UTF_8));
|
||||
CharSequence cleansed = cleanInput(line, sb);
|
||||
writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
|
||||
} else {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(line.substring(0, flagSep).toLowerCase(Locale.ROOT));
|
||||
if (flagSep < line.length()) {
|
||||
sb.append(line.substring(flagSep, line.length()));
|
||||
String text = line.substring(0, flagSep);
|
||||
CharSequence cleansed = cleanInput(text, sb);
|
||||
if (cleansed != sb) {
|
||||
sb.setLength(0);
|
||||
sb.append(cleansed);
|
||||
}
|
||||
sb.append(line.substring(flagSep));
|
||||
writer.write(sb.toString().getBytes(IOUtils.CHARSET_UTF_8));
|
||||
}
|
||||
} else {
|
||||
|
@ -761,4 +842,76 @@ public class Dictionary {
|
|||
static boolean hasFlag(char flags[], char flag) {
|
||||
return Arrays.binarySearch(flags, flag) >= 0;
|
||||
}
|
||||
|
||||
CharSequence cleanInput(CharSequence input, StringBuilder reuse) {
|
||||
reuse.setLength(0);
|
||||
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
char ch = input.charAt(i);
|
||||
|
||||
if (ignore != null && Arrays.binarySearch(ignore, ch) >= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ignoreCase && iconv == null) {
|
||||
// if we have no input conversion mappings, do this on-the-fly
|
||||
ch = Character.toLowerCase(ch);
|
||||
}
|
||||
|
||||
reuse.append(ch);
|
||||
}
|
||||
|
||||
if (iconv != null) {
|
||||
try {
|
||||
applyMappings(iconv, reuse);
|
||||
} catch (IOException bogus) {
|
||||
throw new RuntimeException(bogus);
|
||||
}
|
||||
if (ignoreCase) {
|
||||
for (int i = 0; i < reuse.length(); i++) {
|
||||
reuse.setCharAt(i, Character.toLowerCase(reuse.charAt(i)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return reuse;
|
||||
}
|
||||
|
||||
// TODO: this could be more efficient!
|
||||
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
|
||||
final FST.BytesReader bytesReader = fst.getBytesReader();
|
||||
final FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>());
|
||||
final CharsRef NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
|
||||
// temporary stuff
|
||||
final FST.Arc<CharsRef> arc = new FST.Arc<>();
|
||||
int longestMatch;
|
||||
CharsRef longestOutput;
|
||||
|
||||
for (int i = 0; i < sb.length(); i++) {
|
||||
arc.copyFrom(firstArc);
|
||||
CharsRef output = NO_OUTPUT;
|
||||
longestMatch = -1;
|
||||
longestOutput = null;
|
||||
|
||||
for (int j = i; j < sb.length(); j++) {
|
||||
char ch = sb.charAt(j);
|
||||
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
|
||||
break;
|
||||
} else {
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
longestOutput = fst.outputs.add(output, arc.nextFinalOutput);
|
||||
longestMatch = j;
|
||||
}
|
||||
}
|
||||
|
||||
if (longestMatch >= 0) {
|
||||
sb.delete(i, longestMatch+1);
|
||||
sb.insert(i, longestOutput);
|
||||
i += (longestOutput.length - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
@ -24,8 +25,8 @@ import java.util.List;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.CharacterUtils;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
@ -40,7 +41,10 @@ final class Stemmer {
|
|||
private final BytesRef scratch = new BytesRef();
|
||||
private final StringBuilder segment = new StringBuilder();
|
||||
private final ByteArrayDataInput affixReader;
|
||||
private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT);
|
||||
|
||||
// used for normalization
|
||||
private final StringBuilder scratchSegment = new StringBuilder();
|
||||
private char scratchBuffer[] = new char[32];
|
||||
|
||||
/**
|
||||
* Constructs a new Stemmer which will use the provided Dictionary to create its stems.
|
||||
|
@ -69,16 +73,24 @@ final class Stemmer {
|
|||
* @return List of stems for the word
|
||||
*/
|
||||
public List<CharsRef> stem(char word[], int length) {
|
||||
if (dictionary.ignoreCase) {
|
||||
charUtils.toLowerCase(word, 0, length);
|
||||
|
||||
if (dictionary.needsInputCleaning) {
|
||||
scratchSegment.setLength(0);
|
||||
scratchSegment.append(word, 0, length);
|
||||
CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
|
||||
scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
|
||||
length = segment.length();
|
||||
segment.getChars(0, length, scratchBuffer, 0);
|
||||
word = scratchBuffer;
|
||||
}
|
||||
|
||||
List<CharsRef> stems = new ArrayList<CharsRef>();
|
||||
IntsRef forms = dictionary.lookupWord(word, 0, length);
|
||||
if (forms != null) {
|
||||
// TODO: some forms should not be added, e.g. ONLYINCOMPOUND
|
||||
// just because it exists, does not make it valid...
|
||||
for (int i = 0; i < forms.length; i++) {
|
||||
stems.add(new CharsRef(word, 0, length));
|
||||
stems.add(newStem(word, length));
|
||||
}
|
||||
}
|
||||
stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false));
|
||||
|
@ -107,6 +119,23 @@ final class Stemmer {
|
|||
return deduped;
|
||||
}
|
||||
|
||||
private CharsRef newStem(char buffer[], int length) {
|
||||
if (dictionary.needsOutputCleaning) {
|
||||
scratchSegment.setLength(0);
|
||||
scratchSegment.append(buffer, 0, length);
|
||||
try {
|
||||
Dictionary.applyMappings(dictionary.oconv, scratchSegment);
|
||||
} catch (IOException bogus) {
|
||||
throw new RuntimeException(bogus);
|
||||
}
|
||||
char cleaned[] = new char[scratchSegment.length()];
|
||||
scratchSegment.getChars(0, cleaned.length, cleaned, 0);
|
||||
return new CharsRef(cleaned, 0, cleaned.length);
|
||||
} else {
|
||||
return new CharsRef(buffer, 0, length);
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================= Helper Methods ================================================
|
||||
|
||||
/**
|
||||
|
@ -292,7 +321,7 @@ final class Stemmer {
|
|||
continue;
|
||||
}
|
||||
}
|
||||
stems.add(new CharsRef(strippedWord, 0, length));
|
||||
stems.add(newStem(strippedWord, length));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* These thunderbird dictionaries can be retrieved via:
|
||||
* https://addons.mozilla.org/en-US/thunderbird/language-tools/
|
||||
* You must click and download every file: sorry!
|
||||
*/
|
||||
@Ignore("enable manually")
|
||||
public class TestAllDictionaries2 extends LuceneTestCase {
|
||||
|
||||
// set this to the location of where you downloaded all the files
|
||||
static final File DICTIONARY_HOME =
|
||||
new File("/data/thunderbirdDicts");
|
||||
|
||||
final String tests[] = {
|
||||
/* zip file */ /* dictionary */ /* affix */
|
||||
"addon-0.4.5-an+fx+tb+fn+sm.xpi", "dictionaries/ru.dic", "dictionaries/ru.aff",
|
||||
"addon-0.5.5-fx+tb.xpi", "dictionaries/ko-KR.dic", "dictionaries/ko-KR.aff",
|
||||
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
|
||||
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
|
||||
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
|
||||
//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
|
||||
//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
|
||||
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
|
||||
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
|
||||
"belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
|
||||
"bengali_bangladesh_dictionary-0.08-sm+tb+fx.xpi", "dictionaries/bn-BD.dic", "dictionaries/bn-BD.aff",
|
||||
"brazilian_portuguese_dictionary_former_spelling-28.20140203-tb+sm+fx.xpi", "dictionaries/pt-BR-antigo.dic", "dictionaries/pt-BR-antigo.aff",
|
||||
"brazilian_portuguese_dictionary_new_spelling-28.20140203-fx+sm+tb.xpi", "dictionaries/pt-BR.dic", "dictionaries/pt-BR.aff",
|
||||
"british_english_dictionary_updated-1.19.5-sm+fx+tb.xpi", "dictionaries/en-GB.dic", "dictionaries/en-GB.aff",
|
||||
"bulgarian_dictionary-4.3-fx+tb+sm.xpi", "dictionaries/bg.dic", "dictionaries/bg.aff",
|
||||
"canadian_english_dictionary-2.0.8-fx+sm+tb.xpi", "dictionaries/en-CA.dic", "dictionaries/en-CA.aff",
|
||||
"ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff",
|
||||
"chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff",
|
||||
"corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff",
|
||||
"corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
|
||||
"corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff",
|
||||
"croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff",
|
||||
"croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff",
|
||||
"dansk_ordbog_til_stavekontrollen-2.2.1-sm+tb+fx.xpi", "dictionaries/da.dic", "dictionaries/da.aff",
|
||||
"deutsches_worterbuch_de_de_alte_rechtschreibung-2.1.8-sm.xpi", "dictionaries/de-DE-1901.dic", "dictionaries/de-DE-1901.aff",
|
||||
"diccionario_de_espanolespana-1.7-sm+tb+fn+fx.xpi", "dictionaries/es-ES.dic", "dictionaries/es-ES.aff",
|
||||
"diccionario_en_espanol_para_venezuela-1.1.17-sm+an+tb+fn+fx.xpi", "dictionaries/es_VE.dic", "dictionaries/es_VE.aff",
|
||||
"diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
|
||||
"diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
|
||||
"diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
|
||||
//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
|
||||
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
|
||||
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
|
||||
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
|
||||
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
|
||||
"difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
|
||||
//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
|
||||
"dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
|
||||
"dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
|
||||
"eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
|
||||
"english_australian_dictionary-2.1.2-tb+fx+sm.xpi", "dictionaries/en-AU.dic", "dictionaries/en-AU.aff",
|
||||
"esperanta_vortaro-1.0.2-fx+tb+sm.xpi", "dictionaries/eo-EO.dic", "dictionaries/eo-EO.aff",
|
||||
"european_portuguese_spellchecker-14.1.1.1-tb+fx.xpi", "dictionaries/pt-PT.dic", "dictionaries/pt-PT.aff",
|
||||
"faroese_spell_checker_faroe_islands-2.0-tb+sm+fx+fn.xpi", "dictionaries/fo_FO.dic", "dictionaries/fo_FO.aff",
|
||||
"frysk_wurdboek-2.1.1-fn+sm+fx+an+tb.xpi", "dictionaries/fy.dic", "dictionaries/fy.aff",
|
||||
"geiriadur_cymraeg-1.08-tb+sm+fx.xpi", "dictionaries/cy_GB.dic", "dictionaries/cy_GB.aff",
|
||||
"general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi", "dictionaries/ca.dic", "dictionaries/ca.aff",
|
||||
"german_dictionary-2.0.3-fn+fx+sm+tb.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
|
||||
"german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
|
||||
"german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
|
||||
"german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
|
||||
"german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
|
||||
"german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
|
||||
"greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi", "dictionaries/el-GR.dic", "dictionaries/el-GR.aff",
|
||||
"gujarati_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/gu_IN.dic", "dictionaries/gu_IN.aff",
|
||||
"haitian_creole_spell_checker-0.08-tb+sm+fx.xpi", "dictionaries/ht-HT.dic", "dictionaries/ht-HT.aff",
|
||||
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
|
||||
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
|
||||
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
|
||||
//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
|
||||
//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
|
||||
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
|
||||
//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
|
||||
"kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
|
||||
"kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
|
||||
"kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
|
||||
"lao_spellchecking_dictionary-0-fx+tb+sm+fn+an.xpi", "dictionaries/lo_LA.dic", "dictionaries/lo_LA.aff",
|
||||
"latviesu_valodas_pareizrakstibas_parbaudes_vardnica-1.0.0-fn+fx+tb+sm.xpi", "dictionaries/lv_LV.dic", "dictionaries/lv_LV.aff",
|
||||
"lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
|
||||
"litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
|
||||
"litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
|
||||
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
|
||||
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
|
||||
"malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
|
||||
"marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
|
||||
"ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
|
||||
"nepali_dictionary-1.2-fx+tb.xpi", "dictionaries/ne_NP.dic", "dictionaries/ne_NP.aff",
|
||||
"norsk_bokmal_ordliste-2.0.10.2-fx+tb+sm.xpi", "dictionaries/nb.dic", "dictionaries/nb.aff",
|
||||
"norsk_nynorsk_ordliste-2.1.0-sm+fx+tb.xpi", "dictionaries/nn.dic", "dictionaries/nn.aff",
|
||||
"northern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nso-ZA.dic", "dictionaries/nso-ZA.aff",
|
||||
"oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
|
||||
"polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
|
||||
"punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
|
||||
//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
|
||||
//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
|
||||
"sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
|
||||
"scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
|
||||
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
|
||||
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Latn.dic", "dictionaries/sr-RS-Latn.aff",
|
||||
"slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK.dic", "dictionaries/sk-SK.aff",
|
||||
"slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK-ascii.dic", "dictionaries/sk-SK-ascii.aff",
|
||||
"slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi", "dictionaries/sl.dic", "dictionaries/sl.aff",
|
||||
"songhay_spell_checker-0.03-fx+tb+sm.xpi", "dictionaries/Songhay - Mali.dic", "dictionaries/Songhay - Mali.aff",
|
||||
"southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/st-ZA.dic", "dictionaries/st-ZA.aff",
|
||||
"sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff",
|
||||
"sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi", "dictionaries/dsb.dic", "dictionaries/dsb.aff",
|
||||
"srpska_latinica-0.1-fx+tb+sm.xpi", "dictionaries/Srpski_latinica.dic", "dictionaries/Srpski_latinica.aff",
|
||||
"svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv.dic", "dictionaries/sv.aff",
|
||||
"svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv_FI.dic", "dictionaries/sv_FI.aff",
|
||||
"swati_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ss-ZA.dic", "dictionaries/ss-ZA.aff",
|
||||
"tamil_spell_checker_for_firefox-0.4-tb+fx.xpi", "dictionaries/ta-TA.dic", "dictionaries/ta-TA.aff",
|
||||
"telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
|
||||
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
|
||||
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
|
||||
//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
|
||||
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
|
||||
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
|
||||
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
|
||||
//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
|
||||
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
|
||||
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
|
||||
"upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
|
||||
//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
|
||||
"uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
|
||||
"valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
|
||||
"venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
|
||||
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
|
||||
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
|
||||
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
|
||||
//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
|
||||
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
|
||||
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
|
||||
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
|
||||
"zulu_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/zu-ZA.dic", "dictionaries/zu-ZA.aff"
|
||||
};
|
||||
|
||||
public void test() throws Exception {
|
||||
for (int i = 0; i < tests.length; i += 3) {
|
||||
File f = new File(DICTIONARY_HOME, tests[i]);
|
||||
assert f.exists();
|
||||
|
||||
try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
|
||||
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
|
||||
assert dicEntry != null;
|
||||
ZipEntry affEntry = zip.getEntry(tests[i+2]);
|
||||
assert affEntry != null;
|
||||
|
||||
try (InputStream dictionary = zip.getInputStream(dicEntry);
|
||||
InputStream affix = zip.getInputStream(affEntry)) {
|
||||
Dictionary dic = new Dictionary(affix, dictionary);
|
||||
System.out.println(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" +
|
||||
"words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " +
|
||||
"flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " +
|
||||
"strips=" + RamUsageEstimator.humanSizeOf(dic.stripLookup) + ", " +
|
||||
"conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " +
|
||||
"affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " +
|
||||
"prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " +
|
||||
"suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testOneDictionary() throws Exception {
|
||||
String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
|
||||
for (int i = 0; i < tests.length; i++) {
|
||||
if (tests[i].equals(toTest)) {
|
||||
File f = new File(DICTIONARY_HOME, tests[i]);
|
||||
assert f.exists();
|
||||
|
||||
try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
|
||||
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
|
||||
assert dicEntry != null;
|
||||
ZipEntry affEntry = zip.getEntry(tests[i+2]);
|
||||
assert affEntry != null;
|
||||
|
||||
try (InputStream dictionary = zip.getInputStream(dicEntry);
|
||||
InputStream affix = zip.getInputStream(affEntry)) {
|
||||
new Dictionary(affix, dictionary);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TestConv extends StemmerTestBase {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
init("conv.aff", "conv.dic");
|
||||
}
|
||||
|
||||
public void testConversion() {
|
||||
assertStemsTo("drink", "drInk");
|
||||
assertStemsTo("drInk", "drInk");
|
||||
assertStemsTo("drInkAble", "drInk");
|
||||
assertStemsTo("drInkABle", "drInk");
|
||||
assertStemsTo("drinkABle", "drInk");
|
||||
}
|
||||
}
|
|
@ -22,10 +22,15 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.text.ParseException;
|
||||
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.CharSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Outputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
public class TestDictionary extends LuceneTestCase {
|
||||
|
||||
|
@ -123,4 +128,54 @@ public class TestDictionary extends LuceneTestCase {
|
|||
assertTrue(affixStream.isClosed());
|
||||
assertTrue(dictStream.isClosed());
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void testReplacements() throws Exception {
|
||||
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
|
||||
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
|
||||
IntsRef scratchInts = new IntsRef();
|
||||
|
||||
// a -> b
|
||||
Util.toUTF16("a", scratchInts);
|
||||
builder.add(scratchInts, new CharsRef("b"));
|
||||
|
||||
// ab -> c
|
||||
Util.toUTF16("ab", scratchInts);
|
||||
builder.add(scratchInts, new CharsRef("c"));
|
||||
|
||||
// c -> de
|
||||
Util.toUTF16("c", scratchInts);
|
||||
builder.add(scratchInts, new CharsRef("de"));
|
||||
|
||||
// def -> gh
|
||||
Util.toUTF16("def", scratchInts);
|
||||
builder.add(scratchInts, new CharsRef("gh"));
|
||||
|
||||
FST<CharsRef> fst = builder.finish();
|
||||
|
||||
StringBuilder sb = new StringBuilder("atestanother");
|
||||
Dictionary.applyMappings(fst, sb);
|
||||
assertEquals("btestbnother", sb.toString());
|
||||
|
||||
sb = new StringBuilder("abtestanother");
|
||||
Dictionary.applyMappings(fst, sb);
|
||||
assertEquals("ctestbnother", sb.toString());
|
||||
|
||||
sb = new StringBuilder("atestabnother");
|
||||
Dictionary.applyMappings(fst, sb);
|
||||
assertEquals("btestcnother", sb.toString());
|
||||
|
||||
sb = new StringBuilder("abtestabnother");
|
||||
Dictionary.applyMappings(fst, sb);
|
||||
assertEquals("ctestcnother", sb.toString());
|
||||
|
||||
sb = new StringBuilder("abtestabcnother");
|
||||
Dictionary.applyMappings(fst, sb);
|
||||
assertEquals("ctestcdenother", sb.toString());
|
||||
|
||||
sb = new StringBuilder("defdefdefc");
|
||||
Dictionary.applyMappings(fst, sb);
|
||||
assertEquals("ghghghde", sb.toString());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
@ -30,7 +31,6 @@ import org.apache.lucene.analysis.hunspell.Dictionary;
|
|||
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
|
@ -94,4 +94,20 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
|
|||
};
|
||||
checkOneTerm(a, "", "");
|
||||
}
|
||||
|
||||
public void testIgnoreCaseNoSideEffects() throws Exception {
|
||||
final Dictionary d;
|
||||
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
|
||||
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
|
||||
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
|
||||
}
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new KeywordTokenizer();
|
||||
return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d));
|
||||
}
|
||||
};
|
||||
checkOneTerm(a, "NoChAnGy", "NoChAnGy");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestIgnore extends StemmerTestBase {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
init("ignore.aff", "ignore.dic");
|
||||
}
|
||||
|
||||
public void testExamples() {
|
||||
assertStemsTo("drink", "drink");
|
||||
assertStemsTo("drinkable", "drink");
|
||||
assertStemsTo("dr'ink-able", "drink");
|
||||
assertStemsTo("drank-able", "drank");
|
||||
assertStemsTo("'-'-'-");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
SET UTF-8
|
||||
|
||||
ICONV 4
|
||||
ICONV A a
|
||||
ICONV B b
|
||||
ICONV C c
|
||||
ICONV I i
|
||||
|
||||
OCONV 4
|
||||
OCONV a A
|
||||
OCONV b B
|
||||
OCONV c C
|
||||
OCONV i I
|
||||
|
||||
SFX X Y 1
|
||||
SFX X 0 able . +ABLE
|
|
@ -0,0 +1,2 @@
|
|||
1
|
||||
drink/X [VERB]
|
|
@ -0,0 +1,6 @@
|
|||
SET UTF-8
|
||||
|
||||
IGNORE '-
|
||||
|
||||
SFX X Y 1
|
||||
SFX X 0 able . +ABLE
|
|
@ -0,0 +1,3 @@
|
|||
1
|
||||
drink/X [VERB]
|
||||
dr-ank/X [VERB]
|
|
@ -209,11 +209,6 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
final DocFieldProcessorPerField perField = fields[i];
|
||||
perField.consumer.processFields(perField.fields, perField.fieldCount);
|
||||
}
|
||||
|
||||
if (docState.maxTermPrefix != null && docState.infoStream.isEnabled("IW")) {
|
||||
docState.infoStream.message("IW", "WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
|
||||
docState.maxTermPrefix = null;
|
||||
}
|
||||
}
|
||||
|
||||
private DocFieldProcessorPerField processField(FieldInfos.Builder fieldInfos,
|
||||
|
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Holds state for inverting all occurrences of a single
|
||||
|
@ -182,6 +181,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
// when we come back around to the field...
|
||||
fieldState.position += posIncrAttribute.getPositionIncrement();
|
||||
fieldState.offset += offsetAttribute.endOffset();
|
||||
|
||||
|
||||
if (docState.maxTermPrefix != null) {
|
||||
final String msg = "Document contains at least one immense term in field=\"" + fieldInfo.name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'";
|
||||
if (docState.infoStream.isEnabled("IW")) {
|
||||
docState.infoStream.message("IW", "ERROR: " + msg);
|
||||
}
|
||||
docState.maxTermPrefix = null;
|
||||
throw new IllegalArgumentException(msg);
|
||||
}
|
||||
|
||||
/* if success was false above there is an exception coming through and we won't get here.*/
|
||||
succeededInProcessingField = true;
|
||||
} finally {
|
||||
|
|
|
@ -207,8 +207,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
|
|||
/**
|
||||
* Absolute hard maximum length for a term, in bytes once
|
||||
* encoded as UTF8. If a term arrives from the analyzer
|
||||
* longer than this length, it is skipped and a message is
|
||||
* printed to infoStream, if set (see {@link
|
||||
* longer than this length, an
|
||||
* <code>IllegalArgumentException</code> is thrown
|
||||
* and a message is printed to infoStream, if set (see {@link
|
||||
* IndexWriterConfig#setInfoStream(InfoStream)}).
|
||||
*/
|
||||
public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;
|
||||
|
@ -1159,7 +1160,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
|
|||
* merge policy.
|
||||
*
|
||||
* <p>Note that each term in the document can be no longer
|
||||
* than 16383 characters, otherwise an
|
||||
* than {@link #MAX_TERM_LENGTH} in bytes, otherwise an
|
||||
* IllegalArgumentException will be thrown.</p>
|
||||
*
|
||||
* <p>Note that it's possible to create an invalid Unicode
|
||||
|
|
|
@ -179,12 +179,11 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
|
|||
try {
|
||||
termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
|
||||
} catch (MaxBytesLengthExceededException e) {
|
||||
// Not enough room in current block
|
||||
// Just skip this term, to remain as robust as
|
||||
// possible during indexing. A TokenFilter
|
||||
// can be inserted into the analyzer chain if
|
||||
// other behavior is wanted (pruning the term
|
||||
// to a prefix, throwing an exception, etc).
|
||||
// Term is too large; record this here (can't throw an
|
||||
// exc because DocInverterPerField will then abort the
|
||||
// entire segment) and then throw an exc later in
|
||||
// DocInverterPerField.java. LengthFilter can always be
|
||||
// used to prune the term before indexing:
|
||||
if (docState.maxTermPrefix == null) {
|
||||
final int saved = termBytesRef.length;
|
||||
try {
|
||||
|
|
|
@ -202,8 +202,8 @@ public class Sort {
|
|||
return 0x45aaf665 + Arrays.hashCode(fields);
|
||||
}
|
||||
|
||||
/** Whether the relevance score is needed to sort documents. */
|
||||
boolean needsScores() {
|
||||
/** Returns true if the relevance score is needed to sort documents. */
|
||||
public boolean needsScores() {
|
||||
for (SortField sortField : fields) {
|
||||
if (sortField.needsScores()) {
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.After;
|
||||
|
||||
/**
|
||||
* Tests that a useful exception is thrown when attempting to index a term that is
|
||||
* too large
|
||||
*
|
||||
* @see IndexWriter#MAX_TERM_LENGTH
|
||||
*/
|
||||
public class TestExceedMaxTermLength extends LuceneTestCase {
|
||||
|
||||
private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1;
|
||||
private final static int maxTestTermLegnth = IndexWriter.MAX_TERM_LENGTH * 2;
|
||||
|
||||
Directory dir = null;
|
||||
|
||||
@Before
|
||||
public void createDir() {
|
||||
dir = newDirectory();
|
||||
}
|
||||
@After
|
||||
public void destroyDir() throws IOException {
|
||||
dir.close();
|
||||
dir = null;
|
||||
}
|
||||
|
||||
public void test() throws Exception {
|
||||
|
||||
IndexWriter w = new IndexWriter
|
||||
(dir, newIndexWriterConfig(random(),
|
||||
TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random())));
|
||||
try {
|
||||
final FieldType ft = new FieldType();
|
||||
ft.setIndexed(true);
|
||||
ft.setStored(random().nextBoolean());
|
||||
ft.freeze();
|
||||
|
||||
final Document doc = new Document();
|
||||
if (random().nextBoolean()) {
|
||||
// totally ok short field value
|
||||
doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
|
||||
TestUtil.randomSimpleString(random(), 1, 10),
|
||||
ft));
|
||||
}
|
||||
// problematic field
|
||||
final String name = TestUtil.randomSimpleString(random(), 1, 50);
|
||||
final String value = TestUtil.randomSimpleString(random(),
|
||||
minTestTermLength,
|
||||
maxTestTermLegnth);
|
||||
final Field f = new Field(name, value, ft);
|
||||
if (random().nextBoolean()) {
|
||||
// totally ok short field value
|
||||
doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
|
||||
TestUtil.randomSimpleString(random(), 1, 10),
|
||||
ft));
|
||||
}
|
||||
doc.add(f);
|
||||
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
fail("Did not get an exception from adding a monster term");
|
||||
} catch (IllegalArgumentException e) {
|
||||
final String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH);
|
||||
final String msg = e.getMessage();
|
||||
assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg,
|
||||
msg.contains("immense term"));
|
||||
assertTrue("IllegalArgumentException didn't mention max length ("+maxLengthMsg+"): " + msg,
|
||||
msg.contains(maxLengthMsg));
|
||||
assertTrue("IllegalArgumentException didn't mention field name ("+name+"): " + msg,
|
||||
msg.contains(name));
|
||||
}
|
||||
} finally {
|
||||
w.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1660,32 +1660,32 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
// These contents produce a too-long term:
|
||||
String contents = "abc xyz x" + bigTerm + " another term";
|
||||
doc.add(new TextField("content", contents, Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
fail("should have hit exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
|
||||
// Make sure we can add another normal document
|
||||
doc = new Document();
|
||||
doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
// So we remove the deleted doc:
|
||||
w.forceMerge(1);
|
||||
|
||||
IndexReader reader = w.getReader();
|
||||
w.close();
|
||||
|
||||
// Make sure all terms < max size were indexed
|
||||
assertEquals(2, reader.docFreq(new Term("content", "abc")));
|
||||
assertEquals(1, reader.docFreq(new Term("content", "abc")));
|
||||
assertEquals(1, reader.docFreq(new Term("content", "bbb")));
|
||||
assertEquals(1, reader.docFreq(new Term("content", "term")));
|
||||
assertEquals(1, reader.docFreq(new Term("content", "another")));
|
||||
assertEquals(0, reader.docFreq(new Term("content", "term")));
|
||||
|
||||
// Make sure position is still incremented when
|
||||
// massive term is skipped:
|
||||
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another"));
|
||||
assertEquals(0, tps.nextDoc());
|
||||
assertEquals(1, tps.freq());
|
||||
assertEquals(3, tps.nextPosition());
|
||||
|
||||
// Make sure the doc that has the massive term is in
|
||||
// Make sure the doc that has the massive term is NOT in
|
||||
// the index:
|
||||
assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
|
||||
assertEquals("document with wicked long term is in the index!", 1, reader.numDocs());
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
|
|
|
@ -0,0 +1,223 @@
|
|||
package org.apache.lucene.index.sorter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.FieldComparatorSource;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.IndexSearcher; // javadocs
|
||||
import org.apache.lucene.search.Query; // javadocs
|
||||
import org.apache.lucene.search.ScoreDoc; // javadocs
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* Helper class to sort readers that contain blocks of documents.
|
||||
* <p>
|
||||
* Note that this class is intended to used with {@link SortingMergePolicy},
|
||||
* and for other purposes has some limitations:
|
||||
* <ul>
|
||||
* <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
|
||||
* <li>Filling sort field values is not yet supported.
|
||||
* </ul>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
// TODO: can/should we clean this thing up (e.g. return a proper sort value)
|
||||
// and move to the join/ module?
|
||||
public class BlockJoinComparatorSource extends FieldComparatorSource {
|
||||
final Filter parentsFilter;
|
||||
final Sort parentSort;
|
||||
final Sort childSort;
|
||||
|
||||
/**
|
||||
* Create a new BlockJoinComparatorSource, sorting only blocks of documents
|
||||
* with {@code parentSort} and not reordering children within a block.
|
||||
*
|
||||
* @param parentsFilter Filter identifying parent documents
|
||||
* @param parentSort Sort for parent documents
|
||||
*/
|
||||
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort) {
|
||||
this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new BlockJoinComparatorSource, specifying the sort order for both
|
||||
* blocks of documents and children within a block.
|
||||
*
|
||||
* @param parentsFilter Filter identifying parent documents
|
||||
* @param parentSort Sort for parent documents
|
||||
* @param childSort Sort for child documents in the same block
|
||||
*/
|
||||
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort, Sort childSort) {
|
||||
this.parentsFilter = parentsFilter;
|
||||
this.parentSort = parentSort;
|
||||
this.childSort = childSort;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
|
||||
// we keep parallel slots: the parent ids and the child ids
|
||||
final int parentSlots[] = new int[numHits];
|
||||
final int childSlots[] = new int[numHits];
|
||||
|
||||
SortField parentFields[] = parentSort.getSort();
|
||||
final int parentReverseMul[] = new int[parentFields.length];
|
||||
final FieldComparator<?> parentComparators[] = new FieldComparator[parentFields.length];
|
||||
for (int i = 0; i < parentFields.length; i++) {
|
||||
parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1;
|
||||
parentComparators[i] = parentFields[i].getComparator(1, i);
|
||||
}
|
||||
|
||||
SortField childFields[] = childSort.getSort();
|
||||
final int childReverseMul[] = new int[childFields.length];
|
||||
final FieldComparator<?> childComparators[] = new FieldComparator[childFields.length];
|
||||
for (int i = 0; i < childFields.length; i++) {
|
||||
childReverseMul[i] = childFields[i].getReverse() ? -1 : 1;
|
||||
childComparators[i] = childFields[i].getComparator(1, i);
|
||||
}
|
||||
|
||||
// NOTE: we could return parent ID as value but really our sort "value" is more complex...
|
||||
// So we throw UOE for now. At the moment you really should only use this at indexing time.
|
||||
return new FieldComparator<Integer>() {
|
||||
int bottomParent;
|
||||
int bottomChild;
|
||||
FixedBitSet parentBits;
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
try {
|
||||
return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) {
|
||||
bottomParent = parentSlots[slot];
|
||||
bottomChild = childSlots[slot];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Integer value) {
|
||||
// we dont have enough information (the docid is needed)
|
||||
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return compare(bottomChild, bottomParent, doc, parent(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
// we dont have enough information (the docid is needed)
|
||||
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
childSlots[slot] = doc;
|
||||
parentSlots[slot] = parent(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(context, null);
|
||||
if (parents == null) {
|
||||
throw new IllegalStateException("AtomicReader " + context.reader() + " contains no parents!");
|
||||
}
|
||||
if (!(parents instanceof FixedBitSet)) {
|
||||
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
|
||||
}
|
||||
parentBits = (FixedBitSet) parents;
|
||||
for (int i = 0; i < parentComparators.length; i++) {
|
||||
parentComparators[i] = parentComparators[i].setNextReader(context);
|
||||
}
|
||||
for (int i = 0; i < childComparators.length; i++) {
|
||||
childComparators[i] = childComparators[i].setNextReader(context);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer value(int slot) {
|
||||
// really our sort "value" is more complex...
|
||||
throw new UnsupportedOperationException("filling sort field values is not yet supported");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) {
|
||||
super.setScorer(scorer);
|
||||
for (FieldComparator<?> comp : parentComparators) {
|
||||
comp.setScorer(scorer);
|
||||
}
|
||||
for (FieldComparator<?> comp : childComparators) {
|
||||
comp.setScorer(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
int parent(int doc) {
|
||||
return parentBits.nextSetBit(doc);
|
||||
}
|
||||
|
||||
int compare(int docID1, int parent1, int docID2, int parent2) throws IOException {
|
||||
if (parent1 == parent2) { // both are in the same block
|
||||
if (docID1 == parent1 || docID2 == parent2) {
|
||||
// keep parents at the end of blocks
|
||||
return docID1 - docID2;
|
||||
} else {
|
||||
return compare(docID1, docID2, childComparators, childReverseMul);
|
||||
}
|
||||
} else {
|
||||
int cmp = compare(parent1, parent2, parentComparators, parentReverseMul);
|
||||
if (cmp == 0) {
|
||||
return parent1 - parent2;
|
||||
} else {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int compare(int docID1, int docID2, FieldComparator<?> comparators[], int reverseMul[]) throws IOException {
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
// TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
|
||||
// the segments are always the same here...
|
||||
comparators[i].copy(0, docID1);
|
||||
comparators[i].setBottom(0);
|
||||
int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
|
||||
if (comp != 0) {
|
||||
return comp;
|
||||
}
|
||||
}
|
||||
return 0; // no need to docid tiebreak
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")";
|
||||
}
|
||||
}
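To make the intent of this new class concrete, here is a hedged usage sketch that mirrors the test changes further down in this commit: the comparator source is wrapped in a SortField so a SortingMergePolicy keeps each parent together with its children and orders whole blocks at merge time. Field names ("parent", "parent_val", "child_val", "block") are placeholders.

// Hedged sketch, not part of this patch: build a block-join Sort and hand it to
// SortingMergePolicy so merged segments keep parent/child blocks contiguous,
// ordered by parent_val and, within a block, by child_val.
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.sorter.BlockJoinComparatorSource;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.*;

public class BlockSortingExample {
  public static MergePolicy blockSorting(MergePolicy base) {
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
    Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
    Sort blockSort = new Sort(new SortField("block",
        new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
    return new SortingMergePolicy(base, blockSort);
  }
}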
@ -1,88 +0,0 @@
|
|||
package org.apache.lucene.index.sorter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* Helper class to sort readers that contain blocks of documents.
|
||||
*/
|
||||
public abstract class BlockJoinSorter extends Sorter {
|
||||
|
||||
protected final Filter parentsFilter;
|
||||
|
||||
/** Sole constructor. */
|
||||
public BlockJoinSorter(Filter parentsFilter) {
|
||||
this.parentsFilter = parentsFilter;
|
||||
}
|
||||
|
||||
/** Return a {@link Sorter.DocComparator} instance that will be called on
|
||||
* parent doc IDs. */
|
||||
protected abstract DocComparator getParentComparator(AtomicReader reader);
|
||||
|
||||
/** Return a {@link Sorter.DocComparator} instance that will be called on
|
||||
* children of the same parent. By default, children of the same parent are
|
||||
* not reordered. */
|
||||
protected DocComparator getChildComparator(AtomicReader reader) {
|
||||
return INDEX_ORDER_COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final DocMap sort(AtomicReader reader) throws IOException {
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(reader.getContext(), null);
|
||||
if (parents == null) {
|
||||
throw new IllegalStateException("AtomicReader " + reader + " contains no parents!");
|
||||
}
|
||||
if (!(parents instanceof FixedBitSet)) {
|
||||
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
|
||||
}
|
||||
final FixedBitSet parentBits = (FixedBitSet) parents;
|
||||
final DocComparator parentComparator = getParentComparator(reader);
|
||||
final DocComparator childComparator = getChildComparator(reader);
|
||||
final DocComparator comparator = new DocComparator() {
|
||||
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
final int parent1 = parentBits.nextSetBit(docID1);
|
||||
final int parent2 = parentBits.nextSetBit(docID2);
|
||||
if (parent1 == parent2) { // both are in the same block
|
||||
if (docID1 == parent1 || docID2 == parent2) {
|
||||
// keep parents at the end of blocks
|
||||
return docID1 - docID2;
|
||||
} else {
|
||||
return childComparator.compare(docID1, docID2);
|
||||
}
|
||||
} else {
|
||||
int cmp = parentComparator.compare(parent1, parent2);
|
||||
if (cmp == 0) {
|
||||
cmp = parent1 - parent2;
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
return sort(reader.maxDoc(), comparator);
|
||||
}
|
||||
|
||||
}
@ -24,50 +24,53 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
|
||||
/**
|
||||
* A {@link Collector} that early terminates collection of documents on a
|
||||
* per-segment basis, if the segment was sorted according to the given
|
||||
* {@link Sorter}.
|
||||
* {@link Sort}.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> the {@link Collector} detects sorted segments according to
|
||||
* <b>NOTE:</b> the {@code Collector} detects sorted segments according to
|
||||
* {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
|
||||
* it collects up to a specified num docs from each segment, and therefore is
|
||||
* mostly suitable for use in conjunction with collectors such as
|
||||
* it collects up to a specified {@code numDocsToCollect} from each segment,
|
||||
* and therefore is mostly suitable for use in conjunction with collectors such as
|
||||
* {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
|
||||
* <p>
|
||||
* <b>NOTE</b>: If you wrap a {@link TopDocsCollector} that sorts in the same
|
||||
* order as the index order, the returned {@link TopDocsCollector#topDocs()}
|
||||
* <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
|
||||
* order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
|
||||
* will be correct. However the total of {@link TopDocsCollector#getTotalHits()
|
||||
* hit count} will be underestimated since not all matching documents will have
|
||||
* been collected.
|
||||
* <p>
|
||||
* <b>NOTE</b>: This {@link Collector} uses {@link Sorter#getID()} to detect
|
||||
* whether a segment was sorted with the same {@link Sorter} as the one given in
|
||||
* {@link #EarlyTerminatingSortingCollector(Collector, Sorter, int)}. This has
|
||||
* <b>NOTE</b>: This {@code Collector} uses {@link Sort#toString()} to detect
|
||||
* whether a segment was sorted with the same {@code Sort}. This has
|
||||
* two implications:
|
||||
* <ul>
|
||||
* <li>if {@link Sorter#getID()} is not implemented correctly and returns
|
||||
* different identifiers for equivalent {@link Sorter}s, this collector will not
|
||||
* <li>if a custom comparator is not implemented correctly and returns
|
||||
* different identifiers for equivalent instances, this collector will not
|
||||
* detect sorted segments,</li>
|
||||
* <li>if you suddenly change the {@link IndexWriter}'s
|
||||
* {@link SortingMergePolicy} to sort according to another criterion and if both
|
||||
* the old and the new {@link Sorter}s have the same identifier, this
|
||||
* {@link Collector} will incorrectly detect sorted segments.</li>
|
||||
* {@code SortingMergePolicy} to sort according to another criterion and if both
|
||||
* the old and the new {@code Sort}s have the same identifier, this
|
||||
* {@code Collector} will incorrectly detect sorted segments.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class EarlyTerminatingSortingCollector extends Collector {
|
||||
|
||||
/** The wrapped Collector */
|
||||
protected final Collector in;
|
||||
protected final Sorter sorter;
|
||||
/** Sort used to sort the search results */
|
||||
protected final Sort sort;
|
||||
/** Number of documents to collect in each segment */
|
||||
protected final int numDocsToCollect;
|
||||
|
||||
/** Number of documents to collect in the current segment being processed */
|
||||
protected int segmentTotalCollect;
|
||||
/** True if the current segment being processed is sorted by {@link #sort} */
|
||||
protected boolean segmentSorted;
|
||||
|
||||
private int numCollected;
|
||||
|
@ -77,20 +80,19 @@ public class EarlyTerminatingSortingCollector extends Collector {
|
|||
*
|
||||
* @param in
|
||||
* the collector to wrap
|
||||
* @param sorter
|
||||
* the same sorter as the one which is used by {@link IndexWriter}'s
|
||||
* {@link SortingMergePolicy}
|
||||
* @param sort
|
||||
* the sort you are sorting the search results on
|
||||
* @param numDocsToCollect
|
||||
* the number of documents to collect on each segment. When wrapping
|
||||
* a {@link TopDocsCollector}, this number should be the number of
|
||||
* hits.
|
||||
*/
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sorter sorter, int numDocsToCollect) {
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
|
||||
if (numDocsToCollect <= 0) {
|
||||
throw new IllegalStateException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
|
||||
}
|
||||
this.in = in;
|
||||
this.sorter = sorter;
|
||||
this.sort = sort;
|
||||
this.numDocsToCollect = numDocsToCollect;
|
||||
}
|
||||
|
||||
|
@ -110,7 +112,7 @@ public class EarlyTerminatingSortingCollector extends Collector {
|
|||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
in.setNextReader(context);
|
||||
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sorter);
|
||||
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sort);
|
||||
segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE;
|
||||
numCollected = 0;
|
||||
}
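A hedged usage sketch of the new constructor (the query and the "timestamp" field are placeholders): wrap a TopFieldCollector that sorts the same way the SortingMergePolicy sorts segments, and collection stops after numHits documents in each sorted segment. As the javadocs above warn, the reported total hit count may then be an underestimate.

// Hedged sketch, not part of this patch.
import java.io.IOException;

import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
import org.apache.lucene.search.*;

public class EarlyTerminationExample {
  public static TopDocs searchSorted(IndexSearcher searcher, Query query, int numHits) throws IOException {
    Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    TopFieldCollector collector = TopFieldCollector.create(indexSort, numHits,
        /*fillFields=*/true, /*trackDocScores=*/false, /*trackMaxScore=*/false, /*docsScoredInOrder=*/false);
    searcher.search(query, new EarlyTerminatingSortingCollector(collector, indexSort, numHits));
    return collector.topDocs();  // top hits are correct; totalHits may undercount
  }
}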
@ -1,81 +0,0 @@
|
|||
package org.apache.lucene.index.sorter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
|
||||
/**
|
||||
* A {@link Sorter} which sorts documents according to their
|
||||
* {@link NumericDocValues}. One can specify ascending or descending sort order.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class NumericDocValuesSorter extends Sorter {
|
||||
|
||||
private final String fieldName;
|
||||
private final boolean ascending;
|
||||
|
||||
/** Constructor over the given field name, and ascending sort order. */
|
||||
public NumericDocValuesSorter(final String fieldName) {
|
||||
this(fieldName, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor over the given field name, and whether sorting should be
|
||||
* ascending ({@code true}) or descending ({@code false}).
|
||||
*/
|
||||
public NumericDocValuesSorter(final String fieldName, boolean ascending) {
|
||||
this.fieldName = fieldName;
|
||||
this.ascending = ascending;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Sorter.DocMap sort(final AtomicReader reader) throws IOException {
|
||||
final NumericDocValues ndv = reader.getNumericDocValues(fieldName);
|
||||
final DocComparator comparator;
|
||||
if (ascending) {
|
||||
comparator = new DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
final long v1 = ndv.get(docID1);
|
||||
final long v2 = ndv.get(docID2);
|
||||
return v1 < v2 ? -1 : v1 == v2 ? 0 : 1;
|
||||
}
|
||||
};
|
||||
} else {
|
||||
comparator = new DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
final long v1 = ndv.get(docID1);
|
||||
final long v2 = ndv.get(docID2);
|
||||
return v1 > v2 ? -1 : v1 == v2 ? 0 : 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
return sort(reader.maxDoc(), comparator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getID() {
|
||||
return "DocValues(" + fieldName + "," + (ascending ? "ascending" : "descending") + ")";
|
||||
}
|
||||
|
||||
}
|
|
@ -22,47 +22,47 @@ import java.util.Comparator;
|
|||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.TimSorter;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
|
||||
/**
|
||||
* Sorts documents of a given index by returning a permutation on the document
|
||||
* IDs.
|
||||
* <p><b>NOTE</b>: A {@link Sorter} implementation can be easily written from
|
||||
* a {@link DocComparator document comparator} by using the
|
||||
* {@link #sort(int, DocComparator)} helper method. This is especially useful
|
||||
* when documents are directly comparable by their field values.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class Sorter {
|
||||
final class Sorter {
|
||||
final Sort sort;
|
||||
|
||||
/** A comparator that keeps documents in index order. */
|
||||
public static final DocComparator INDEX_ORDER_COMPARATOR = new DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
return docID1 - docID2;
|
||||
/** Creates a new Sorter to sort the index with {@code sort} */
|
||||
Sorter(Sort sort) {
|
||||
if (sort.needsScores()) {
|
||||
throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
|
||||
}
|
||||
};
|
||||
this.sort = sort;
|
||||
}
|
||||
|
||||
/**
|
||||
* A permutation of doc IDs. For every document ID between <tt>0</tt> and
|
||||
* {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
|
||||
* return <code>docID</code>.
|
||||
*/
|
||||
public static abstract class DocMap {
|
||||
static abstract class DocMap {
|
||||
|
||||
/** Given a doc ID from the original index, return its ordinal in the
|
||||
* sorted index. */
|
||||
public abstract int oldToNew(int docID);
|
||||
abstract int oldToNew(int docID);
|
||||
|
||||
/** Given the ordinal of a doc ID, return its doc ID in the original index. */
|
||||
public abstract int newToOld(int docID);
|
||||
abstract int newToOld(int docID);
|
||||
|
||||
/** Return the number of documents in this map. This must be equal to the
|
||||
* {@link AtomicReader#maxDoc() number of documents} of the
|
||||
* {@link AtomicReader} which is sorted. */
|
||||
public abstract int size();
|
||||
|
||||
abstract int size();
|
||||
}
|
||||
|
||||
/** Check consistency of a {@link DocMap}, useful for assertions. */
|
||||
|
@ -81,7 +81,7 @@ public abstract class Sorter {
|
|||
}
|
||||
|
||||
/** A comparator of doc IDs. */
|
||||
public static abstract class DocComparator {
|
||||
static abstract class DocComparator {
|
||||
|
||||
/** Compare docID1 against docID2. The contract for the return value is the
|
||||
* same as {@link Comparator#compare(Object, Object)}. */
|
||||
|
@ -89,45 +89,13 @@ public abstract class Sorter {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts documents in reverse order. <b>NOTE</b>: This {@link Sorter} is not
|
||||
* idempotent. Sorting an {@link AtomicReader} once or twice will return two
|
||||
* different {@link AtomicReader} views. This {@link Sorter} should not be
|
||||
* used with {@link SortingMergePolicy}.
|
||||
*/
|
||||
public static final Sorter REVERSE_DOCS = new Sorter() {
|
||||
@Override
|
||||
public DocMap sort(final AtomicReader reader) throws IOException {
|
||||
final int maxDoc = reader.maxDoc();
|
||||
return new DocMap() {
|
||||
@Override
|
||||
public int oldToNew(int docID) {
|
||||
return maxDoc - docID - 1;
|
||||
}
|
||||
@Override
|
||||
public int newToOld(int docID) {
|
||||
return maxDoc - docID - 1;
|
||||
}
|
||||
@Override
|
||||
public int size() {
|
||||
return maxDoc;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getID() {
|
||||
return "ReverseDocs";
|
||||
}
|
||||
};
|
||||
|
||||
private static final class DocValueSorter extends TimSorter {
|
||||
|
||||
private final int[] docs;
|
||||
private final Sorter.DocComparator comparator;
|
||||
private final int[] tmp;
|
||||
|
||||
public DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
|
||||
DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
|
||||
super(docs.length / 64);
|
||||
this.docs = docs;
|
||||
this.comparator = comparator;
|
||||
|
@ -168,7 +136,7 @@ public abstract class Sorter {
|
|||
}
|
||||
|
||||
/** Computes the old-to-new permutation over the given comparator. */
|
||||
protected static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
|
||||
private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
|
||||
// check if the index is sorted
|
||||
boolean sorted = true;
|
||||
for (int i = 1; i < maxDoc; ++i) {
|
||||
|
@ -242,20 +210,75 @@ public abstract class Sorter {
|
|||
* <b>NOTE:</b> deleted documents are expected to appear in the mapping as
|
||||
* well; they will, however, be marked as deleted in the sorted view.
|
||||
*/
|
||||
public abstract DocMap sort(AtomicReader reader) throws IOException;
|
||||
DocMap sort(AtomicReader reader) throws IOException {
|
||||
SortField fields[] = sort.getSort();
|
||||
final int reverseMul[] = new int[fields.length];
|
||||
final FieldComparator<?> comparators[] = new FieldComparator[fields.length];
|
||||
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
reverseMul[i] = fields[i].getReverse() ? -1 : 1;
|
||||
comparators[i] = fields[i].getComparator(1, i);
|
||||
comparators[i].setNextReader(reader.getContext());
|
||||
comparators[i].setScorer(FAKESCORER);
|
||||
}
|
||||
final DocComparator comparator = new DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
try {
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
// TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
|
||||
// the segments are always the same here...
|
||||
comparators[i].copy(0, docID1);
|
||||
comparators[i].setBottom(0);
|
||||
int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
|
||||
if (comp != 0) {
|
||||
return comp;
|
||||
}
|
||||
}
|
||||
return Integer.compare(docID1, docID2); // docid order tiebreak
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
return sort(reader.maxDoc(), comparator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the identifier of this {@link Sorter}.
|
||||
* <p>This identifier is similar to {@link Object#hashCode()} and should be
|
||||
* chosen so that two instances of this class that sort documents likewise
|
||||
* will have the same identifier. On the contrary, this identifier should be
|
||||
* different on different {@link Sorter sorters}.
|
||||
* different on different {@link Sort sorts}.
|
||||
*/
|
||||
public abstract String getID();
|
||||
public String getID() {
|
||||
return sort.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getID();
|
||||
}
|
||||
|
||||
static final Scorer FAKESCORER = new Scorer(null) {
|
||||
|
||||
@Override
|
||||
public float score() throws IOException { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public int docID() { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public long cost() { throw new UnsupportedOperationException(); }
|
||||
};
|
||||
|
||||
}
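For readers unfamiliar with the DocMap contract used throughout this class, the following standalone sketch shows the idea behind sort(maxDoc, comparator): produce an old-to-new/new-to-old permutation of doc IDs, with ties broken by original doc ID. It deliberately uses a plain array of per-document values rather than the FieldComparator plumbing above, so it is an illustration of the contract, not the class's implementation (which uses TimSorter).

// Conceptual sketch only: compute oldToNew/newToOld permutations such that
// oldToNew[newToOld[docID]] == docID, sorting by a long value per document.
import java.util.Arrays;
import java.util.Comparator;

public class PermutationSketch {
  public static int[][] sort(final long[] values) {
    final int maxDoc = values.length;
    Integer[] newToOldBoxed = new Integer[maxDoc];
    for (int i = 0; i < maxDoc; i++) {
      newToOldBoxed[i] = i;
    }
    // Sort by value, ties broken by original doc ID (the "docid order tiebreak").
    Arrays.sort(newToOldBoxed, new Comparator<Integer>() {
      @Override
      public int compare(Integer d1, Integer d2) {
        int cmp = Long.compare(values[d1], values[d2]);
        return cmp != 0 ? cmp : Integer.compare(d1, d2);
      }
    });
    int[] newToOld = new int[maxDoc];
    int[] oldToNew = new int[maxDoc];
    for (int newID = 0; newID < maxDoc; newID++) {
      newToOld[newID] = newToOldBoxed[newID];
      oldToNew[newToOld[newID]] = newID;
    }
    return new int[][] { oldToNew, newToOld };
  }
}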
@ -35,6 +35,7 @@ import org.apache.lucene.index.StoredFieldVisitor;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.RAMFile;
|
||||
|
@ -48,13 +49,13 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
|
|||
|
||||
/**
|
||||
* An {@link AtomicReader} which supports sorting documents by a given
|
||||
* {@link Sorter}. You can use this class to sort an index as follows:
|
||||
* {@link Sort}. You can use this class to sort an index as follows:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* IndexWriter writer; // writer to which the sorted index will be added
|
||||
* DirectoryReader reader; // reader on the input index
|
||||
* Sorter sorter; // determines how the documents are sorted
|
||||
* AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
|
||||
* Sort sort; // determines how the documents are sorted
|
||||
* AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
|
||||
* writer.addIndexes(sortingReader);
|
||||
* writer.close();
|
||||
* reader.close();
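The snippet in this javadoc is schematic; the hedged sketch below fills it in as compilable code (the "timestamp" field is a placeholder, and the caller supplies the Version, Analyzer and Directories). The important detail is that addIndexes must be given the sorted view, not the original reader.

// Hedged sketch, not part of this patch: write a sorted copy of an existing index.
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.sorter.SortingAtomicReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

public class SortIndexExample {
  /** Writes a copy of {@code input}, sorted by the "timestamp" field, into {@code output}. */
  public static void sortIndex(Directory input, Directory output,
                               Version matchVersion, Analyzer analyzer) throws IOException {
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    DirectoryReader reader = DirectoryReader.open(input);
    IndexWriter writer = new IndexWriter(output, new IndexWriterConfig(matchVersion, analyzer));
    try {
      AtomicReader sortingReader =
          SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
      writer.addIndexes(sortingReader);  // add the sorted view, not the raw reader
    } finally {
      writer.close();
      reader.close();
    }
  }
}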
|
||||
|
@ -480,7 +481,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
|
||||
|
||||
/**
|
||||
* A {@link Sorter} which sorts two parallel arrays of doc IDs and
|
||||
* A {@link TimSorter} which sorts two parallel arrays of doc IDs and
|
||||
* offsets in one go. Every time a doc ID is 'swapped', its corresponding offset
|
||||
* is swapped too.
|
||||
*/
|
||||
|
@ -708,14 +709,14 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
}
|
||||
|
||||
/** Return a sorted view of <code>reader</code> according to the order
|
||||
* defined by <code>sorter</code>. If the reader is already sorted, this
|
||||
* defined by <code>sort</code>. If the reader is already sorted, this
|
||||
* method might return the reader as-is. */
|
||||
public static AtomicReader wrap(AtomicReader reader, Sorter sorter) throws IOException {
|
||||
return wrap(reader, sorter.sort(reader));
|
||||
public static AtomicReader wrap(AtomicReader reader, Sort sort) throws IOException {
|
||||
return wrap(reader, new Sorter(sort).sort(reader));
|
||||
}
|
||||
|
||||
/** Expert: same as {@link #wrap(AtomicReader, Sorter)} but operates directly on a {@link Sorter.DocMap}. */
|
||||
public static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
|
||||
/** Expert: same as {@link #wrap(AtomicReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */
|
||||
static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
|
||||
if (docMap == null) {
|
||||
// the reader is already sorted
|
||||
return reader;
@ -22,6 +22,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer; // javadocs
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -33,22 +34,23 @@ import org.apache.lucene.index.SegmentCommitInfo;
|
|||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
|
||||
/** A {@link MergePolicy} that reorders documents according to a {@link Sorter}
|
||||
/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
|
||||
* before merging them. As a consequence, all segments resulting from a merge
|
||||
* will be sorted while segments resulting from a flush will be in the order
|
||||
* in which documents have been added.
|
||||
* <p><b>NOTE</b>: Never use this {@link MergePolicy} if you rely on
|
||||
* {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)}
|
||||
* <p><b>NOTE</b>: Never use this policy if you rely on
|
||||
* {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
|
||||
* to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
|
||||
* <p><b>NOTE</b>: This {@link MergePolicy} should only be used with idempotent
|
||||
* {@link Sorter}s so that the order of segments is predictable. For example,
|
||||
* using {@link SortingMergePolicy} with {@link Sorter#REVERSE_DOCS} (which is
|
||||
* not idempotent) will make the order of documents in a segment depend on the
|
||||
* number of times the segment has been merged.
|
||||
* <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
|
||||
* so that the order of segments is predictable. For example, using
|
||||
* {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
|
||||
* the order of documents in a segment depend on the number of times the segment
|
||||
* has been merged.
|
||||
* @lucene.experimental */
|
||||
public final class SortingMergePolicy extends MergePolicy {
|
||||
|
||||
|
@ -147,12 +149,12 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
|
||||
}
|
||||
|
||||
/** Returns true if the given reader is sorted by the given sorter. */
|
||||
public static boolean isSorted(AtomicReader reader, Sorter sorter) {
|
||||
/** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
|
||||
public static boolean isSorted(AtomicReader reader, Sort sort) {
|
||||
if (reader instanceof SegmentReader) {
|
||||
final SegmentReader segReader = (SegmentReader) reader;
|
||||
final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
|
||||
if (diagnostics != null && sorter.getID().equals(diagnostics.get(SORTER_ID_PROP))) {
|
||||
if (diagnostics != null && sort.toString().equals(diagnostics.get(SORTER_ID_PROP))) {
|
||||
return true;
|
||||
}
|
||||
}
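A hedged sketch built on the new isSorted signature (nothing below is part of this patch): iterate the leaves of an open reader and count how many were written sorted, a convenient sanity check after switching a writer to a SortingMergePolicy.

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.Sort;

public class SortedSegmentCount {
  /** Returns how many leaves of {@code reader} were produced sorted by {@code sort}. */
  public static int countSorted(DirectoryReader reader, Sort sort) {
    int sorted = 0;
    for (AtomicReaderContext leaf : reader.leaves()) {
      if (SortingMergePolicy.isSorted(leaf.reader(), sort)) {
        sorted++;
      }
    }
    return sorted;
  }
}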
|
||||
|
@ -172,11 +174,13 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
|
||||
final MergePolicy in;
|
||||
final Sorter sorter;
|
||||
final Sort sort;
|
||||
|
||||
/** Create a new {@link MergePolicy} that sorts documents with <code>sorter</code>. */
|
||||
public SortingMergePolicy(MergePolicy in, Sorter sorter) {
|
||||
/** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
|
||||
public SortingMergePolicy(MergePolicy in, Sort sort) {
|
||||
this.in = in;
|
||||
this.sorter = sorter;
|
||||
this.sorter = new Sorter(sort);
|
||||
this.sort = sort;
|
||||
}
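A hedged configuration sketch (the field name and the caller-supplied Version and Analyzer are placeholders): wrap whatever merge policy the config already carries, exactly as the suggester change later in this commit does.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.Version;

public class SortingMergePolicyExample {
  public static IndexWriterConfig sortedConfig(Version matchVersion, Analyzer analyzer) {
    IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, analyzer);
    Sort indexSort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), indexSort));
    return iwc;
  }
}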
|
||||
|
||||
@Override
|
||||
|
@ -200,7 +204,7 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
|
||||
@Override
|
||||
public MergePolicy clone() {
|
||||
return new SortingMergePolicy(in.clone(), sorter);
|
||||
return new SortingMergePolicy(in.clone(), sort);
|
||||
}
|
||||
|
||||
@Override
@ -17,19 +17,16 @@
|
|||
-->
|
||||
<html>
|
||||
<body>
|
||||
<p>Provides index sorting capablities. The application can use one of the
|
||||
pre-existing Sorter implementations, e.g. to sort by a
|
||||
{@link org.apache.lucene.index.sorter.NumericDocValuesSorter}
|
||||
or {@link org.apache.lucene.index.sorter.Sorter#REVERSE_DOCS reverse} the order
|
||||
of the documents. Additionally, the application can implement a custom
|
||||
{@link org.apache.lucene.index.sorter.Sorter} which returns a permutation on
|
||||
a source {@link org.apache.lucene.index.AtomicReader}'s document IDs, to sort
|
||||
the input documents by additional criteria.
|
||||
<p>Provides index sorting capabilities. The application can use any
|
||||
Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
|
||||
reverse the order of the documents (by using SortField.Type.DOC in reverse).
|
||||
Multi-level sorts can be specified the same way you would when searching, by
|
||||
building Sort from multiple SortFields.
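To make the multi-level case concrete, a small hedged example (field names are illustrative): segments can be sorted by descending popularity and then ascending id by passing several SortFields to Sort, just as at search time.

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

public class MultiLevelSortExample {
  public static Sort popularityThenId() {
    return new Sort(
        new SortField("popularity", SortField.Type.LONG, true),  // reverse = descending
        new SortField("id", SortField.Type.STRING));              // then ascending id
  }
}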
|
||||
|
||||
<p>{@link org.apache.lucene.index.sorter.SortingMergePolicy} can be used to
|
||||
make Lucene sort segments before merging them. This will ensure that every
|
||||
segment resulting from a merge will be sorted according to the provided
|
||||
{@link org.apache.lucene.index.sorter.Sorter}. This however makes merging and
|
||||
{@link org.apache.lucene.search.Sort}. This however makes merging and
|
||||
thus indexing slower.
|
||||
|
||||
<p>Sorted segments allow for early query termination when the sort order
@ -24,6 +24,8 @@ import java.util.List;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -31,9 +33,9 @@ import org.junit.BeforeClass;
|
|||
|
||||
public class IndexSortingTest extends SorterTestBase {
|
||||
|
||||
private static final Sorter[] SORTERS = new Sorter[] {
|
||||
new NumericDocValuesSorter(NUMERIC_DV_FIELD, true),
|
||||
Sorter.REVERSE_DOCS,
|
||||
private static final Sort[] SORT = new Sort[] {
|
||||
new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)),
|
||||
new Sort(new SortField(null, SortField.Type.DOC, true))
|
||||
};
|
||||
|
||||
@BeforeClass
|
||||
|
@ -47,13 +49,14 @@ public class IndexSortingTest extends SorterTestBase {
|
|||
values.add(Integer.valueOf(reader.document(i).get(ID_FIELD)));
|
||||
}
|
||||
}
|
||||
Sorter sorter = SORTERS[random().nextInt(SORTERS.length)];
|
||||
if (sorter == Sorter.REVERSE_DOCS) {
|
||||
int idx = random().nextInt(SORT.length);
|
||||
Sort sorter = SORT[idx];
|
||||
if (idx == 1) { // reverse doc sort
|
||||
Collections.reverse(values);
|
||||
} else {
|
||||
Collections.sort(values);
|
||||
if (sorter instanceof NumericDocValuesSorter && random().nextBoolean()) {
|
||||
sorter = new NumericDocValuesSorter(NUMERIC_DV_FIELD, false); // descending
|
||||
if (random().nextBoolean()) {
|
||||
sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending
|
||||
Collections.reverse(values);
|
||||
}
|
||||
}
@ -17,56 +17,37 @@ package org.apache.lucene.index.sorter;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class SortingAtomicReaderTest extends SorterTestBase {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClassSortingAtomicReaderTest() throws Exception {
|
||||
// build the mapping from the reader, since we deleted documents, some of
|
||||
// them might have disappeared from the index (e.g. if an entire segment is
|
||||
// dropped b/c all its docs are deleted)
|
||||
final int[] values = new int[reader.maxDoc()];
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
values[i] = Integer.valueOf(reader.document(i).get(ID_FIELD));
|
||||
}
|
||||
final Sorter.DocComparator comparator = new Sorter.DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
final int v1 = values[docID1];
|
||||
final int v2 = values[docID2];
|
||||
return v1 < v2 ? -1 : v1 == v2 ? 0 : 1;
|
||||
}
|
||||
};
|
||||
|
||||
final Sorter.DocMap docMap = Sorter.sort(reader.maxDoc(), comparator);
|
||||
// sort the index by id (as integer, in NUMERIC_DV_FIELD)
|
||||
Sort sort = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT));
|
||||
final Sorter.DocMap docMap = new Sorter(sort).sort(reader);
|
||||
|
||||
// Sorter.compute also sorts the values
|
||||
NumericDocValues dv = reader.getNumericDocValues(NUMERIC_DV_FIELD);
|
||||
sortedValues = new Integer[reader.maxDoc()];
|
||||
for (int i = 0; i < reader.maxDoc(); ++i) {
|
||||
sortedValues[docMap.oldToNew(i)] = values[i];
|
||||
sortedValues[docMap.oldToNew(i)] = (int)dv.get(i);
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println("docMap: " + docMap);
|
||||
System.out.println("sortedValues: " + Arrays.toString(sortedValues));
|
||||
}
|
||||
|
||||
reader = SortingAtomicReader.wrap(reader, new Sorter() {
|
||||
@Override
|
||||
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
|
||||
return docMap;
|
||||
}
|
||||
@Override
|
||||
public String getID() {
|
||||
return ID_FIELD;
|
||||
}
|
||||
});
|
||||
// sort the index by id (as integer, in NUMERIC_DV_FIELD)
|
||||
reader = SortingAtomicReader.wrap(reader, sort);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.print("mapped-deleted-docs: ");
|
||||
|
@ -82,4 +63,13 @@ public class SortingAtomicReaderTest extends SorterTestBase {
|
|||
TestUtil.checkReader(reader);
|
||||
}
|
||||
|
||||
public void testBadSort() throws Exception {
|
||||
try {
|
||||
SortingAtomicReader.wrap(reader, Sort.RELEVANCE);
|
||||
fail("Didn't get expected exception");
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
@ -37,6 +37,8 @@ import org.apache.lucene.search.DocIdSet;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.QueryWrapperFilter;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
@ -89,47 +91,14 @@ public class TestBlockJoinSorter extends LuceneTestCase {
|
|||
final AtomicReader reader = getOnlySegmentReader(indexReader);
|
||||
final Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
|
||||
final FixedBitSet parentBits = (FixedBitSet) parentsFilter.getDocIdSet(reader.getContext(), null);
|
||||
|
||||
final NumericDocValues parentValues = reader.getNumericDocValues("parent_val");
|
||||
final Sorter.DocComparator parentComparator = new Sorter.DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
assertTrue(parentBits.get(docID1));
|
||||
assertTrue(parentBits.get(docID2));
|
||||
return Long.compare(parentValues.get(docID1), parentValues.get(docID2));
|
||||
}
|
||||
};
|
||||
|
||||
final NumericDocValues childValues = reader.getNumericDocValues("child_val");
|
||||
final Sorter.DocComparator childComparator = new Sorter.DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
assertFalse(parentBits.get(docID1));
|
||||
assertFalse(parentBits.get(docID2));
|
||||
return Long.compare(childValues.get(docID1), childValues.get(docID2));
|
||||
}
|
||||
};
|
||||
|
||||
final Sorter sorter = new BlockJoinSorter(parentsFilter) {
|
||||
final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
|
||||
final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
|
||||
|
||||
@Override
|
||||
public String getID() {
|
||||
return "Dummy";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocComparator getParentComparator(AtomicReader r) {
|
||||
assertEquals(reader, r);
|
||||
return parentComparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocComparator getChildComparator(AtomicReader r) {
|
||||
assertEquals(reader, r);
|
||||
return childComparator;
|
||||
}
|
||||
|
||||
};
|
||||
final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
|
||||
final Sorter sorter = new Sorter(sort);
|
||||
final Sorter.DocMap docMap = sorter.sort(reader);
|
||||
assertEquals(reader.maxDoc(), docMap.size());
@ -51,14 +51,14 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
private int numDocs;
|
||||
private List<String> terms;
|
||||
private Directory dir;
|
||||
private Sorter sorter;
|
||||
private Sort sort;
|
||||
private RandomIndexWriter iw;
|
||||
private IndexReader reader;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
sorter = new NumericDocValuesSorter("ndv1");
|
||||
sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
|
||||
}
|
||||
|
||||
private Document randomDocument() {
|
||||
|
@ -80,7 +80,7 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
terms = new ArrayList<String>(randomTerms);
|
||||
final long seed = random().nextLong();
|
||||
final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
|
||||
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sorter));
|
||||
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
|
||||
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
final Document doc = randomDocument();
|
||||
|
@ -120,7 +120,7 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
for (int i = 0; i < iters; ++i) {
|
||||
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
|
||||
searcher.search(query, collector1);
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sorter, numHits));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
|
||||
}
|
||||
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
|
||||
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||
|
@ -144,7 +144,8 @@ public class TestEarlyTermination extends LuceneTestCase {
|
|||
for (int i = 0; i < iters; ++i) {
|
||||
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
|
||||
searcher.search(query, collector1);
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, new NumericDocValuesSorter("ndv2"), numHits) {
|
||||
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
super.setNextReader(context);
@ -40,6 +40,8 @@ import org.apache.lucene.index.RandomIndexWriter;
|
|||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TieredMergePolicy;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -50,14 +52,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
|
||||
private List<String> terms;
|
||||
private Directory dir1, dir2;
|
||||
private Sorter sorter;
|
||||
private Sort sort;
|
||||
private IndexReader reader;
|
||||
private IndexReader sortedReader;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
sorter = new NumericDocValuesSorter("ndv");
|
||||
sort = new Sort(new SortField("ndv", SortField.Type.LONG));
|
||||
createRandomIndexes();
|
||||
}
|
||||
|
||||
|
@ -68,7 +70,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
return doc;
|
||||
}
|
||||
|
||||
static MergePolicy newSortingMergePolicy(Sorter sorter) {
|
||||
static MergePolicy newSortingMergePolicy(Sort sort) {
|
||||
// create a MP with a low merge factor so that many merges happen
|
||||
MergePolicy mp;
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -83,7 +85,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
mp = lmp;
|
||||
}
|
||||
// wrap it with a sorting mp
|
||||
return new SortingMergePolicy(mp, sorter);
|
||||
return new SortingMergePolicy(mp, sort);
|
||||
}
|
||||
|
||||
private void createRandomIndexes() throws IOException {
|
||||
|
@ -99,7 +101,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
final long seed = random().nextLong();
|
||||
final IndexWriterConfig iwc1 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
|
||||
final IndexWriterConfig iwc2 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
|
||||
iwc2.setMergePolicy(newSortingMergePolicy(sorter));
|
||||
iwc2.setMergePolicy(newSortingMergePolicy(sort));
|
||||
final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
|
||||
final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
|
@ -162,7 +164,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSortingMP() throws IOException {
|
||||
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
|
||||
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
|
||||
final AtomicReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);
|
||||
|
||||
assertSorted(sortedReader1);
|
||||
|
@ -171,4 +173,13 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
assertReaderEquals("", sortedReader1, sortedReader2);
|
||||
}
|
||||
|
||||
public void testBadSort() throws Exception {
|
||||
try {
|
||||
new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE);
|
||||
fail("Didn't get expected exception");
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
@ -46,17 +46,12 @@ import org.apache.lucene.index.BinaryDocValues;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FilterAtomicReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
|
||||
import org.apache.lucene.index.sorter.Sorter;
|
||||
import org.apache.lucene.index.sorter.SortingAtomicReader;
|
||||
import org.apache.lucene.index.sorter.SortingMergePolicy;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
|
@ -117,9 +112,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
/** Analyzer used at index time */
|
||||
protected final Analyzer indexAnalyzer;
|
||||
final Version matchVersion;
|
||||
private final File indexPath;
|
||||
private final Directory dir;
|
||||
final int minPrefixChars;
|
||||
private Directory dir;
|
||||
|
||||
/** Used for ongoing NRT additions/updates. */
|
||||
private IndexWriter writer;
|
||||
|
@ -131,16 +125,19 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
* PrefixQuery is used (4). */
|
||||
public static final int DEFAULT_MIN_PREFIX_CHARS = 4;
|
||||
|
||||
private Sorter sorter;
|
||||
/** How we sort the postings and search results. */
|
||||
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
|
||||
|
||||
/** Create a new instance, loading from a previously built
|
||||
* directory, if it exists. */
|
||||
public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
|
||||
this(matchVersion, indexPath, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
|
||||
* directory, if it exists. Note that {@link #close}
|
||||
* will also close the provided directory. */
|
||||
public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
|
||||
this(matchVersion, dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
|
||||
}
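A hedged sketch of the new Directory-based construction (path handling and names here are placeholders): the suggester no longer manages a File path itself, and per the javadoc above, close() will also close the Directory you pass in.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SuggesterSetup {
  public static AnalyzingInfixSuggester open(Version matchVersion, Analyzer analyzer, File path) throws IOException {
    Directory dir = FSDirectory.open(path);  // closed later by suggester.close()
    return new AnalyzingInfixSuggester(matchVersion, dir, analyzer);
  }
}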
|
||||
|
||||
/** Create a new instance, loading from a previously built
|
||||
* directory, if it exists.
|
||||
* directory, if it exists. Note that {@link #close}
|
||||
* will also close the provided directory.
|
||||
*
|
||||
* @param minPrefixChars Minimum number of leading characters
|
||||
* before PrefixQuery is used (default 4).
|
||||
|
@ -148,7 +145,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
* ngrams (increasing index size but making lookups
|
||||
* faster).
|
||||
*/
|
||||
public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
|
||||
public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
|
||||
|
||||
if (minPrefixChars < 0) {
|
||||
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
|
||||
|
@ -157,33 +154,29 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
this.queryAnalyzer = queryAnalyzer;
|
||||
this.indexAnalyzer = indexAnalyzer;
|
||||
this.matchVersion = matchVersion;
|
||||
this.indexPath = indexPath;
|
||||
this.dir = dir;
|
||||
this.minPrefixChars = minPrefixChars;
|
||||
dir = getDirectory(indexPath);
|
||||
|
||||
if (DirectoryReader.indexExists(dir)) {
|
||||
// Already built; open it:
|
||||
initSorter();
|
||||
writer = new IndexWriter(dir,
|
||||
getIndexWriterConfig(matchVersion, getGramAnalyzer(), sorter, IndexWriterConfig.OpenMode.APPEND));
|
||||
getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
|
||||
searcherMgr = new SearcherManager(writer, true, null);
|
||||
}
|
||||
}
|
||||
|
||||
/** Override this to customize index settings, e.g. which
|
||||
* codec to use. Sorter is null if this config is for
|
||||
* the first pass writer. */
|
||||
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sorter sorter, IndexWriterConfig.OpenMode openMode) {
|
||||
* codec to use. */
|
||||
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
|
||||
iwc.setCodec(new Lucene46Codec());
|
||||
iwc.setOpenMode(openMode);
|
||||
|
||||
if (sorter != null) {
|
||||
// This way all merged segments will be sorted at
|
||||
// merge time, allow for per-segment early termination
|
||||
// when those segments are searched:
|
||||
iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sorter));
|
||||
}
|
||||
// This way all merged segments will be sorted at
|
||||
// merge time, allow for per-segment early termination
|
||||
// when those segments are searched:
|
||||
iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));
|
||||
|
||||
return iwc;
|
||||
}
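Since the javadoc above invites overriding this hook, here is a hedged sketch of a subclass that customizes the codec while keeping the SortingMergePolicy wiring from the base implementation; the class name and codec choice are only examples.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

public class CustomCodecInfixSuggester extends AnalyzingInfixSuggester {
  public CustomCodecInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
    super(matchVersion, dir, analyzer);
  }

  @Override
  protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer,
                                                   IndexWriterConfig.OpenMode openMode) {
    IndexWriterConfig iwc = super.getIndexWriterConfig(matchVersion, indexAnalyzer, openMode);
    iwc.setCodec(new Lucene46Codec());  // e.g. swap in per-field postings/doc values formats here
    return iwc;
  }
}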
|
||||
|
||||
|
@ -206,16 +199,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
writer = null;
|
||||
}
|
||||
|
||||
Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
|
||||
|
||||
IndexWriter w = null;
|
||||
AtomicReader r = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
// First pass: build a temporary normal Lucene index,
|
||||
// just indexing the suggestions as they iterate:
|
||||
w = new IndexWriter(dirTmp,
|
||||
getIndexWriterConfig(matchVersion, getGramAnalyzer(), null, IndexWriterConfig.OpenMode.CREATE));
|
||||
writer = new IndexWriter(dir,
|
||||
getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
|
||||
BytesRef text;
|
||||
Document doc = new Document();
|
||||
FieldType ft = getTextFieldType();
|
||||
|
@ -253,37 +243,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
if (iter.hasPayloads()) {
|
||||
payloadField.setBytesValue(iter.payload());
|
||||
}
|
||||
w.addDocument(doc);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
|
||||
|
||||
// Second pass: sort the entire index:
|
||||
r = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(w, false));
|
||||
//long t1 = System.nanoTime();
|
||||
|
||||
// We can rollback the first pass, now that have have
|
||||
// the reader open, because we will discard it anyway
|
||||
// (no sense in fsync'ing it):
|
||||
w.rollback();
|
||||
|
||||
initSorter();
|
||||
|
||||
r = SortingAtomicReader.wrap(r, sorter);
|
||||
|
||||
writer = new IndexWriter(dir,
|
||||
getIndexWriterConfig(matchVersion, getGramAnalyzer(), sorter, IndexWriterConfig.OpenMode.CREATE));
|
||||
writer.addIndexes(new IndexReader[] {r});
|
||||
r.close();
|
||||
|
||||
//System.out.println("sort time: " + ((System.nanoTime()-t1)/1000000) + " msec");
|
||||
|
||||
searcherMgr = new SearcherManager(writer, true, null);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(w, r, dirTmp);
|
||||
IOUtils.close(r);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(w, writer, r, dirTmp);
|
||||
IOUtils.closeWhileHandlingException(writer, r);
|
||||
writer = null;
|
||||
}
|
||||
}
|
||||
|
@ -359,39 +329,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
searcherMgr.maybeRefreshBlocking();
|
||||
}
|
||||
|
||||
private void initSorter() {
|
||||
sorter = new Sorter() {
|
||||
|
||||
@Override
|
||||
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
|
||||
final NumericDocValues weights = reader.getNumericDocValues("weight");
|
||||
final Sorter.DocComparator comparator = new Sorter.DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
final long v1 = weights.get(docID1);
|
||||
final long v2 = weights.get(docID2);
|
||||
// Reverse sort (highest weight first);
|
||||
// java7 only:
|
||||
//return Long.compare(v2, v1);
|
||||
if (v1 > v2) {
|
||||
return -1;
|
||||
} else if (v1 < v2) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
return Sorter.sort(reader.maxDoc(), comparator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getID() {
|
||||
return "BySuggestWeight";
|
||||
}
|
||||
};
|
||||
}
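
The handwritten comparator removed above sorted documents by the numeric "weight" field in descending order. Judging from the TopFieldCollector and EarlyTerminatingSortingCollector calls below, the SORT constant that replaces it is presumably declared along these lines (a sketch, not the exact declaration, which sits outside this hunk; it assumes imports of org.apache.lucene.search.Sort and org.apache.lucene.search.SortField):

    // Reverse sort on the "weight" NumericDocValues field (highest weight first), matching
    // the compare() logic of the removed Sorter.
    private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));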
|
||||
|
||||
/**
|
||||
* Subclass can override this method to change the field type of the text field
|
||||
* e.g. to change the index options
|
||||
|
@ -497,12 +434,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
//System.out.println("finalQuery=" + query);
|
||||
|
||||
// Sort by weight, descending:
|
||||
TopFieldCollector c = TopFieldCollector.create(new Sort(new SortField("weight", SortField.Type.LONG, true)),
|
||||
num, true, false, false, false);
|
||||
TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);
|
||||
|
||||
// We sorted postings by weight during indexing, so we
|
||||
// only retrieve the first num hits now:
|
||||
Collector c2 = new EarlyTerminatingSortingCollector(c, sorter, num);
|
||||
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
|
||||
IndexSearcher searcher = searcherMgr.acquire();
|
||||
List<LookupResult> results = null;
|
||||
try {
|
||||
|
@ -512,7 +448,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
TopFieldDocs hits = (TopFieldDocs) c.topDocs();
|
||||
|
||||
// Slower way if postings are not pre-sorted by weight:
|
||||
// hits = searcher.search(query, null, num, new Sort(new SortField("weight", SortField.Type.LONG, true)));
|
||||
// hits = searcher.search(query, null, num, SORT);
|
||||
results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
|
||||
} finally {
|
||||
searcherMgr.release(searcher);
|
||||
|
@ -676,11 +612,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
if (writer != null) {
|
||||
writer.close();
|
||||
writer = null;
|
||||
}
|
||||
if (dir != null) {
|
||||
dir.close();
|
||||
dir = null;
|
||||
writer = null;
|
||||
}
|
||||
}
@ -17,7 +17,6 @@ package org.apache.lucene.search.suggest.analyzing;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
|
@ -38,6 +37,7 @@ import org.apache.lucene.search.FieldDoc;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
@ -92,8 +92,8 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
|
|||
* Create a new instance, loading from a previously built
|
||||
* directory, if it exists.
|
||||
*/
|
||||
public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
|
||||
super(matchVersion, indexPath, analyzer);
|
||||
public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
|
||||
super(matchVersion, dir, analyzer);
|
||||
this.blenderType = BlenderType.POSITION_LINEAR;
|
||||
this.numFactor = DEFAULT_NUM_FACTOR;
|
||||
}
|
||||
|
@ -106,9 +106,9 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
|
|||
* @param numFactor Factor to multiply the number of searched elements by before the blended weights are computed
|
||||
* @throws IOException If there are problems opening the underlying Lucene index.
|
||||
*/
|
||||
public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
|
||||
public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
|
||||
int minPrefixChars, BlenderType blenderType, int numFactor) throws IOException {
|
||||
super(matchVersion, indexPath, indexAnalyzer, queryAnalyzer, minPrefixChars);
|
||||
super(matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars);
|
||||
this.blenderType = blenderType;
|
||||
this.numFactor = numFactor;
|
||||
}
@ -40,6 +40,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
|
|||
import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;
|
||||
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
|
||||
import org.apache.lucene.search.suggest.tst.TSTLookup;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Ignore;
|
||||
|
@ -161,7 +162,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
|
|||
} catch (InstantiationException e) {
|
||||
Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
|
||||
if (cls == AnalyzingInfixSuggester.class) {
|
||||
lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, TestUtil.getTempDir("LookupBenchmarkTest"), a);
|
||||
lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, FSDirectory.open(TestUtil.getTempDir("LookupBenchmarkTest")), a);
|
||||
} else {
|
||||
Constructor<? extends Lookup> ctor = cls.getConstructor(Analyzer.class);
|
||||
lookup = ctor.newInstance(a);
@ -21,7 +21,6 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
|
@ -39,7 +38,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
|||
import org.apache.lucene.search.suggest.Input;
|
||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -55,15 +53,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
|
||||
|
@ -106,22 +97,12 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
assertEquals(2, suggester.getCount());
|
||||
suggester.close();
|
||||
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
|
||||
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
|
||||
assertEquals(2, results.size());
|
||||
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
|
||||
|
@ -159,15 +140,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
|
||||
@Override
|
||||
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
|
||||
try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
|
||||
|
@ -239,17 +213,11 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
new Input("lend me your ear", 8, new BytesRef("foobar")),
|
||||
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
int minPrefixLength = random().nextInt(10);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
for(int i=0;i<2;i++) {
|
||||
|
@ -306,12 +274,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
|
||||
// Make sure things still work after close and reopen:
|
||||
suggester.close();
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
|
||||
}
|
||||
suggester.close();
|
||||
}
|
||||
|
@ -321,15 +284,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
|
||||
assertEquals(1, results.size());
|
||||
|
@ -342,15 +298,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
|
||||
assertEquals(1, results.size());
|
||||
|
@ -359,18 +308,13 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
|
||||
// Try again, but overriding addPrefixMatch to highlight
|
||||
// the entire hit:
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
|
||||
@Override
|
||||
protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
|
||||
sb.append("<b>");
|
||||
sb.append(surface);
|
||||
sb.append("</b>");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
|
||||
|
@ -384,15 +328,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
suggester.close();
|
||||
suggester.close();
|
||||
|
@ -418,14 +355,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, indexAnalyzer, queryAnalyzer, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), indexAnalyzer, queryAnalyzer, 3);
|
||||
|
||||
Input keys[] = new Input[] {
|
||||
new Input("a bob for apples", 10, new BytesRef("foobaz")),
|
||||
|
@ -439,14 +369,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testEmptyAtStart() throws Exception {
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(new Input[0]));
|
||||
suggester.add(new BytesRef("a penny saved is a penny earned"), 10, new BytesRef("foobaz"));
|
||||
suggester.add(new BytesRef("lend me your ear"), 8, new BytesRef("foobar"));
|
||||
|
@ -483,14 +407,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testBothExactAndPrefix() throws Exception {
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(new Input[0]));
|
||||
suggester.add(new BytesRef("the pen is pretty"), 10, new BytesRef("foobaz"));
|
||||
suggester.refresh();
|
||||
|
@ -563,12 +481,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
System.out.println(" minPrefixChars=" + minPrefixChars);
|
||||
}
|
||||
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
|
||||
|
||||
// Initial suggester built with nothing:
|
||||
suggester.build(new InputArrayIterator(new Input[0]));
|
||||
|
@ -648,12 +561,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
lookupThread.finish();
|
||||
suggester.close();
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
|
||||
lookupThread = new LookupThread(suggester);
|
||||
lookupThread.start();
|
||||
|
||||
|
@ -824,15 +732,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
|||
new Input("lend me your ear", 8, new BytesRef("foobar")),
|
||||
};
|
||||
|
||||
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newDirectory();
|
||||
}
|
||||
};
|
||||
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
|
||||
|
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
|||
import org.apache.lucene.search.suggest.Input;
|
||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -49,15 +48,10 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
|||
File tempDir = TestUtil.getTempDir("BlendedInfixSuggesterTest");
|
||||
|
||||
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
|
||||
BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
|
||||
BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
|
||||
BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
|
||||
BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
// we query for star wars and check that the weight
|
||||
|
@ -94,12 +88,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
|
||||
|
||||
// BlenderType.POSITION_LINEAR is used by default (the weight is reduced by position*10%)
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
assertEquals(w, getInResults(suggester, "top", pl, 1));
|
||||
|
@ -109,13 +98,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
|||
suggester.close();
|
||||
|
||||
// BlenderType.POSITION_RECIPROCAL uses 1/(1+p) * w, where w is the weight and p is the position of the word
|
||||
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
assertEquals(w, getInResults(suggester, "top", pl, 1));
|
||||
|
@ -145,13 +129,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
|
||||
|
||||
// if factor is small, we don't get the expected element
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
|
||||
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
|
@ -169,13 +148,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
|||
suggester.close();
|
||||
|
||||
// if we increase the factor we have it
|
||||
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
// we have it
|
||||
|
@ -205,14 +179,9 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
|
|||
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
|
||||
|
||||
// if factor is small, we don't get the expected element
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
|
||||
BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
|
||||
@Override
|
||||
protected Directory getDirectory(File path) {
|
||||
return newFSDirectory(path);
|
||||
}
|
||||
};
|
||||
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
|
||||
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
|
||||
BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
|
||||
|
|
|
@ -440,8 +440,10 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
|
|||
if (dir instanceof BaseDirectoryWrapper) {
|
||||
((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
|
||||
}
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
||||
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random())).setInfoStream(new FailOnNonBulkMergesInfoStream());
|
||||
analyzer).setInfoStream(new FailOnNonBulkMergesInfoStream());
|
||||
|
||||
if (LuceneTestCase.TEST_NIGHTLY) {
|
||||
// newIWConfig makes smallish max seg size, which
|
||||
|
|
|
@ -76,6 +76,15 @@ Velocity 1.7 and Velocity Tools 2.0
|
|||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.5
|
||||
|
||||
Upgrading from Solr 4.7
|
||||
----------------------
|
||||
|
||||
* In previous versions of Solr, Terms that exceeded Lucene's MAX_TERM_LENGTH were
  silently ignored when indexing documents. Beginning with Solr 4.8, an error
  will be generated when attempting to index a document with a term that is
  too large.  If you wish to continue to have large terms silently ignored,
  use "solr.LengthFilterFactory" in all of your Analyzers.  See LUCENE-5472 for
  more details.
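
  A minimal Lucene-level sketch of the workaround described above, assuming an
  application that builds its own Analyzer; the tokenizer choice and the 1..255
  length bounds are illustrative, and Solr schemas would instead add
  solr.LengthFilterFactory to the field type's analyzer chain:

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.LengthFilter;
    import org.apache.lucene.util.Version;

    /** Hypothetical analyzer that silently drops over-long terms, restoring the pre-4.8 behavior. */
    public class LenientLengthAnalyzer extends Analyzer {
      private final Version matchVersion;

      public LenientLengthAnalyzer(Version matchVersion) {
        this.matchVersion = matchVersion;
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
        // Terms outside the [1, 255] character range are discarded instead of causing an error.
        TokenStream filtered = new LengthFilter(matchVersion, source, 1, 255);
        return new TokenStreamComponents(source, filtered);
      }
    }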
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
@ -88,6 +97,13 @@ New Features
|
|||
* SOLR-5183: JSON updates now support nested child documents using a
|
||||
"_childDocument_" object key. (Varun Thacker, hossman)
|
||||
|
||||
* SOLR-5714: You can now use one pool of memory for the HDFS block cache
  that all collections share. (Mark Miller, Gregory Chanan)
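
  The sharing pattern this enables is visible in the HdfsDirectoryFactory changes
  further down in this commit; a rough sketch of the idea, assuming the BlockCache
  constructor used there (the method shape and synchronization are simplified, not
  the exact implementation):

    // Illustrative only: when solr.hdfs.blockcache.global=true, every directory receives the
    // same static BlockCache instance, so all collections draw from one pool of memory.
    private static BlockCache globalBlockCache;

    private synchronized BlockCache getOrCreateBlockCache(boolean global, boolean directAllocation,
                                                          long totalMemory, int slabSize, int blockSize) {
      if (!global) {
        // Per-core cache, the previous default behavior.
        return new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize);
      }
      if (globalBlockCache == null) {
        globalBlockCache = new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize);
      }
      return globalBlockCache;
    }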
|
||||
|
||||
* SOLR-5720: Add ExpandComponent to expand results collapsed by the
|
||||
CollapsingQParserPlugin. (Joel Bernstein)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
@ -113,6 +129,15 @@ Bug Fixes
|
|||
* SOLR-5761: HttpSolrServer has a few fields that can be set via setters but
|
||||
are not volatile. (Mark Miller, Gregory Chanan)
|
||||
|
||||
* SOLR-5811: The Overseer will retry work items until success, which is a serious
|
||||
problem if you hit a bad work item. (Mark Miller)
|
||||
|
||||
* SOLR-5796: Increase how long we are willing to wait for a core to see the ZK
  advertised leader in its local state. (Timothy Potter, Mark Miller)
|
||||
|
||||
* SOLR-5818: distrib search with custom comparator does not quite work correctly
|
||||
(Ryan Ernst)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
|
||||
|
@ -148,6 +173,15 @@ Other Changes
|
|||
registration exists, wait a short time to see if it goes away.
|
||||
(Mark Miller)
|
||||
|
||||
* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException
|
||||
if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous
|
||||
behavior of silently ignoring these terms, use LengthFilter in your Analyzer.
|
||||
(hossman, Mike McCandless, Varun Thacker)
|
||||
|
||||
* SOLR-5796: Make how long we are willing to wait for a core to see the ZK
  advertised leader in its local state configurable.
  (Timothy Potter via Mark Miller)
|
||||
|
||||
================== 4.7.0 ==================
|
||||
|
||||
Versions of Major Components
|
||||
|
|
|
@ -123,7 +123,16 @@ public class Overseer {
|
|||
else if (LeaderStatus.YES == isLeader) {
|
||||
final ZkNodeProps message = ZkNodeProps.load(head);
|
||||
final String operation = message.getStr(QUEUE_OPERATION);
|
||||
clusterState = processMessage(clusterState, message, operation);
|
||||
try {
|
||||
clusterState = processMessage(clusterState, message, operation);
|
||||
} catch (Exception e) {
|
||||
// generally there is nothing we can do - in most cases, we have
|
||||
// an issue that will fail again on retry or we cannot communicate with
|
||||
// ZooKeeper in which case another Overseer should take over
|
||||
// TODO: if ordering for the message is not important, we could
|
||||
// track retries and put it back on the end of the queue
|
||||
log.error("Overseer could not process the current cluster state update message, skipping the message.", e);
|
||||
}
|
||||
zkClient.setData(ZkStateReader.CLUSTER_STATE,
|
||||
ZkStateReader.toJSON(clusterState), true);
|
||||
|
||||
|
@ -189,8 +198,16 @@ public class Overseer {
|
|||
while (head != null) {
|
||||
final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
|
||||
final String operation = message.getStr(QUEUE_OPERATION);
|
||||
|
||||
clusterState = processMessage(clusterState, message, operation);
|
||||
try {
|
||||
clusterState = processMessage(clusterState, message, operation);
|
||||
} catch (Exception e) {
|
||||
// generally there is nothing we can do - in most cases, we have
|
||||
// an issue that will fail again on retry or we cannot communicate with
|
||||
// ZooKeeper in which case another Overseer should take over
|
||||
// TODO: if ordering for the message is not important, we could
|
||||
// track retries and put it back on the end of the queue
|
||||
log.error("Overseer could not process the current cluster state update message, skipping the message.", e);
|
||||
}
|
||||
workQueue.offer(head.getBytes());
|
||||
|
||||
stateUpdateQueue.poll();
|
||||
|
@ -294,6 +311,7 @@ public class Overseer {
|
|||
private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) {
|
||||
log.info("createReplica() {} ", message);
|
||||
String coll = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
Slice sl = clusterState.getSlice(coll, slice);
|
||||
if(sl == null){
|
||||
|
@ -334,6 +352,7 @@ public class Overseer {
|
|||
|
||||
private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) {
|
||||
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
log.info("Update shard state invoked for collection: " + collection + " with message: " + message);
|
||||
for (String key : message.keySet()) {
|
||||
if (ZkStateReader.COLLECTION_PROP.equals(key)) continue;
|
||||
|
@ -358,6 +377,7 @@ public class Overseer {
|
|||
|
||||
private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) {
|
||||
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
String shard = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
String routeKey = message.getStr("routeKey");
|
||||
String range = message.getStr("range");
|
||||
|
@ -397,8 +417,22 @@ public class Overseer {
|
|||
return clusterState;
|
||||
}
|
||||
|
||||
private boolean checkCollectionKeyExistence(ZkNodeProps message) {
|
||||
return checkKeyExistence(message, ZkStateReader.COLLECTION_PROP);
|
||||
}
|
||||
|
||||
private boolean checkKeyExistence(ZkNodeProps message, String key) {
|
||||
String value = message.getStr(key);
|
||||
if (value == null || value.trim().length() == 0) {
|
||||
log.error("Skipping invalid Overseer message because it has no " + key + " specified: " + message);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) {
|
||||
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
String shard = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
String routeKeyStr = message.getStr("routeKey");
|
||||
|
||||
|
@ -424,6 +458,7 @@ public class Overseer {
|
|||
|
||||
private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) {
|
||||
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
Slice slice = clusterState.getSlice(collection, shardId);
|
||||
if (slice == null) {
|
||||
|
@ -470,6 +505,7 @@ public class Overseer {
|
|||
|
||||
private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) {
|
||||
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
|
||||
if(collection==null || sliceName == null){
|
||||
|
@ -488,32 +524,30 @@ public class Overseer {
|
|||
/**
|
||||
* Try to assign core to the cluster.
|
||||
*/
|
||||
private ClusterState updateState(ClusterState state, final ZkNodeProps message) {
|
||||
private ClusterState updateState(ClusterState clusterState, final ZkNodeProps message) {
|
||||
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
assert collection.length() > 0 : message;
|
||||
|
||||
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null);
|
||||
log.info("Update state numShards={} message={}", numShards, message);
|
||||
|
||||
List<String> shardNames = new ArrayList<String>();
|
||||
|
||||
//collection does not yet exist, create placeholders if num shards is specified
|
||||
boolean collectionExists = state.hasCollection(collection);
|
||||
boolean collectionExists = clusterState.hasCollection(collection);
|
||||
if (!collectionExists && numShards!=null) {
|
||||
getShardNames(numShards, shardNames);
|
||||
state = createCollection(state, collection, shardNames, message);
|
||||
clusterState = createCollection(clusterState, collection, shardNames, message);
|
||||
}
|
||||
String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
|
||||
String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
|
||||
if (coreNodeName == null) {
|
||||
coreNodeName = getAssignedCoreNodeName(state, message);
|
||||
coreNodeName = getAssignedCoreNodeName(clusterState, message);
|
||||
if (coreNodeName != null) {
|
||||
log.info("node=" + coreNodeName + " is already registered");
|
||||
} else {
|
||||
// if coreNodeName is null, auto assign one
|
||||
coreNodeName = Assign.assignNode(collection, state);
|
||||
coreNodeName = Assign.assignNode(collection, clusterState);
|
||||
}
|
||||
message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP,
|
||||
coreNodeName);
|
||||
|
@ -522,7 +556,7 @@ public class Overseer {
|
|||
// use the provided non null shardId
|
||||
if (sliceName == null) {
|
||||
//get shardId from ClusterState
|
||||
sliceName = getAssignedId(state, coreNodeName, message);
|
||||
sliceName = getAssignedId(clusterState, coreNodeName, message);
|
||||
if (sliceName != null) {
|
||||
log.info("shard=" + sliceName + " is already registered");
|
||||
}
|
||||
|
@ -531,14 +565,14 @@ public class Overseer {
|
|||
//request new shardId
|
||||
if (collectionExists) {
|
||||
// use existing numShards
|
||||
numShards = state.getCollection(collection).getSlices().size();
|
||||
numShards = clusterState.getCollection(collection).getSlices().size();
|
||||
log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards);
|
||||
}
|
||||
sliceName = Assign.assignShard(collection, state, numShards);
|
||||
sliceName = Assign.assignShard(collection, clusterState, numShards);
|
||||
log.info("Assigning new node to shard shard=" + sliceName);
|
||||
}
|
||||
|
||||
Slice slice = state.getSlice(collection, sliceName);
|
||||
Slice slice = clusterState.getSlice(collection, sliceName);
|
||||
|
||||
Map<String,Object> replicaProps = new LinkedHashMap<String,Object>();
|
||||
|
||||
|
@ -584,9 +618,9 @@ public class Overseer {
|
|||
Map<String,Replica> replicas;
|
||||
|
||||
if (slice != null) {
|
||||
state = checkAndCompleteShardSplit(state, collection, coreNodeName, sliceName, replicaProps);
|
||||
clusterState = checkAndCompleteShardSplit(clusterState, collection, coreNodeName, sliceName, replicaProps);
|
||||
// get the current slice again because it may have been updated due to checkAndCompleteShardSplit method
|
||||
slice = state.getSlice(collection, sliceName);
|
||||
slice = clusterState.getSlice(collection, sliceName);
|
||||
sliceProps = slice.getProperties();
|
||||
replicas = slice.getReplicasCopy();
|
||||
} else {
|
||||
|
@ -600,7 +634,7 @@ public class Overseer {
|
|||
replicas.put(replica.getName(), replica);
|
||||
slice = new Slice(sliceName, replicas, sliceProps);
|
||||
|
||||
ClusterState newClusterState = updateSlice(state, collection, slice);
|
||||
ClusterState newClusterState = updateSlice(clusterState, collection, slice);
|
||||
return newClusterState;
|
||||
}
|
||||
|
||||
|
@ -849,13 +883,9 @@ public class Overseer {
|
|||
* Remove collection from cloudstate
|
||||
*/
|
||||
private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) {
|
||||
|
||||
final String collection = message.getStr("name");
|
||||
if (!checkKeyExistence(message, "name")) return clusterState;
|
||||
|
||||
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
|
||||
// newCollections.remove(collection);
|
||||
|
||||
// ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections);
|
||||
return clusterState.copyWith(singletonMap(collection, (DocCollection)null));
|
||||
}
|
||||
|
||||
|
@ -863,32 +893,28 @@ public class Overseer {
|
|||
* Remove collection slice from cloudstate
|
||||
*/
|
||||
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
|
||||
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
|
||||
log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
|
||||
|
||||
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
|
||||
DocCollection coll = clusterState.getCollection(collection);
|
||||
|
||||
Map<String, Slice> newSlices = new LinkedHashMap<String, Slice>(coll.getSlicesMap());
|
||||
newSlices.remove(sliceId);
|
||||
|
||||
DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter());
|
||||
// newCollections.put(newCollection.getName(), newCollection);
|
||||
return newState(clusterState, singletonMap(collection,newCollection));
|
||||
|
||||
// return new ClusterState(clusterState.getLiveNodes(), newCollections);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove core from cloudstate
|
||||
*/
|
||||
private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) {
|
||||
|
||||
String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
|
||||
|
||||
final String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
|
||||
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
if (!checkCollectionKeyExistence(message)) return clusterState;
|
||||
|
||||
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
|
||||
// DocCollection coll = newCollections.get(collection);
|
||||
|
|
|
@ -1627,7 +1627,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
if (!created)
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create collection: " + message.getStr("name"));
|
||||
|
||||
log.info("going to create cores replicas shardNames {} , repFactor : {}", shardNames, repFactor);
|
||||
log.info("Creating SolrCores for new collection, shardNames {} , replicationFactor : {}", shardNames, repFactor);
|
||||
Map<String ,ShardRequest> coresToCreate = new LinkedHashMap<String, ShardRequest>();
|
||||
for (int i = 1; i <= shardNames.size(); i++) {
|
||||
String sliceName = shardNames.get(i-1);
|
||||
|
@ -1671,14 +1671,17 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
sreq.actualShards = sreq.shards;
|
||||
sreq.params = params;
|
||||
|
||||
if(isLegacyCloud) shardHandler.submit(sreq, sreq.shards[0], sreq.params);
|
||||
else coresToCreate.put(coreName, sreq);
|
||||
if(isLegacyCloud) {
|
||||
shardHandler.submit(sreq, sreq.shards[0], sreq.params);
|
||||
} else {
|
||||
coresToCreate.put(coreName, sreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!isLegacyCloud) {
|
||||
//wait for all replica entries to be created
|
||||
Map<String, Replica> replicas = lookupReplicas(collectionName, coresToCreate.keySet());
|
||||
// wait for all replica entries to be created
|
||||
Map<String, Replica> replicas = waitToSeeReplicasInState(collectionName, coresToCreate.keySet());
|
||||
for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) {
|
||||
ShardRequest sreq = e.getValue();
|
||||
sreq.params.set(CoreAdminParams.CORE_NODE_NAME, replicas.get(e.getKey()).getName());
|
||||
|
@ -1704,37 +1707,35 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
}
|
||||
}
|
||||
|
||||
private Map<String, Replica> lookupReplicas(String collectionName, Collection<String> coreNames) throws InterruptedException {
|
||||
private Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
|
||||
Map<String, Replica> result = new HashMap<String, Replica>();
|
||||
long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS);
|
||||
for(;;) {
|
||||
DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName);
|
||||
for (String coreName : coreNames) {
|
||||
if(result.containsKey(coreName)) continue;
|
||||
long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
|
||||
while (true) {
|
||||
DocCollection coll = zkStateReader.getClusterState().getCollection(
|
||||
collectionName);
|
||||
for (String coreName : coreNames) {
|
||||
if (result.containsKey(coreName)) continue;
|
||||
for (Slice slice : coll.getSlices()) {
|
||||
for (Replica replica : slice.getReplicas()) {
|
||||
if(coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
|
||||
result.put(coreName,replica);
|
||||
if (coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
|
||||
result.put(coreName, replica);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(result.size() == coreNames.size()) {
|
||||
if (result.size() == coreNames.size()) {
|
||||
return result;
|
||||
}
|
||||
if( System.nanoTime() > endTime) {
|
||||
//time up . throw exception and go out
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to create replica entries in ZK");
|
||||
if (System.nanoTime() > endTime) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas in cluster state.");
|
||||
}
|
||||
|
||||
Thread.sleep(100);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException {
|
||||
String collection = message.getStr(COLLECTION_PROP);
|
||||
String node = message.getStr("node");
|
||||
|
@ -1789,7 +1790,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
|
||||
ZkStateReader.BASE_URL_PROP,zkStateReader.getBaseUrlForNodeName(node));
|
||||
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(props));
|
||||
params.set(CoreAdminParams.CORE_NODE_NAME, lookupReplicas(collection, Collections.singletonList(coreName)).get(coreName).getName());
|
||||
params.set(CoreAdminParams.CORE_NODE_NAME, waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Enumeration;
|
|||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
@ -160,6 +161,7 @@ public final class ZkController {
|
|||
protected volatile Overseer overseer;
|
||||
|
||||
private int leaderVoteWait;
|
||||
private int leaderConflictResolveWait;
|
||||
|
||||
private boolean genericCoreNodeNames;
|
||||
|
||||
|
@ -168,8 +170,10 @@ public final class ZkController {
|
|||
private volatile boolean isClosed;
|
||||
|
||||
public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort,
|
||||
String localHostContext, int leaderVoteWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException,
|
||||
TimeoutException, IOException {
|
||||
String localHostContext, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect)
|
||||
throws InterruptedException, TimeoutException, IOException
|
||||
{
|
||||
|
||||
if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null.");
|
||||
this.cc = cc;
|
||||
this.genericCoreNodeNames = genericCoreNodeNames;
|
||||
|
@ -188,6 +192,8 @@ public final class ZkController {
|
|||
this.localHostContext);
|
||||
|
||||
this.leaderVoteWait = leaderVoteWait;
|
||||
this.leaderConflictResolveWait = leaderConflictResolveWait;
|
||||
|
||||
this.clientTimeout = zkClientTimeout;
|
||||
zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout,
|
||||
zkClientConnectTimeout, new DefaultConnectionStrategy(),
|
||||
|
@ -850,19 +856,28 @@ public final class ZkController {
|
|||
shardId, timeoutms * 2); // since we found it in zk, we are willing to
|
||||
// wait a while to find it in state
|
||||
int tries = 0;
|
||||
final long msInSec = 1000L;
|
||||
int maxTries = (int)Math.floor(leaderConflictResolveWait/msInSec);
|
||||
while (!leaderUrl.equals(clusterStateLeaderUrl)) {
|
||||
if (tries == 60) {
|
||||
if (tries > maxTries) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"There is conflicting information about the leader of shard: "
|
||||
+ cloudDesc.getShardId() + " our state says:"
|
||||
+ clusterStateLeaderUrl + " but zookeeper says:" + leaderUrl);
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
Thread.sleep(msInSec);
|
||||
tries++;
|
||||
clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId,
|
||||
timeoutms);
|
||||
leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms)
|
||||
.getCoreUrl();
|
||||
|
||||
if (tries % 30 == 0) {
|
||||
String warnMsg = String.format(Locale.ENGLISH, "Still seeing conflicting information about the leader "
|
||||
+ "of shard %s for collection %s after %d seconds; our state says %s, but ZooKeeper says %s",
|
||||
cloudDesc.getShardId(), collection, tries, clusterStateLeaderUrl, leaderUrl);
|
||||
log.warn(warnMsg);
|
||||
}
|
||||
}
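
With the defaults introduced in ConfigSolr further down in this commit (leaderConflictResolveWait = 180000 ms), the loop above works out to roughly 180 one-second retries, with a warning logged every 30 tries; a small worked example of the arithmetic:

    // Values mirror the defaults elsewhere in this commit; they are configurable per install.
    int leaderConflictResolveWait = 180000;                               // milliseconds
    final long msInSec = 1000L;
    int maxTries = (int) Math.floor(leaderConflictResolveWait / msInSec); // 180 retries
    // Each retry sleeps 1000 ms, so the conflict is tolerated for up to ~3 minutes before failing.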
|
||||
|
||||
} catch (Exception e) {
|
||||
|
@ -1013,7 +1028,8 @@ public final class ZkController {
|
|||
core.close();
|
||||
}
|
||||
}
|
||||
log.info("publishing core={} state={}", cd.getName(), state);
|
||||
String collection = cd.getCloudDescriptor().getCollectionName();
|
||||
log.info("publishing core={} state={} collection={}", cd.getName(), state, collection);
|
||||
//System.out.println(Thread.currentThread().getStackTrace()[3]);
|
||||
Integer numShards = cd.getCloudDescriptor().getNumShards();
|
||||
if (numShards == null) { //XXX sys prop hack
|
||||
|
@ -1021,8 +1037,7 @@ public final class ZkController {
|
|||
numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
|
||||
}
|
||||
|
||||
assert cd.getCloudDescriptor().getCollectionName() != null && cd.getCloudDescriptor()
|
||||
.getCollectionName().length() > 0;
|
||||
assert collection != null && collection.length() > 0;
|
||||
|
||||
String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
|
||||
//assert cd.getCloudDescriptor().getShardId() != null;
|
||||
|
@ -1033,12 +1048,9 @@ public final class ZkController {
|
|||
ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles(),
|
||||
ZkStateReader.NODE_NAME_PROP, getNodeName(),
|
||||
ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId(),
|
||||
ZkStateReader.COLLECTION_PROP, cd.getCloudDescriptor()
|
||||
.getCollectionName(),
|
||||
ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString()
|
||||
: null,
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName
|
||||
: null);
|
||||
ZkStateReader.COLLECTION_PROP, collection,
|
||||
ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() : null,
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName : null);
|
||||
if (updateLastState) {
|
||||
cd.getCloudDescriptor().lastPublished = state;
|
||||
}
|
||||
|
@ -1064,6 +1076,12 @@ public final class ZkController {
|
|||
final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
|
||||
final String collection = cd.getCloudDescriptor().getCollectionName();
|
||||
assert collection != null;
|
||||
|
||||
if (collection == null || collection.trim().length() == 0) {
|
||||
log.error("No collection was specified.");
|
||||
return;
|
||||
}
|
||||
|
||||
ElectionContext context = electionContexts.remove(new ContextKey(collection, coreNodeName));
|
||||
|
||||
if (context != null) {
|
||||
|
@ -1362,7 +1380,6 @@ public final class ZkController {
|
|||
|
||||
CloudDescriptor cloudDesc = cd.getCloudDescriptor();
|
||||
|
||||
|
||||
// make sure the node name is set on the descriptor
|
||||
if (cloudDesc.getCoreNodeName() == null) {
|
||||
cloudDesc.setCoreNodeName(coreNodeName);
|
||||
|
|
|
@ -138,6 +138,7 @@ public abstract class ConfigSolr {
|
|||
|
||||
private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000;
|
||||
private static final int DEFAULT_LEADER_VOTE_WAIT = 180000; // 3 minutes
|
||||
private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000;
|
||||
private static final int DEFAULT_CORE_LOAD_THREADS = 3;
|
||||
|
||||
protected static final String DEFAULT_CORE_ADMIN_PATH = "/admin/cores";
|
||||
|
@ -158,6 +159,10 @@ public abstract class ConfigSolr {
|
|||
return getInt(CfgProp.SOLR_LEADERVOTEWAIT, DEFAULT_LEADER_VOTE_WAIT);
|
||||
}
|
||||
|
||||
public int getLeaderConflictResolveWait() {
|
||||
return getInt(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT);
|
||||
}
|
||||
|
||||
public boolean getGenericCoreNodeNames() {
|
||||
return getBool(CfgProp.SOLR_GENERICCORENODENAMES, false);
|
||||
}
|
||||
|
@ -255,6 +260,7 @@ public abstract class ConfigSolr {
|
|||
SOLR_GENERICCORENODENAMES,
|
||||
SOLR_ZKCLIENTTIMEOUT,
|
||||
SOLR_ZKHOST,
|
||||
SOLR_LEADERCONFLICTRESOLVEWAIT,
|
||||
|
||||
//TODO: Remove all of these elements for 5.0
|
||||
SOLR_PERSISTENT,
|
||||
|
|
|
@ -67,6 +67,7 @@ public class ConfigSolrXml extends ConfigSolr {
|
|||
failIfFound("solr/cores/@hostContext");
|
||||
failIfFound("solr/cores/@hostPort");
|
||||
failIfFound("solr/cores/@leaderVoteWait");
|
||||
failIfFound("solr/cores/@leaderConflictResolveWait");
|
||||
failIfFound("solr/cores/@genericCoreNodeNames");
|
||||
failIfFound("solr/cores/@managementPath");
|
||||
failIfFound("solr/cores/@shareSchema");
|
||||
|
@ -113,6 +114,7 @@ public class ConfigSolrXml extends ConfigSolr {
|
|||
propMap.put(CfgProp.SOLR_HOSTCONTEXT, doSub("solr/solrcloud/str[@name='hostContext']"));
|
||||
propMap.put(CfgProp.SOLR_HOSTPORT, doSub("solr/solrcloud/int[@name='hostPort']"));
|
||||
propMap.put(CfgProp.SOLR_LEADERVOTEWAIT, doSub("solr/solrcloud/int[@name='leaderVoteWait']"));
|
||||
propMap.put(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, doSub("solr/solrcloud/int[@name='leaderConflictResolveWait']"));
|
||||
propMap.put(CfgProp.SOLR_GENERICCORENODENAMES, doSub("solr/solrcloud/bool[@name='genericCoreNodeNames']"));
|
||||
propMap.put(CfgProp.SOLR_MANAGEMENTPATH, doSub("solr/str[@name='managementPath']"));
|
||||
propMap.put(CfgProp.SOLR_SHAREDLIB, doSub("solr/str[@name='sharedLib']"));
|
||||
|
|
|
@@ -51,6 +51,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
public static final String BLOCKCACHE_SLAB_COUNT = "solr.hdfs.blockcache.slab.count";
public static final String BLOCKCACHE_DIRECT_MEMORY_ALLOCATION = "solr.hdfs.blockcache.direct.memory.allocation";
public static final String BLOCKCACHE_ENABLED = "solr.hdfs.blockcache.enabled";
public static final String BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global";
public static final String BLOCKCACHE_READ_ENABLED = "solr.hdfs.blockcache.read.enabled";
public static final String BLOCKCACHE_WRITE_ENABLED = "solr.hdfs.blockcache.write.enabled";

@@ -73,6 +74,8 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
private String confDir;

private static BlockCache globalBlockCache;

public static Metrics metrics;
private static Boolean kerberosInit;

@@ -102,6 +105,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
}

boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true);
boolean blockCacheGlobal = params.getBool(BLOCKCACHE_GLOBAL, false); // default to false for back compat
boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED,
true);
boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true);

@@ -117,8 +121,6 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
boolean directAllocation = params.getBool(
BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true);

BlockCache blockCache;

int slabSize = numberOfBlocksPerBank * blockSize;
LOG.info(
"Number of slabs of block cache [{}] with direct memory allocation set to [{}]",

@@ -131,22 +133,13 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128);
int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128);

BufferStore.initNewBuffer(bufferSize, bufferCount);
long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank
* (long) blockSize;
try {
blockCache = new BlockCache(metrics, directAllocation, totalMemory,
slabSize, blockSize);
} catch (OutOfMemoryError e) {
throw new RuntimeException(
"The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your containers startup args)"
+ " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
+ " your java heap size might not be large enough."
+ " Failed allocating ~" + totalMemory / 1000000.0 + " MB.", e);
}
Cache cache = new BlockDirectoryCache(blockCache, metrics);
BlockCache blockCache = getBlockDirectoryCache(path, numberOfBlocksPerBank,
blockSize, bankCount, directAllocation, slabSize,
bufferSize, bufferCount, blockCacheGlobal);

Cache cache = new BlockDirectoryCache(blockCache, path, metrics);
HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf);
dir = new BlockDirectory("solrcore", hdfsDirectory, cache, null,
dir = new BlockDirectory(path, hdfsDirectory, cache, null,
blockCacheReadEnabled, blockCacheWriteEnabled);
} else {
dir = new HdfsDirectory(new Path(path), conf);

@@ -165,6 +158,45 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
return dir;
}

private BlockCache getBlockDirectoryCache(String path,
int numberOfBlocksPerBank, int blockSize, int bankCount,
boolean directAllocation, int slabSize, int bufferSize, int bufferCount, boolean staticBlockCache) {
if (!staticBlockCache) {
LOG.info("Creating new single instance HDFS BlockCache");
return createBlockCache(numberOfBlocksPerBank, blockSize, bankCount, directAllocation, slabSize, bufferSize, bufferCount);
}
LOG.info("Creating new global HDFS BlockCache");
synchronized (HdfsDirectoryFactory.class) {

if (globalBlockCache == null) {
globalBlockCache = createBlockCache(numberOfBlocksPerBank, blockSize, bankCount,
directAllocation, slabSize, bufferSize, bufferCount);
}
}
return globalBlockCache;
}

private BlockCache createBlockCache(int numberOfBlocksPerBank, int blockSize,
int bankCount, boolean directAllocation, int slabSize, int bufferSize,
int bufferCount) {
BufferStore.initNewBuffer(bufferSize, bufferCount);
long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank
* (long) blockSize;

BlockCache blockCache;
try {
blockCache = new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize);
} catch (OutOfMemoryError e) {
throw new RuntimeException(
"The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your containers startup args)"
+ " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
+ " your java heap size might not be large enough."
+ " Failed allocating ~" + totalMemory / 1000000.0 + " MB.",
e);
}
return blockCache;
}

@Override
public boolean exists(String path) {
Path hdfsDirPath = new Path(path);

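// Editor's sketch (not part of the commit): the factory above sizes the HDFS
// block cache as totalMemory = bankCount * numberOfBlocksPerBank * blockSize,
// and its OutOfMemoryError message points at -XX:MaxDirectMemorySize. A minimal,
// standalone illustration of that arithmetic; the three settings below are
// assumed example values, not the factory's actual defaults.
public class BlockCacheSizingSketch {
  public static void main(String[] args) {
    int blockSize = 8192;              // assumed solr.hdfs.blockcache.blocksize
    int numberOfBlocksPerBank = 16384; // assumed solr.hdfs.blockcache.blocksperbank
    int bankCount = 1;                 // assumed solr.hdfs.blockcache.slab.count
    long slabSize = (long) numberOfBlocksPerBank * blockSize;
    long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank * (long) blockSize;
    // Direct memory must be at least this large when direct allocation is enabled.
    System.out.printf("slab size = %d bytes, total ~%.1f MB of direct memory%n",
        slabSize, totalMemory / 1000000.0);
  }
}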
@@ -66,11 +66,12 @@ public class ZkContainer {

initZooKeeper(cc, solrHome,
config.getZkHost(), config.getZkClientTimeout(), config.getZkHostPort(), config.getZkHostContext(),
config.getHost(), config.getLeaderVoteWait(), config.getGenericCoreNodeNames());
config.getHost(), config.getLeaderVoteWait(), config.getLeaderConflictResolveWait(), config.getGenericCoreNodeNames());
}

public void initZooKeeper(final CoreContainer cc, String solrHome, String zkHost, int zkClientTimeout, String hostPort,
String hostContext, String host, int leaderVoteWait, boolean genericCoreNodeNames) {
String hostContext, String host, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames) {

ZkController zkController = null;

// if zkHost sys property is not set, we are not using ZooKeeper

@@ -135,7 +136,7 @@ public class ZkContainer {
}
zkController = new ZkController(cc, zookeeperHost, zkClientTimeout,
zkClientConnectTimeout, host, hostPort, hostContext,
leaderVoteWait, genericCoreNodeNames,
leaderVoteWait, leaderConflictResolveWait, genericCoreNodeNames,
new CurrentCoreDescriptorProvider() {

@Override

@@ -25,12 +25,14 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

@@ -500,12 +502,32 @@ public class QueryComponent extends SearchComponent

// sort ids from lowest to highest so we can access them in order
int nDocs = docList.size();
long[] sortedIds = new long[nDocs];
DocIterator it = rb.getResults().docList.iterator();
final long[] sortedIds = new long[nDocs];
final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
DocList docs = rb.getResults().docList;
DocIterator it = docs.iterator();
for (int i=0; i<nDocs; i++) {
sortedIds[i] = (((long)it.nextDoc()) << 32) | i;
scores[i] = docs.hasScores() ? it.score() : Float.NaN;
}
Arrays.sort(sortedIds);

// sort ids and scores together
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
long tmpId = sortedIds[i];
float tmpScore = scores[i];
sortedIds[i] = sortedIds[j];
scores[i] = scores[j];
sortedIds[j] = tmpId;
scores[j] = tmpScore;
}

@Override
protected int compare(int i, int j) {
return Long.compare(sortedIds[i], sortedIds[j]);
}
}.sort(0, sortedIds.length);

SortSpec sortSpec = rb.getSortSpec();
Sort sort = searcher.weightSort(sortSpec.getSort());

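// Editor's sketch (not part of the commit): the hunk above keeps scores[]
// aligned with sortedIds[] by sorting both arrays through a single
// InPlaceMergeSorter. A self-contained version of the same pattern; the sample
// data below is made up for illustration.
import org.apache.lucene.util.InPlaceMergeSorter;

public class ParallelArraySortSketch {
  public static void main(String[] args) {
    final long[] ids = {42L, 7L, 19L};
    final float[] scores = {0.3f, 1.2f, 0.9f};
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        // swap both arrays at once so they stay parallel
        long tmpId = ids[i];        ids[i] = ids[j];        ids[j] = tmpId;
        float tmpScore = scores[i]; scores[i] = scores[j];  scores[j] = tmpScore;
      }
      @Override
      protected int compare(int i, int j) {
        return Long.compare(ids[i], ids[j]);
      }
    }.sort(0, ids.length);
    // ids -> {7, 19, 42}; scores stays aligned -> {1.2, 0.9, 0.3}
    System.out.println(java.util.Arrays.toString(ids) + " " + java.util.Arrays.toString(scores));
  }
}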
@@ -527,7 +549,9 @@ public class QueryComponent extends SearchComponent
int lastIdx = -1;
int idx = 0;

for (long idAndPos : sortedIds) {
for (int i = 0; i < sortedIds.length; ++i) {
long idAndPos = sortedIds[i];
float score = scores[i];
int doc = (int)(idAndPos >>> 32);
int position = (int)idAndPos;

@@ -546,6 +570,7 @@ public class QueryComponent extends SearchComponent
}

doc -= currentLeaf.docBase; // adjust for what segment this is in
comparator.setScorer(new FakeScorer(doc, score));
comparator.copy(0, doc);
Object val = comparator.value(0);
if (null != ft) val = ft.marshalSortValue(val);

@@ -1157,4 +1182,50 @@ public class QueryComponent extends SearchComponent
public URL[] getDocs() {
return null;
}

/**
* Fake scorer for a single document
*
* TODO: when SOLR-5595 is fixed, this wont be needed, as we dont need to recompute sort values here from the comparator
*/
private static class FakeScorer extends Scorer {
final int docid;
final float score;

FakeScorer(int docid, float score) {
super(null);
this.docid = docid;
this.score = score;
}

@Override
public int docID() {
return docid;
}

@Override
public float score() throws IOException {
return score;
}

@Override
public int freq() throws IOException {
throw new UnsupportedOperationException();
}

@Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException();
}

@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public long cost() {
return 1;
}
}
}

@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;

@@ -90,7 +91,8 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {

try {
return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion,
new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
FSDirectory.open(new File(indexPath)), indexAnalyzer,
queryAnalyzer, minPrefixChars);
} catch (IOException e) {
throw new RuntimeException();
}

@@ -23,8 +23,9 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester.BlenderType;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;

@@ -94,7 +95,9 @@ public class BlendedInfixLookupFactory extends AnalyzingInfixLookupFactory {

try {
return new BlendedInfixSuggester(core.getSolrConfig().luceneMatchVersion,
new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor);
FSDirectory.open(new File(indexPath)),
indexAnalyzer, queryAnalyzer, minPrefixChars,
blenderType, numFactor);
} catch (IOException e) {
throw new RuntimeException();
}

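// Editor's sketch (not part of the commit): both lookup factories above now
// hand the suggester an FSDirectory instead of a raw File. A minimal standalone
// use of the Directory-based AnalyzingInfixSuggester constructor shown in the
// diff; the index path, analyzer choice, and Version constant are illustrative
// assumptions, not values taken from the factories.
import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class InfixSuggesterSketch {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); // assumed match version
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
        Version.LUCENE_48,
        FSDirectory.open(new File("/tmp/suggest-index")), // hypothetical path; was: new File(indexPath)
        analyzer, analyzer, 4 /* minPrefixChars */);
    suggester.close();
  }
}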
@@ -24,6 +24,9 @@ import java.util.concurrent.atomic.AtomicInteger;
import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap;
import com.googlecode.concurrentlinkedhashmap.EvictionListener;

/**
* @lucene.experimental
*/
public class BlockCache {

public static final int _128M = 134217728;

@@ -16,11 +16,22 @@ package org.apache.solr.store.blockcache;
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @lucene.experimental
*/
public class BlockCacheKey implements Cloneable {

private long block;
private int file;
private String path;

public String getPath() {
return path;
}

public void setPath(String path) {
this.path = path;
}

public long getBlock() {
return block;

@@ -44,6 +55,7 @@ public class BlockCacheKey implements Cloneable {
int result = 1;
result = prime * result + (int) (block ^ (block >>> 32));
result = prime * result + file;
result = prime * result + ((path == null) ? 0 : path.hashCode());
return result;
}

@@ -55,6 +67,9 @@ public class BlockCacheKey implements Cloneable {
BlockCacheKey other = (BlockCacheKey) obj;
if (block != other.block) return false;
if (file != other.file) return false;
if (path == null) {
if (other.path != null) return false;
} else if (!path.equals(other.path)) return false;
return true;
}

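// Editor's sketch (not part of the commit): BlockCacheKey now carries the
// directory path, so identical block/file ids coming from different directories
// no longer collide in a shared (global) block cache. The paths and ids below
// are illustrative values only.
import org.apache.solr.store.blockcache.BlockCacheKey;

public class BlockCacheKeySketch {
  public static void main(String[] args) {
    BlockCacheKey a = new BlockCacheKey();
    a.setPath("/solr/core1/data/index"); // hypothetical path
    a.setFile(1);
    a.setBlock(42L);

    BlockCacheKey b = new BlockCacheKey();
    b.setPath("/solr/core2/data/index"); // same file/block ids, different path
    b.setFile(1);
    b.setBlock(42L);

    // false: path participates in equals() and hashCode() after this change
    System.out.println(a.equals(b));
  }
}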
@@ -19,6 +19,9 @@ package org.apache.solr.store.blockcache;

import java.util.concurrent.atomic.AtomicBoolean;

/**
* @lucene.experimental
*/
public class BlockCacheLocation {

private int block;

@@ -34,6 +34,9 @@ import org.apache.solr.store.hdfs.HdfsDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* @lucene.experimental
*/
public class BlockDirectory extends Directory {
public static Logger LOG = LoggerFactory.getLogger(BlockDirectory.class);

@@ -82,7 +85,7 @@ public class BlockDirectory extends Directory {
private Directory directory;
private int blockSize;
private String dirName;
private Cache cache;
private final Cache cache;
private Set<String> blockCacheFileTypes;
private final boolean blockCacheReadEnabled;
private final boolean blockCacheWriteEnabled;

@@ -265,6 +268,15 @@ public class BlockDirectory extends Directory {
return dirName + "/" + name;
}

/**
* Expert: mostly for tests
*
* @lucene.experimental
*/
public Cache getCache() {
return cache;
}

@Override
public void copy(Directory to, String src, String dest, IOContext context)
throws IOException {

@@ -383,4 +395,13 @@ public class BlockDirectory extends Directory {
return directory;
}

public boolean isBlockCacheReadEnabled() {
return blockCacheReadEnabled;
}

public boolean isBlockCacheWriteEnabled() {
return blockCacheWriteEnabled;
}

}

@@ -21,17 +21,31 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

/**
* @lucene.experimental
*/
public class BlockDirectoryCache implements Cache {
private BlockCache blockCache;
private final BlockCache blockCache;
private AtomicInteger counter = new AtomicInteger();
private Map<String,Integer> names = new ConcurrentHashMap<String,Integer>();
private String path;
private Metrics metrics;

public BlockDirectoryCache(BlockCache blockCache, Metrics metrics) {
public BlockDirectoryCache(BlockCache blockCache, String path, Metrics metrics) {
this.blockCache = blockCache;
this.path = path;
this.metrics = metrics;
}

/**
* Expert: mostly for tests
*
* @lucene.experimental
*/
public BlockCache getBlockCache() {
return blockCache;
}

@Override
public void delete(String name) {
names.remove(name);

@@ -46,6 +60,7 @@ public class BlockDirectoryCache implements Cache {
names.put(name, file);
}
BlockCacheKey blockCacheKey = new BlockCacheKey();
blockCacheKey.setPath(path);
blockCacheKey.setBlock(blockId);
blockCacheKey.setFile(file);
blockCache.store(blockCacheKey, blockOffset, buffer, offset, length);

@@ -59,6 +74,7 @@ public class BlockDirectoryCache implements Cache {
return false;
}
BlockCacheKey blockCacheKey = new BlockCacheKey();
blockCacheKey.setPath(path);
blockCacheKey.setBlock(blockId);
blockCacheKey.setFile(file);
boolean fetch = blockCache.fetch(blockCacheKey, b, blockOffset, off,

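// Editor's sketch (not part of the commit): roughly the wiring the
// HdfsDirectoryFactory hunk earlier performs, with the per-directory path now
// threaded into both BlockDirectoryCache and BlockDirectory. The Metrics and
// Configuration instances are taken as parameters to avoid guessing their
// construction, and the cache sizing values are assumptions, not Solr defaults.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.solr.store.blockcache.BlockCache;
import org.apache.solr.store.blockcache.BlockDirectory;
import org.apache.solr.store.blockcache.BlockDirectoryCache;
import org.apache.solr.store.blockcache.Cache;
import org.apache.solr.store.blockcache.Metrics;
import org.apache.solr.store.hdfs.HdfsDirectory;

public class BlockDirectoryWiringSketch {

  public static BlockDirectory open(String path, Metrics metrics, Configuration conf)
      throws IOException {
    int blockSize = 8192;      // assumed block size
    int blocksPerBank = 16384; // assumed blocks per slab
    int bankCount = 1;         // assumed slab count
    long totalMemory = (long) bankCount * blocksPerBank * blockSize;

    BlockCache blockCache = new BlockCache(metrics, true /* direct allocation */,
        totalMemory, blocksPerBank * blockSize, blockSize);
    Cache cache = new BlockDirectoryCache(blockCache, path, metrics); // path-aware keys
    HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf);
    // read and write caching enabled; null = no per-extension filtering
    return new BlockDirectory(path, hdfsDirectory, cache, null, true, true);
  }
}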
@@ -21,6 +21,9 @@ import java.util.concurrent.atomic.AtomicLongArray;

import org.apache.lucene.util.LongBitSet;

/**
* @lucene.experimental
*/
public class BlockLocks {

private AtomicLongArray bits;

@@ -22,7 +22,9 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

/**
* @lucene.experimental
*/
public class BufferStore implements Store {

private static final Store EMPTY = new Store() {

@@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache;
* limitations under the License.
*/

/**
* @lucene.experimental
*/
public interface Cache {

/**

@@ -21,10 +21,11 @@ import java.io.IOException;

import org.apache.lucene.store.IndexOutput;

/*
/**
* Cache the blocks as they are written. The cache file name is the name of
* the file until the file is closed, at which point the cache is updated
* to include the last modified date (which is unknown until that point).
* @lucene.experimental
*/
public class CachedIndexOutput extends ReusedBufferedIndexOutput {
private final BlockDirectory directory;

@@ -23,6 +23,9 @@ import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

/**
* @lucene.experimental
*/
public abstract class CustomBufferedIndexInput extends IndexInput {

public static final int BUFFER_SIZE = 32768;

@@ -29,6 +29,9 @@ import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater;
import org.apache.hadoop.metrics.jvm.JvmMetrics;

/**
* @lucene.experimental
*/
public class Metrics implements Updater {

public static class MethodCall {

@@ -21,6 +21,9 @@ import java.io.IOException;

import org.apache.lucene.store.IndexOutput;

/**
* @lucene.experimental
*/
public abstract class ReusedBufferedIndexOutput extends IndexOutput {

public static final int BUFFER_SIZE = 1024;

@@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache;
* limitations under the License.
*/

/**
* @lucene.experimental
*/
public interface Store {

byte[] takeBuffer(int bufferSize);

@@ -28,6 +28,9 @@ import org.apache.lucene.store.DataInput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* @lucene.experimental
*/
public class HdfsFileReader extends DataInput {

public static Logger LOG = LoggerFactory.getLogger(HdfsFileReader.class);

@@ -32,6 +32,9 @@ import org.apache.lucene.store.DataOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* @lucene.experimental
*/
public class HdfsFileWriter extends DataOutput implements Closeable {
public static Logger LOG = LoggerFactory.getLogger(HdfsFileWriter.class);

@@ -21,6 +21,9 @@ import java.io.IOException;

import org.apache.lucene.store.IndexOutput;

/**
* @lucene.experimental
*/
public class NullIndexOutput extends IndexOutput {

private long pos;

@ -0,0 +1,41 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<schema name="test-custom-comparator" version="1.5">
|
||||
<types>
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
|
||||
<fieldtype name="text" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldType class="org.apache.solr.schema.WrappedIntField" name="wrapped_int"/>
|
||||
</types>
|
||||
<fields>
|
||||
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="text" type="text" indexed="true" stored="false"/>
|
||||
<field name="payload" type="wrapped_int" indexed="false"
|
||||
stored="true" multiValued="false" docValues="true" required="true"/>
|
||||
|
||||
</fields>
|
||||
<defaultSearchField>text</defaultSearchField>
|
||||
<uniqueKey>id</uniqueKey>
|
||||
</schema>
|
|
@ -287,6 +287,16 @@ valued. -->
|
|||
class="solr.ExternalFileField"/>
|
||||
|
||||
<fieldType name="text_no_analyzer" stored="false" indexed="true" class="solr.TextField" />
|
||||
|
||||
<fieldtype name="text_length" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StandardFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.LengthFilterFactory" min="2" max="32768"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
</types>
|
||||
|
||||
|
||||
|
@ -324,6 +334,9 @@ valued. -->
|
|||
|
||||
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
|
||||
|
||||
<field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="cat_length" type="text_length" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||
will be used if the name matches any of the patterns.
|
||||
RESTRICTION: the glob-like pattern in the name attribute must have
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
<int name="solr.hdfs.blockcache.blocksperbank">${solr.hdfs.blockcache.blocksperbank:1024}</int>
|
||||
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
|
||||
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
|
||||
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:false}</str>
|
||||
</directoryFactory>
|
||||
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
<double name="maxWriteMBPerSecRead">4000000</double>
|
||||
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
|
||||
<bool name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</bool>
|
||||
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:false}</str>
|
||||
</directoryFactory>
|
||||
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
|
||||
|
|
|
@ -131,7 +131,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
|
|||
int threadCount = 1;
|
||||
int i = 0;
|
||||
for (i = 0; i < threadCount; i++) {
|
||||
StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true);
|
||||
StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
|
||||
threads.add(indexThread);
|
||||
indexThread.start();
|
||||
}
|
||||
|
@ -270,7 +270,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
|
|||
|
||||
public FullThrottleStopableIndexingThread(List<SolrServer> clients,
|
||||
String id, boolean doDeletes) {
|
||||
super(id, doDeletes);
|
||||
super(controlClient, cloudClient, id, doDeletes);
|
||||
setName("FullThrottleStopableIndexingThread");
|
||||
setDaemon(true);
|
||||
this.clients = clients;
|
||||
|
|
|
@ -108,7 +108,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
|
|||
List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>();
|
||||
int threadCount = 2;
|
||||
for (int i = 0; i < threadCount; i++) {
|
||||
StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true);
|
||||
StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
|
||||
threads.add(indexThread);
|
||||
indexThread.start();
|
||||
}
|
||||
|
|
|
@ -203,14 +203,14 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
|
|||
testCollectionsAPI();
|
||||
testCollectionsAPIAddRemoveStress();
|
||||
testErrorHandling();
|
||||
testNoCollectionSpecified();
|
||||
deletePartiallyCreatedCollection();
|
||||
deleteCollectionRemovesStaleZkCollectionsNode();
|
||||
clusterPropTest();
|
||||
|
||||
addReplicaTest();
|
||||
|
||||
// last
|
||||
deleteCollectionWithDownNodes();
|
||||
|
||||
if (DEBUG) {
|
||||
super.printLayout();
|
||||
}
|
||||
|
@ -579,6 +579,40 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
|
|||
assertTrue(val1.contains("SolrException") || val2.contains("SolrException"));
|
||||
}
|
||||
|
||||
private void testNoCollectionSpecified() throws Exception {
|
||||
|
||||
cloudClient.getZkStateReader().updateClusterState(true);
|
||||
assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection"));
|
||||
assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2"));
|
||||
|
||||
// try and create a SolrCore with no collection name
|
||||
Create createCmd = new Create();
|
||||
createCmd.setCoreName("corewithnocollection");
|
||||
createCmd.setCollection("");
|
||||
String dataDir = SolrTestCaseJ4.dataDir.getAbsolutePath() + File.separator
|
||||
+ System.currentTimeMillis() + "corewithnocollection" + "_1v";
|
||||
createCmd.setDataDir(dataDir);
|
||||
createCmd.setNumShards(1);
|
||||
if (secondConfigSet) {
|
||||
createCmd.setCollectionConfigName("conf1");
|
||||
}
|
||||
|
||||
createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1)))
|
||||
.request(createCmd);
|
||||
|
||||
// try and create a SolrCore with no collection name
|
||||
createCmd.setCollection(null);
|
||||
createCmd.setCoreName("corewithnocollection2");
|
||||
|
||||
createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1)))
|
||||
.request(createCmd);
|
||||
|
||||
// in both cases, the collection should have default to the core name
|
||||
cloudClient.getZkStateReader().updateClusterState(true);
|
||||
assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection"));
|
||||
assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2"));
|
||||
}
|
||||
|
||||
private void testNodesUsedByCreate() throws Exception {
|
||||
// we can use this client because we just want base url
|
||||
final String baseUrl = getBaseUrl((HttpSolrServer) clients.get(0));
|
||||
|
@ -631,7 +665,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
|
|||
boolean disableLegacy = random().nextBoolean();
|
||||
CloudSolrServer client1 = null;
|
||||
|
||||
if(disableLegacy) {
|
||||
if (disableLegacy) {
|
||||
log.info("legacyCloud=false");
|
||||
client1 = createCloudClient(null);
|
||||
setClusterProp(client1, ZkStateReader.LEGACY_CLOUD, "false");
|
||||
|
|
|
@ -64,18 +64,18 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
private List<Overseer> overseers = new ArrayList<Overseer>();
|
||||
private List<ZkStateReader> readers = new ArrayList<ZkStateReader>();
|
||||
|
||||
private String collection = "collection1";
|
||||
|
||||
public static class MockZKController{
|
||||
|
||||
private final SolrZkClient zkClient;
|
||||
private final ZkStateReader zkStateReader;
|
||||
private final String nodeName;
|
||||
private final String collection;
|
||||
private final LeaderElector elector;
|
||||
private final Map<String, ElectionContext> electionContext = Collections.synchronizedMap(new HashMap<String, ElectionContext>());
|
||||
|
||||
public MockZKController(String zkAddress, String nodeName, String collection) throws InterruptedException, TimeoutException, IOException, KeeperException {
|
||||
public MockZKController(String zkAddress, String nodeName) throws InterruptedException, TimeoutException, IOException, KeeperException {
|
||||
this.nodeName = nodeName;
|
||||
this.collection = collection;
|
||||
zkClient = new SolrZkClient(zkAddress, TIMEOUT);
|
||||
zkStateReader = new ZkStateReader(zkClient);
|
||||
zkStateReader.createClusterStateWatchersAndUpdate();
|
||||
|
@ -105,7 +105,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
zkClient.close();
|
||||
}
|
||||
|
||||
public String publishState(String coreName, String coreNodeName, String stateName, int numShards)
|
||||
public String publishState(String collection, String coreName, String coreNodeName, String stateName, int numShards)
|
||||
throws KeeperException, InterruptedException, IOException {
|
||||
if (stateName == null) {
|
||||
ElectionContext ec = electionContext.remove(coreName);
|
||||
|
@ -134,41 +134,40 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
q.offer(ZkStateReader.toJSON(m));
|
||||
}
|
||||
|
||||
for (int i = 0; i < 120; i++) {
|
||||
String shardId = getShardId("http://" + nodeName + "/solr/", coreName);
|
||||
if (shardId != null) {
|
||||
try {
|
||||
zkClient.makePath("/collections/" + collection + "/leader_elect/"
|
||||
+ shardId + "/election", true);
|
||||
} catch (NodeExistsException nee) {}
|
||||
ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
|
||||
"http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP,
|
||||
nodeName, ZkStateReader.CORE_NAME_PROP, coreName,
|
||||
ZkStateReader.SHARD_ID_PROP, shardId,
|
||||
ZkStateReader.COLLECTION_PROP, collection,
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
|
||||
ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
|
||||
elector, shardId, collection, nodeName + "_" + coreName, props,
|
||||
zkStateReader);
|
||||
elector.setup(ctx);
|
||||
elector.joinElection(ctx, false);
|
||||
return shardId;
|
||||
if (collection.length() > 0) {
|
||||
for (int i = 0; i < 120; i++) {
|
||||
String shardId = getShardId(collection, coreNodeName);
|
||||
if (shardId != null) {
|
||||
try {
|
||||
zkClient.makePath("/collections/" + collection + "/leader_elect/"
|
||||
+ shardId + "/election", true);
|
||||
} catch (NodeExistsException nee) {}
|
||||
ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
|
||||
"http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP,
|
||||
nodeName, ZkStateReader.CORE_NAME_PROP, coreName,
|
||||
ZkStateReader.SHARD_ID_PROP, shardId,
|
||||
ZkStateReader.COLLECTION_PROP, collection,
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
|
||||
ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
|
||||
elector, shardId, collection, nodeName + "_" + coreName, props,
|
||||
zkStateReader);
|
||||
elector.setup(ctx);
|
||||
elector.joinElection(ctx, false);
|
||||
return shardId;
|
||||
}
|
||||
Thread.sleep(500);
|
||||
}
|
||||
Thread.sleep(500);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String getShardId(final String baseUrl, final String coreName) {
|
||||
Map<String,Slice> slices = zkStateReader.getClusterState().getSlicesMap(
|
||||
collection);
|
||||
private String getShardId(String collection, String coreNodeName) {
|
||||
Map<String,Slice> slices = zkStateReader.getClusterState().getSlicesMap(collection);
|
||||
if (slices != null) {
|
||||
for (Slice slice : slices.values()) {
|
||||
for (Replica replica : slice.getReplicas()) {
|
||||
// TODO: for really large clusters, we could 'index' on this
|
||||
String rbaseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
|
||||
String rcore = replica.getStr(ZkStateReader.CORE_NAME_PROP);
|
||||
if (baseUrl.equals(rbaseUrl) && coreName.equals(rcore)) {
|
||||
String cnn = replica.getName();
|
||||
if (coreNodeName.equals(cnn)) {
|
||||
return slice.getName();
|
||||
}
|
||||
}
|
||||
|
@ -226,17 +225,17 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
ZkStateReader reader = new ZkStateReader(zkClient);
|
||||
reader.createClusterStateWatchersAndUpdate();
|
||||
|
||||
zkController = new MockZKController(server.getZkAddress(), "127.0.0.1", "collection1");
|
||||
zkController = new MockZKController(server.getZkAddress(), "127.0.0.1");
|
||||
|
||||
final int numShards=6;
|
||||
|
||||
for (int i = 0; i < numShards; i++) {
|
||||
assertNotNull("shard got no id?", zkController.publishState("core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
|
||||
assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
|
||||
}
|
||||
|
||||
assertEquals(2, reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap().size());
|
||||
assertEquals(2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size());
|
||||
assertEquals(2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size());
|
||||
Map<String,Replica> rmap = reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap();
|
||||
assertEquals(rmap.toString(), 2, rmap.size());
|
||||
assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size());
|
||||
assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size());
|
||||
|
||||
//make sure leaders are in cloud state
|
||||
assertNotNull(reader.getLeaderUrl("collection1", "shard1", 15000));
|
||||
|
@ -258,6 +257,81 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadQueueItem() throws Exception {
|
||||
String zkDir = dataDir.getAbsolutePath() + File.separator
|
||||
+ "zookeeper/server1/data";
|
||||
|
||||
ZkTestServer server = new ZkTestServer(zkDir);
|
||||
|
||||
MockZKController zkController = null;
|
||||
SolrZkClient zkClient = null;
|
||||
SolrZkClient overseerClient = null;
|
||||
|
||||
try {
|
||||
server.run();
|
||||
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
|
||||
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
|
||||
|
||||
zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
|
||||
zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
|
||||
|
||||
overseerClient = electNewOverseer(server.getZkAddress());
|
||||
|
||||
ZkStateReader reader = new ZkStateReader(zkClient);
|
||||
reader.createClusterStateWatchersAndUpdate();
|
||||
|
||||
zkController = new MockZKController(server.getZkAddress(), "127.0.0.1");
|
||||
|
||||
final int numShards=3;
|
||||
|
||||
for (int i = 0; i < numShards; i++) {
|
||||
assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
|
||||
}
|
||||
|
||||
assertEquals(1, reader.getClusterState().getSlice(collection, "shard1").getReplicasMap().size());
|
||||
assertEquals(1, reader.getClusterState().getSlice(collection, "shard2").getReplicasMap().size());
|
||||
assertEquals(1, reader.getClusterState().getSlice(collection, "shard3").getReplicasMap().size());
|
||||
|
||||
//make sure leaders are in cloud state
|
||||
assertNotNull(reader.getLeaderUrl(collection, "shard1", 15000));
|
||||
assertNotNull(reader.getLeaderUrl(collection, "shard2", 15000));
|
||||
assertNotNull(reader.getLeaderUrl(collection, "shard3", 15000));
|
||||
|
||||
// publish a bad queue item
|
||||
String emptyCollectionName = "";
|
||||
zkController.publishState(emptyCollectionName, "core0", "node0", ZkStateReader.ACTIVE, 1);
|
||||
zkController.publishState(emptyCollectionName, "core0", "node0", null, 1);
|
||||
|
||||
// make sure the Overseer is still processing items
|
||||
for (int i = 0; i < numShards; i++) {
|
||||
assertNotNull("shard got no id?", zkController.publishState("collection2", "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
|
||||
}
|
||||
|
||||
assertEquals(1, reader.getClusterState().getSlice("collection2", "shard1").getReplicasMap().size());
|
||||
assertEquals(1, reader.getClusterState().getSlice("collection2", "shard2").getReplicasMap().size());
|
||||
assertEquals(1, reader.getClusterState().getSlice("collection2", "shard3").getReplicasMap().size());
|
||||
|
||||
//make sure leaders are in cloud state
|
||||
assertNotNull(reader.getLeaderUrl("collection2", "shard1", 15000));
|
||||
assertNotNull(reader.getLeaderUrl("collection2", "shard2", 15000));
|
||||
assertNotNull(reader.getLeaderUrl("collection2", "shard3", 15000));
|
||||
|
||||
} finally {
|
||||
if (DEBUG) {
|
||||
if (zkController != null) {
|
||||
zkClient.printLayoutToStdOut();
|
||||
}
|
||||
}
|
||||
close(zkClient);
|
||||
if (zkController != null) {
|
||||
zkController.close();
|
||||
}
|
||||
close(overseerClient);
|
||||
server.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShardAssignmentBigger() throws Exception {
|
||||
String zkDir = dataDir.getAbsolutePath() + File.separator
|
||||
|
@ -289,7 +363,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
reader.createClusterStateWatchersAndUpdate();
|
||||
|
||||
for (int i = 0; i < nodeCount; i++) {
|
||||
controllers[i] = new MockZKController(server.getZkAddress(), "node" + i, "collection1");
|
||||
controllers[i] = new MockZKController(server.getZkAddress(), "node" + i);
|
||||
}
|
||||
for (int i = 0; i < nodeCount; i++) {
|
||||
nodeExecutors[i] = Executors.newFixedThreadPool(1, new DefaultSolrThreadFactory("testShardAssignment"));
|
||||
|
@ -306,7 +380,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
final String coreName = "core" + slot;
|
||||
|
||||
try {
|
||||
ids[slot]=controllers[slot % nodeCount].publishState(coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount);
|
||||
ids[slot]=controllers[slot % nodeCount].publishState(collection, coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount);
|
||||
} catch (Throwable e) {
|
||||
e.printStackTrace();
|
||||
fail("register threw exception:" + e.getClass());
|
||||
|
@ -551,21 +625,20 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
reader = new ZkStateReader(zkClient);
|
||||
reader.createClusterStateWatchersAndUpdate();
|
||||
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1",
|
||||
"collection1");
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1");
|
||||
|
||||
overseerClient = electNewOverseer(server.getZkAddress());
|
||||
|
||||
Thread.sleep(1000);
|
||||
mockController.publishState("core1", "core_node1",
|
||||
mockController.publishState(collection, "core1", "core_node1",
|
||||
ZkStateReader.RECOVERING, 1);
|
||||
|
||||
waitForCollections(reader, "collection1");
|
||||
waitForCollections(reader, collection);
|
||||
verifyStatus(reader, ZkStateReader.RECOVERING);
|
||||
|
||||
int version = getClusterStateVersion(zkClient);
|
||||
|
||||
mockController.publishState("core1", "core_node1", ZkStateReader.ACTIVE,
|
||||
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.ACTIVE,
|
||||
1);
|
||||
|
||||
while (version == getClusterStateVersion(zkClient));
|
||||
|
@ -575,7 +648,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
overseerClient.close();
|
||||
Thread.sleep(1000); // wait for overseer to get killed
|
||||
|
||||
mockController.publishState("core1", "core_node1",
|
||||
mockController.publishState(collection, "core1", "core_node1",
|
||||
ZkStateReader.RECOVERING, 1);
|
||||
version = getClusterStateVersion(zkClient);
|
||||
|
||||
|
@ -588,13 +661,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
assertEquals("Live nodes count does not match", 1, reader
|
||||
.getClusterState().getLiveNodes().size());
|
||||
assertEquals("Shard count does not match", 1, reader.getClusterState()
|
||||
.getSlice("collection1", "shard1").getReplicasMap().size());
|
||||
.getSlice(collection, "shard1").getReplicasMap().size());
|
||||
version = getClusterStateVersion(zkClient);
|
||||
mockController.publishState("core1", "core_node1", null, 1);
|
||||
mockController.publishState(collection, "core1", "core_node1", null, 1);
|
||||
while (version == getClusterStateVersion(zkClient));
|
||||
Thread.sleep(500);
|
||||
assertFalse("collection1 should be gone after publishing the null state",
|
||||
reader.getClusterState().getCollections().contains("collection1"));
|
||||
reader.getClusterState().getCollections().contains(collection));
|
||||
} finally {
|
||||
close(mockController);
|
||||
close(overseerClient);
|
||||
|
@ -676,17 +749,17 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
|
||||
for (int i = 0; i < atLeast(4); i++) {
|
||||
killCounter.incrementAndGet(); //for each round allow 1 kill
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
|
||||
mockController.publishState("core1", "node1", "state1",1);
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1");
|
||||
mockController.publishState(collection, "core1", "node1", "state1",1);
|
||||
if(mockController2!=null) {
|
||||
mockController2.close();
|
||||
mockController2 = null;
|
||||
}
|
||||
mockController.publishState("core1", "node1","state2",1);
|
||||
mockController2 = new MockZKController(server.getZkAddress(), "node2", "collection1");
|
||||
mockController.publishState("core1", "node1", "state1",1);
|
||||
mockController.publishState(collection, "core1", "node1","state2",1);
|
||||
mockController2 = new MockZKController(server.getZkAddress(), "node2");
|
||||
mockController.publishState(collection, "core1", "node1", "state1",1);
|
||||
verifyShardLeader(reader, "collection1", "shard1", "core1");
|
||||
mockController2.publishState("core4", "node2", "state2" ,1);
|
||||
mockController2.publishState(collection, "core4", "node2", "state2" ,1);
|
||||
mockController.close();
|
||||
mockController = null;
|
||||
verifyShardLeader(reader, "collection1", "shard1", "core4");
|
||||
|
@ -729,11 +802,11 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
reader = new ZkStateReader(controllerClient);
|
||||
reader.createClusterStateWatchersAndUpdate();
|
||||
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1");
|
||||
|
||||
overseerClient = electNewOverseer(server.getZkAddress());
|
||||
|
||||
mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1);
|
||||
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1);
|
||||
|
||||
waitForCollections(reader, "collection1");
|
||||
|
||||
|
@ -743,8 +816,8 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
|
||||
int version = getClusterStateVersion(controllerClient);
|
||||
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
|
||||
mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1);
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1");
|
||||
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1);
|
||||
|
||||
while (version == getClusterStateVersion(controllerClient));
|
||||
|
||||
|
@ -794,11 +867,11 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
reader = new ZkStateReader(controllerClient);
|
||||
reader.createClusterStateWatchersAndUpdate();
|
||||
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
|
||||
mockController = new MockZKController(server.getZkAddress(), "node1");
|
||||
|
||||
overseerClient = electNewOverseer(server.getZkAddress());
|
||||
|
||||
mockController.publishState("core1", "node1", ZkStateReader.RECOVERING, 12);
|
||||
mockController.publishState(collection, "core1", "node1", ZkStateReader.RECOVERING, 12);
|
||||
|
||||
waitForCollections(reader, "collection1");
|
||||
|
||||
|
|
|
@ -66,10 +66,10 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase {
|
|||
|
||||
int maxDoc = maxDocList[random().nextInt(maxDocList.length - 1)];
|
||||
|
||||
indexThread = new StopableIndexingThread("1", true, maxDoc);
|
||||
indexThread = new StopableIndexingThread(controlClient, cloudClient, "1", true, maxDoc);
|
||||
indexThread.start();
|
||||
|
||||
indexThread2 = new StopableIndexingThread("2", true, maxDoc);
|
||||
indexThread2 = new StopableIndexingThread(controlClient, cloudClient, "2", true, maxDoc);
|
||||
|
||||
indexThread2.start();
|
||||
|
||||
|
@ -100,7 +100,7 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase {
|
|||
|
||||
Thread.sleep(1000);
|
||||
|
||||
waitForThingsToLevelOut(45);
|
||||
waitForThingsToLevelOut(90);
|
||||
|
||||
Thread.sleep(2000);
|
||||
|
||||
|
|
|
@ -103,6 +103,8 @@ public class TestDistribDocBasedVersion extends AbstractFullDistribZkTestBase {
|
|||
doTestDocVersions();
|
||||
doTestHardFail();
|
||||
|
||||
commit(); // work arround SOLR-5628
|
||||
|
||||
testFinished = true;
|
||||
} finally {
|
||||
if (!testFinished) {
|
||||
|
|
|
@ -190,7 +190,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
|
|||
cc = getCoreContainer();
|
||||
|
||||
ZkController zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
|
||||
"127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
|
||||
"127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
|
||||
|
||||
@Override
|
||||
public List<CoreDescriptor> getCurrentDescriptors() {
|
||||
|
@ -230,7 +230,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
|
|||
cc = getCoreContainer();
|
||||
|
||||
zkController = new ZkController(cc, server.getZkAddress(),
|
||||
TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
|
||||
TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
|
||||
|
||||
@Override
|
||||
public List<CoreDescriptor> getCurrentDescriptors() {
|
||||
|
@ -284,7 +284,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
|
|||
|
||||
try {
|
||||
zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
|
||||
"http://127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
|
||||
"http://127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
|
||||
|
||||
@Override
|
||||
public List<CoreDescriptor> getCurrentDescriptors() {
|
||||
|
|
|
@ -64,6 +64,8 @@ public class HdfsTestUtil {
|
|||
|
||||
System.setProperty("solr.hdfs.home", "/solr_hdfs_home");
|
||||
|
||||
System.setProperty("solr.hdfs.blockcache.global", Boolean.toString(LuceneTestCase.random().nextBoolean()));
|
||||
|
||||
final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
|
||||
dfsCluster.waitActive();
|
||||
|
||||
|
@ -92,6 +94,7 @@ public class HdfsTestUtil {
|
|||
System.clearProperty("test.build.data");
|
||||
System.clearProperty("test.cache.data");
|
||||
System.clearProperty("solr.hdfs.home");
|
||||
System.clearProperty("solr.hdfs.blockcache.global");
|
||||
if (dfsCluster != null) {
|
||||
timers.remove(dfsCluster);
|
||||
dfsCluster.shutdown();
|
||||
|
|
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.cloud.hdfs;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.store.NRTCachingDirectory;
|
||||
import org.apache.lucene.util.LuceneTestCase.Nightly;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrServer;
|
||||
import org.apache.solr.cloud.BasicDistributedZkTest;
|
||||
import org.apache.solr.cloud.StopableIndexingThread;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.HdfsDirectoryFactory;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.servlet.SolrDispatchFilter;
|
||||
import org.apache.solr.store.blockcache.BlockCache;
|
||||
import org.apache.solr.store.blockcache.BlockDirectory;
|
||||
import org.apache.solr.store.blockcache.BlockDirectoryCache;
|
||||
import org.apache.solr.store.blockcache.Cache;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
|
||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
||||
|
||||
@Slow
|
||||
@Nightly
|
||||
@ThreadLeakScope(Scope.NONE) // hdfs client currently leaks thread(s)
|
||||
public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest {
|
||||
  private static final String SOLR_HDFS_HOME = "solr.hdfs.home";
  private static final String SOLR_HDFS_BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global";
  private static final String ACOLLECTION = "acollection";
  private static MiniDFSCluster dfsCluster;

  @BeforeClass
  public static void setupClass() throws Exception {
    schemaString = "schema15.xml";      // we need a string id
    dfsCluster = HdfsTestUtil.setupClass(new File(TEMP_DIR,
        HdfsBasicDistributedZk2Test.class.getName() + "_"
            + System.currentTimeMillis()).getAbsolutePath());
    System.setProperty(SOLR_HDFS_HOME, dfsCluster.getURI().toString() + "/solr");
  }

  @AfterClass
  public static void teardownClass() throws Exception {
    HdfsTestUtil.teardownClass(dfsCluster);
    System.clearProperty(SOLR_HDFS_HOME);
    dfsCluster = null;
  }

  @Override
  protected String getDataDir(String dataDir) throws IOException {
    return HdfsTestUtil.getDataDir(dfsCluster, dataDir);
  }

  public HdfsWriteToMultipleCollectionsTest() {
    super();
    sliceCount = 1;
    shardCount = 3;
  }

  protected String getSolrXml() {
    return "solr-no-core.xml";
  }

  @Override
  public void doTest() throws Exception {
    int docCount = random().nextInt(1313) + 1;
    int cnt = random().nextInt(4) + 1;
    for (int i = 0; i < cnt; i++) {
      createCollection(ACOLLECTION + i, 2, 2, 9);
    }
    for (int i = 0; i < cnt; i++) {
      waitForRecoveriesToFinish(ACOLLECTION + i, false);
    }
    List<CloudSolrServer> cloudServers = new ArrayList<CloudSolrServer>();
    List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>();
    for (int i = 0; i < cnt; i++) {
      CloudSolrServer server = new CloudSolrServer(zkServer.getZkAddress());
      server.setDefaultCollection(ACOLLECTION + i);
      cloudServers.add(server);
      StopableIndexingThread indexThread = new StopableIndexingThread(null, server, "1", true, docCount);
      threads.add(indexThread);
      indexThread.start();
    }

    int addCnt = 0;
    for (StopableIndexingThread thread : threads) {
      thread.join();
      addCnt += thread.getNumAdds() - thread.getNumDeletes();
    }

    long collectionsCount = 0;
    for (CloudSolrServer server : cloudServers) {
      server.commit();
      collectionsCount += server.query(new SolrQuery("*:*")).getResults().getNumFound();
    }

    for (CloudSolrServer server : cloudServers) {
      server.shutdown();
    }

    assertEquals(addCnt, collectionsCount);

    BlockCache lastBlockCache = null;
    // assert that we are using the block directory and that write and read caching are being used
    for (JettySolrRunner jetty : jettys) {
      CoreContainer cores = ((SolrDispatchFilter) jetty.getDispatchFilter()
          .getFilter()).getCores();
      Collection<SolrCore> solrCores = cores.getCores();
      for (SolrCore core : solrCores) {
        if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName()
            .startsWith(ACOLLECTION)) {
          assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory);
          RefCounted<IndexWriter> iwRef = core.getUpdateHandler()
              .getSolrCoreState().getIndexWriter(core);
          try {
            IndexWriter iw = iwRef.get();
            NRTCachingDirectory directory = (NRTCachingDirectory) iw
                .getDirectory();
            BlockDirectory blockDirectory = (BlockDirectory) directory
                .getDelegate();
            assertTrue(blockDirectory.isBlockCacheReadEnabled());
            assertTrue(blockDirectory.isBlockCacheWriteEnabled());
            Cache cache = blockDirectory.getCache();
            // we know its a BlockDirectoryCache, but future proof
            assertTrue(cache instanceof BlockDirectoryCache);
            BlockCache blockCache = ((BlockDirectoryCache) cache)
                .getBlockCache();
            if (lastBlockCache != null) {
              if (Boolean.getBoolean(SOLR_HDFS_BLOCKCACHE_GLOBAL)) {
                assertEquals(lastBlockCache, blockCache);
              } else {
                assertNotSame(lastBlockCache, blockCache);
              }
            }
            lastBlockCache = blockCache;
          } finally {
            iwRef.decref();
          }
        }
      }
    }
  }
}
@@ -30,7 +30,11 @@ import org.apache.solr.util.RefCounted;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TestNonNRTOpen extends SolrTestCaseJ4 {
  private static final Logger log = LoggerFactory.getLogger(TestNonNRTOpen.class);

  @BeforeClass
  public static void beforeClass() throws Exception {

@@ -80,6 +84,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {

    // core reload
    String core = h.getCore().getName();
    log.info("Reloading core: " + h.getCore().toString());
    h.getCoreContainer().reload(core);
    assertNotNRT(1);

@@ -90,6 +95,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {

    // add a doc and core reload
    assertU(adoc("bazz", "doc2"));
    log.info("Reloading core: " + h.getCore().toString());
    h.getCoreContainer().reload(core);
    assertNotNRT(3);
  }

@@ -127,11 +133,15 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {
  }

  static void assertNotNRT(int maxDoc) {
    RefCounted<SolrIndexSearcher> searcher = h.getCore().getSearcher();
    SolrCore core = h.getCore();
    log.info("Checking notNRT & maxDoc=" + maxDoc + " of core=" + core.toString());
    RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
    try {
      DirectoryReader ir = searcher.get().getIndexReader();
      assertEquals(maxDoc, ir.maxDoc());
      assertFalse("expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt"));
      SolrIndexSearcher s = searcher.get();
      DirectoryReader ir = s.getIndexReader();
      assertEquals("SOLR-5815? : wrong maxDoc: core=" + core.toString() + " searcher=" + s.toString(),
          maxDoc, ir.maxDoc());
      assertFalse("SOLR-5815? : expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt"));
    } finally {
      searcher.decref();
    }
@@ -0,0 +1,46 @@
package org.apache.solr.schema;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.SortField;

/**
 * Custom field wrapping an int, to test sorting via a custom comparator.
 */
public class WrappedIntField extends TrieIntField {
  Expression expr;

  public WrappedIntField() {
    try {
      expr = JavascriptCompiler.compile("payload % 3");
    } catch (Exception e) {
      throw new RuntimeException("impossible?", e);
    }
  }

  @Override
  public SortField getSortField(final SchemaField field, final boolean reverse) {
    field.checkSortability();
    SimpleBindings bindings = new SimpleBindings();
    bindings.add(super.getSortField(field, reverse));
    return expr.getSortField(bindings, reverse);
  }
}
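Aside (not part of the patch): the same Lucene expressions API can be exercised outside of a Solr FieldType. A minimal sketch, assuming a plain int field named "payload" as in the test schema; the standalone class name is illustrative:

import java.text.ParseException;

import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

public class ExpressionSortSketch {
  public static Sort modThreeSort() throws ParseException {
    // Compile the same expression the field type uses and bind it to a plain
    // int sort over the underlying "payload" values.
    Expression expr = JavascriptCompiler.compile("payload % 3");
    SimpleBindings bindings = new SimpleBindings();
    bindings.add(new SortField("payload", SortField.Type.INT));
    return new Sort(expr.getSortField(bindings, /* reverse= */ false));
  }
}

Binding super.getSortField(field, reverse), as WrappedIntField does above, keeps the schema type's own sort semantics instead of a hand-built SortField.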
@@ -0,0 +1,53 @@
package org.apache.solr.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;

/**
 * Test QueryComponent.doFieldSortValues
 */
@SuppressCodecs({"Lucene3x"})
public class TestFieldSortValues extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-minimal.xml", "schema-field-sort-values.xml");
  }

  public void testCustomComparator() throws Exception {
    clearIndex();
    assertU(adoc(sdoc("id", "1", "payload", "2")));
    assertU(adoc(sdoc("id", "2", "payload", "3")));
    assertU(adoc(sdoc("id", "3", "payload", "1")));
    assertU(adoc(sdoc("id", "4", "payload", "5")));
    assertU(adoc(sdoc("id", "5", "payload", "4")));
    assertU(commit());

    // payload is backed by a custom sort field which returns the payload value mod 3
    assertQ(req("q", "*:*", "fl", "id", "sort", "payload asc, id asc", "fsv", "true")
        , "//result/doc[int='2' and position()=1]"
        , "//result/doc[int='3' and position()=2]"
        , "//result/doc[int='5' and position()=3]"
        , "//result/doc[int='1' and position()=4]"
        , "//result/doc[int='4' and position()=5]");
  }
}
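For the record, the expected ordering follows directly from the mod-3 key: the payloads 2, 3, 1, 5, 4 for ids 1 through 5 map to sort keys 2, 0, 1, 2, 1, so ascending order with the id tiebreaker gives ids 2, 3, 5, 1, 4, which is exactly what the XPath assertions check.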
@@ -51,6 +51,7 @@ public class BlockCacheTest extends LuceneTestCase {
        int file = 0;
        blockCacheKey.setBlock(block);
        blockCacheKey.setFile(file);
        blockCacheKey.setPath("/");

        if (blockCache.fetch(blockCacheKey, buffer)) {
          hitsInCache.incrementAndGet();

@@ -91,6 +92,7 @@ public class BlockCacheTest extends LuceneTestCase {
    BlockCacheKey blockCacheKey = new BlockCacheKey();
    blockCacheKey.setBlock(0);
    blockCacheKey.setFile(0);
    blockCacheKey.setPath("/");
    byte[] newData = new byte[blockSize*3];
    byte[] testData = testData(random, blockSize, newData);
@@ -0,0 +1,153 @@
package org.apache.solr.update;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.SolrTestCaseJ4;

import java.util.Locale;

import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestExceedMaxTermLength extends SolrTestCaseJ4 {

  public final static String TEST_SOLRCONFIG_NAME = "solrconfig.xml";
  public final static String TEST_SCHEMAXML_NAME = "schema11.xml";

  private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1;
  private final static int maxTestTermLegnth = IndexWriter.MAX_TERM_LENGTH * 2;

  @BeforeClass
  public static void beforeTests() throws Exception {
    initCore(TEST_SOLRCONFIG_NAME, TEST_SCHEMAXML_NAME);
  }

  @After
  public void cleanup() throws Exception {
    assertU(delQ("*:*"));
    assertU(commit());
  }

  @Test
  public void testExceededMaxTermLength(){

    // problematic field
    final String longFieldName = "cat";
    final String longFieldValue = TestUtil.randomSimpleString(random(),
                                                              minTestTermLength,
                                                              maxTestTermLegnth);

    final String okayFieldName = TestUtil.randomSimpleString(random(), 1, 50) + "_sS" ; //Dynamic field
    final String okayFieldValue = TestUtil.randomSimpleString(random(),
                                                              minTestTermLength,
                                                              maxTestTermLegnth);

    boolean includeOkayFields = random().nextBoolean();

    if(random().nextBoolean()) {
      //Use XML
      String doc;
      if(includeOkayFields) {
        doc = adoc("id", "1", longFieldName, longFieldValue, okayFieldName, okayFieldValue);
      } else {
        doc = adoc("id", "1", longFieldName, longFieldValue);
      }
      assertFailedU(doc);
    } else {
      //Use JSON
      try {
        if(includeOkayFields) {
          String jsonStr = "[{'id':'1','%s':'%s', '%s': '%s'}]";
          jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue,
              okayFieldName, okayFieldValue);
          updateJ(json(jsonStr), null);
        } else {
          String jsonStr = "[{'id':'1','%s':'%s'}]";
          jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue);
          updateJ(json(jsonStr), null);
        }
      } catch (Exception e) {
        //expected
        String msg= e.getCause().getMessage();
        assertTrue(msg.contains("one immense term in field=\"cat\""));
      }

    }

    assertU(commit());

    assertQ(req("q", "*:*"), "//*[@numFound='0']");
  }

  @Test
  public void testExceededMaxTermLengthWithLimitingFilter(){

    // problematic field
    final String longFieldName = "cat_length";
    final String longFieldValue = TestUtil.randomSimpleString(random(),
                                                              minTestTermLength,
                                                              maxTestTermLegnth);

    final String okayFieldName = TestUtil.randomSimpleString(random(), 1, 50) + "_sS" ; //Dynamic field
    final String okayFieldValue = TestUtil.randomSimpleString(random(),
                                                              minTestTermLength,
                                                              maxTestTermLegnth);

    boolean includeOkayFields = random().nextBoolean();

    if(random().nextBoolean()) {
      //Use XML
      String doc;
      if(includeOkayFields) {
        doc = adoc("id", "1", longFieldName, longFieldValue, okayFieldName, okayFieldValue);
      } else {
        doc = adoc("id", "1", longFieldName, longFieldValue);
      }
      assertU(doc);
    } else {
      //Use JSON
      String jsonStr = null;
      try {
        if(includeOkayFields) {
          jsonStr = "[{'id':'1','%s':'%s', '%s': '%s'}]";
          jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue,
              okayFieldName, okayFieldValue);
          updateJ(json(jsonStr), null);
        } else {
          jsonStr = "[{'id':'1','%s':'%s'}]";
          jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue);
          updateJ(json(jsonStr), null);
        }
      } catch (Exception e) {
        //expected
        fail("Should not have failed adding doc " + jsonStr);
        String msg= e.getCause().getMessage();
        assertTrue(msg.contains("one immense term in field=\"cat\""));
      }

    }

    assertU(commit());

    assertQ(req("q", "*:*"), "//*[@numFound='1']");
  }
}
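Aside (not part of the patch): the second test relies on the "cat_length" field type in schema11.xml carrying a length-limiting filter in its analysis chain, so oversized tokens are dropped before indexing rather than tripping the immense-term check. A minimal sketch of that idea in plain Lucene terms, assuming the 4.x analysis constructors that take a Version argument; the class name and tokenizer choice are illustrative:

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.Version;

// Illustrative sketch only: drops tokens longer than MAX_TERM_LENGTH characters.
// Note that IndexWriter.MAX_TERM_LENGTH is measured in UTF-8 bytes, so a
// character-based cap like this is only an approximation for non-ASCII input.
public class LengthLimitingAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    TokenStream sink = new LengthFilter(Version.LUCENE_CURRENT, source, 1, IndexWriter.MAX_TERM_LENGTH);
    return new TokenStreamComponents(source, sink);
  }
}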
@@ -129,6 +129,9 @@
      <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
      <!-- Enable/Disable the hdfs cache. -->
      <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
      <!-- Enable/Disable using one global cache for all SolrCores.
           The settings used will be from the first HdfsDirectoryFactory created. -->
      <str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>

    </directoryFactory>
@@ -626,7 +626,7 @@ public class ZkStateReader {
  }

  /**
   * Returns the baseURL corrisponding to a given node's nodeName --
   * Returns the baseURL corresponding to a given node's nodeName --
   * NOTE: does not (currently) imply that the nodeName (or resulting
   * baseURL) exists in the cluster.
   * @lucene.experimental
@@ -1428,122 +1428,13 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
    return rsp;
  }

  abstract class StopableThread extends Thread {
  static abstract class StopableThread extends Thread {
    public StopableThread(String name) {
      super(name);
    }
    public abstract void safeStop();
  }

  class StopableIndexingThread extends StopableThread {
    private volatile boolean stop = false;
    protected final String id;
    protected final List<String> deletes = new ArrayList<String>();
    protected Set<String> addFails = new HashSet<String>();
    protected Set<String> deleteFails = new HashSet<String>();
    protected boolean doDeletes;
    private int numCycles;

    public StopableIndexingThread(String id, boolean doDeletes) {
      this(id, doDeletes, -1);
    }

    public StopableIndexingThread(String id, boolean doDeletes, int numCycles) {
      super("StopableIndexingThread");
      this.id = id;
      this.doDeletes = doDeletes;
      this.numCycles = numCycles;
      setDaemon(true);
    }

    @Override
    public void run() {
      int i = 0;
      int numDone = 0;
      int numDeletes = 0;
      int numAdds = 0;

      while (true && !stop) {
        if (numCycles != -1) {
          if (numDone > numCycles) {
            break;
          }
        }
        ++numDone;
        String id = this.id + "-" + i;
        ++i;
        boolean addFailed = false;

        if (doDeletes && random().nextBoolean() && deletes.size() > 0) {
          String delete = deletes.remove(0);
          try {
            numDeletes++;
            UpdateRequest req = new UpdateRequest();
            req.deleteById(delete);
            req.setParam("CONTROL", "TRUE");
            req.process(controlClient);

            cloudClient.deleteById(delete);
          } catch (Exception e) {
            System.err.println("REQUEST FAILED:");
            e.printStackTrace();
            if (e instanceof SolrServerException) {
              System.err.println("ROOT CAUSE:");
              ((SolrServerException) e).getRootCause().printStackTrace();
            }
            deleteFails.add(id);
          }
        }

        try {
          numAdds++;
          indexr("id", id, i1, 50, t1,
              "to come to the aid of their country.");
        } catch (Exception e) {
          addFailed = true;
          System.err.println("REQUEST FAILED:");
          e.printStackTrace();
          if (e instanceof SolrServerException) {
            System.err.println("ROOT CAUSE:");
            ((SolrServerException) e).getRootCause().printStackTrace();
          }
          addFails.add(id);
        }

        if (!addFailed && doDeletes && random().nextBoolean()) {
          deletes.add(id);
        }

        try {
          Thread.currentThread().sleep(random().nextInt(100));
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        }
      }

      System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails"
          + " deletes:" + numDeletes);
    }

    @Override
    public void safeStop() {
      stop = true;
    }

    public Set<String> getAddFails() {
      return addFails;
    }

    public Set<String> getDeleteFails() {
      return deleteFails;
    }

    public int getFailCount() {
      return addFails.size() + deleteFails.size();
    }

  };

  class StopableSearchThread extends StopableThread {
    private volatile boolean stop = false;
    protected final AtomicInteger queryFails = new AtomicInteger();
@@ -0,0 +1,185 @@
package org.apache.solr.cloud;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class StopableIndexingThread extends AbstractFullDistribZkTestBase.StopableThread {
  private static String t1 = "a_t";
  private static String i1 = "a_si";
  private volatile boolean stop = false;
  protected final String id;
  protected final List<String> deletes = new ArrayList<String>();
  protected Set<String> addFails = new HashSet<String>();
  protected Set<String> deleteFails = new HashSet<String>();
  protected boolean doDeletes;
  private int numCycles;
  private SolrServer controlClient;
  private SolrServer cloudClient;
  private int numDeletes;
  private int numAdds;

  public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes) {
    this(controlClient, cloudClient, id, doDeletes, -1);
  }

  public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes, int numCycles) {
    super("StopableIndexingThread");
    this.controlClient = controlClient;
    this.cloudClient = cloudClient;
    this.id = id;
    this.doDeletes = doDeletes;
    this.numCycles = numCycles;
    setDaemon(true);
  }

  @Override
  public void run() {
    int i = 0;
    int numDone = 0;
    numDeletes = 0;
    numAdds = 0;

    while (true && !stop) {
      if (numCycles != -1) {
        if (numDone > numCycles) {
          break;
        }
      }
      ++numDone;
      String id = this.id + "-" + i;
      ++i;
      boolean addFailed = false;

      if (doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean() && deletes.size() > 0) {
        String delete = deletes.remove(0);
        try {
          numDeletes++;
          if (controlClient != null) {
            UpdateRequest req = new UpdateRequest();
            req.deleteById(delete);
            req.setParam("CONTROL", "TRUE");
            req.process(controlClient);
          }

          cloudClient.deleteById(delete);
        } catch (Exception e) {
          System.err.println("REQUEST FAILED:");
          e.printStackTrace();
          if (e instanceof SolrServerException) {
            System.err.println("ROOT CAUSE:");
            ((SolrServerException) e).getRootCause().printStackTrace();
          }
          deleteFails.add(id);
        }
      }

      try {
        numAdds++;
        indexr("id", id, i1, 50, t1,
            "to come to the aid of their country.");
      } catch (Exception e) {
        addFailed = true;
        System.err.println("REQUEST FAILED:");
        e.printStackTrace();
        if (e instanceof SolrServerException) {
          System.err.println("ROOT CAUSE:");
          ((SolrServerException) e).getRootCause().printStackTrace();
        }
        addFails.add(id);
      }

      if (!addFailed && doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean()) {
        deletes.add(id);
      }

      try {
        Thread.currentThread().sleep(AbstractFullDistribZkTestBase.random().nextInt(100));
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    }

    System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails"
        + " deletes:" + numDeletes);
  }

  @Override
  public void safeStop() {
    stop = true;
  }

  public Set<String> getAddFails() {
    return addFails;
  }

  public Set<String> getDeleteFails() {
    return deleteFails;
  }

  public int getFailCount() {
    return addFails.size() + deleteFails.size();
  }

  protected void addFields(SolrInputDocument doc, Object... fields) {
    for (int i = 0; i < fields.length; i += 2) {
      doc.addField((String) (fields[i]), fields[i + 1]);
    }
  }

  protected void indexr(Object... fields) throws Exception {
    SolrInputDocument doc = new SolrInputDocument();
    addFields(doc, fields);
    addFields(doc, "rnd_b", true);
    indexDoc(doc);
  }

  protected void indexDoc(SolrInputDocument doc) throws IOException,
      SolrServerException {

    if (controlClient != null) {
      UpdateRequest req = new UpdateRequest();
      req.add(doc);
      req.setParam("CONTROL", "TRUE");
      req.process(controlClient);
    }

    UpdateRequest ureq = new UpdateRequest();
    ureq.add(doc);
    ureq.process(cloudClient);
  }

  public int getNumDeletes() {
    return numDeletes;
  }

  public int getNumAdds() {
    return numAdds;
  }

}
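Aside (not part of the patch): with the clients now passed in explicitly, the extracted thread can be driven the same way HdfsWriteToMultipleCollectionsTest does earlier in this diff. A rough sketch of that pattern inside a SolrCloud test method, where the collection name, id prefix, and cycle count are placeholders:

// Inside a SolrCloud test (so the test framework's random() context is available).
CloudSolrServer client = new CloudSolrServer(zkServer.getZkAddress());
client.setDefaultCollection("acollection0");

// No control client (null), do deletes as we go, stop after at most 1000 cycles.
StopableIndexingThread indexer = new StopableIndexingThread(null, client, "1", true, 1000);
indexer.start();
indexer.join();

assertEquals(0, indexer.getFailCount());
int netAdds = indexer.getNumAdds() - indexer.getNumDeletes();
client.shutdown();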