LUCENE-5487: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1575397 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2014-03-07 20:50:45 +00:00
commit d784980654
91 changed files with 2511 additions and 978 deletions


@ -16,6 +16,7 @@
<orderEntry type="library" scope="TEST" name="HSQLDB" level="project" /> <orderEntry type="library" scope="TEST" name="HSQLDB" level="project" />
<orderEntry type="library" scope="TEST" name="Derby" level="project" /> <orderEntry type="library" scope="TEST" name="Derby" level="project" />
<orderEntry type="library" scope="TEST" name="Solr DIH test library" level="project" /> <orderEntry type="library" scope="TEST" name="Solr DIH test library" level="project" />
<orderEntry type="library" scope="TEST" name="Solr example library" level="project" />
<orderEntry type="library" name="Solr core library" level="project" /> <orderEntry type="library" name="Solr core library" level="project" />
<orderEntry type="library" name="Solrj library" level="project" /> <orderEntry type="library" name="Solrj library" level="project" />
<orderEntry type="library" name="Solr DIH library" level="project" /> <orderEntry type="library" name="Solr DIH library" level="project" />


@ -18,6 +18,7 @@
<orderEntry type="library" name="Solr morphlines core library" level="project" /> <orderEntry type="library" name="Solr morphlines core library" level="project" />
<orderEntry type="library" name="Solr morphlines cell library" level="project" /> <orderEntry type="library" name="Solr morphlines cell library" level="project" />
<orderEntry type="library" scope="TEST" name="Solr morphlines core test library" level="project" /> <orderEntry type="library" scope="TEST" name="Solr morphlines core test library" level="project" />
<orderEntry type="library" scope="TEST" name="Solr example library" level="project" />
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" /> <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" /> <orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
<orderEntry type="module" module-name="solr-core" /> <orderEntry type="module" module-name="solr-core" />


@ -212,7 +212,7 @@ def checkClassSummaries(fullPath):
    if inThing:
      if lineLower.find('</tr>') != -1:
        if not hasDesc:
-          missing.append((lastCaption, lastItem))
+          missing.append((lastCaption, unEscapeURL(lastItem)))
        inThing = False
        continue
      else:
@ -298,6 +298,11 @@ def checkSummary(fullPath):
  f.close()
  return anyMissing

def unEscapeURL(s):
  # Not exhaustive!!
  s = s.replace('%20', ' ')
  return s

def unescapeHTML(s):
  s = s.replace('&lt;', '<')
  s = s.replace('&gt;', '>')


@ -731,7 +731,7 @@ def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs
    os.chdir('solr')
    print(" run tests w/ Java 7 and testArgs='%s'..." % testArgs)
-   run('%s; ant clean test %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
+   run('%s; ant clean test -Dtests.slow=false %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
    # test javadocs
    print(' generate javadocs w/ Java 7...')


@ -68,6 +68,13 @@ Optimizations
======================= Lucene 4.8.0 =======================
Changes in Runtime Behavior
* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException
if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous
behavior of silently ignoring these terms, use LengthFilter in your Analyzer.
(hossman, Mike McCandless, Varun Thacker)
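
A minimal sketch of the LengthFilter workaround named above (the KeywordTokenizer,
the no-Version LengthFilter constructor, and the divide-by-four bound are
assumptions here: MAX_TERM_LENGTH is a UTF-8 byte limit while LengthFilter
counts chars, so the bound is conservative):

  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      // silently drop any term long enough to exceed the UTF-8 byte limit
      TokenStream stream = new LengthFilter(tokenizer, 1, IndexWriter.MAX_TERM_LENGTH / 4);
      return new TokenStreamComponents(tokenizer, stream);
    }
  };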
New Features

* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
@ -89,6 +96,13 @@ New Features
* LUCENE-5485: Add circumfix support to HunspellStemFilter. (Robert Muir)
* LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter.
(Robert Muir)
* LUCENE-5493: SortingMergePolicy, and EarlyTerminatingSortingCollector
support arbitrary Sort specifications.
(Robert Muir, Mike McCandless, Adrien Grand)
API Changes

* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
@ -96,6 +110,12 @@ API Changes
* LUCENE-5468: Move offline Sort (from suggest module) to OfflineSort. (Robert Muir)
* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector take
Sort instead of Sorter. BlockJoinSorter is removed, replaced with
BlockJoinComparatorSource, which can take a Sort for ordering of parents
and a separate Sort for ordering of children within a block.
(Robert Muir, Mike McCandless, Adrien Grand)
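
A minimal sketch of the new API wired together (the field names, analyzer,
and Directory are hypothetical; SortingMergePolicy simply wraps whatever
merge policy is already configured):

  Filter parentsFilter = new QueryWrapperFilter(new TermQuery(new Term("docType", "parent")));
  Sort parentSort = new Sort(new SortField("priority", SortField.Type.INT));
  SortField blockField = new SortField("block", new BlockJoinComparatorSource(parentsFilter, parentSort));
  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
  iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), new Sort(blockField)));
  IndexWriter writer = new IndexWriter(dir, iwc);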
Optimizations

* LUCENE-5468: HunspellStemFilter uses 10 to 100x less RAM. It also loads


@ -21,14 +21,17 @@ import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.CharSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import java.io.BufferedInputStream;
@ -67,6 +70,9 @@ public class Dictionary {
private static final String FLAG_KEY = "FLAG";
private static final String COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
private static final String CIRCUMFIX_KEY = "CIRCUMFIX";
private static final String IGNORE_KEY = "IGNORE";
private static final String ICONV_KEY = "ICONV";
private static final String OCONV_KEY = "OCONV";
private static final String NUM_FLAG_TYPE = "num";
private static final String UTF8_FLAG_TYPE = "UTF-8";
@ -110,6 +116,16 @@ public class Dictionary {
int circumfix = -1; // circumfix flag, or -1 if one is not defined
// ignored characters (dictionary, affix, inputs)
private char[] ignore;
// FSTs used for ICONV/OCONV, output ord pointing to replacement text
FST<CharsRef> iconv;
FST<CharsRef> oconv;
boolean needsInputCleaning;
boolean needsOutputCleaning;
/**
 * Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
 * and dictionary files.
@ -136,9 +152,13 @@ public class Dictionary {
 */
public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
this.ignoreCase = ignoreCase;
+this.needsInputCleaning = ignoreCase;
+this.needsOutputCleaning = false; // set if we have an OCONV
-// hungarian has thousands of AF before the SET, so a 32k buffer is needed
-BufferedInputStream buffered = new BufferedInputStream(affix, 32768);
-buffered.mark(32768);
+// TODO: we really need to probably buffer this on disk since so many newer dictionaries
+// (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
+// their encoding... but for now this large buffer is a workaround
+BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
+buffered.mark(65536);
String encoding = getDictionaryEncoding(buffered);
buffered.reset();
CharsetDecoder decoder = getJavaEncoding(encoding);
@ -249,6 +269,29 @@ public class Dictionary {
throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber()); throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber());
} }
circumfix = flagParsingStrategy.parseFlag(parts[1]); circumfix = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(IGNORE_KEY)) {
String parts[] = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal IGNORE declaration", reader.getLineNumber());
}
ignore = parts[1].toCharArray();
Arrays.sort(ignore);
needsInputCleaning = true;
} else if (line.startsWith(ICONV_KEY) || line.startsWith(OCONV_KEY)) {
String parts[] = line.split("\\s+");
String type = parts[0];
if (parts.length != 2) {
throw new ParseException("Illegal " + type + " declaration", reader.getLineNumber());
}
int num = Integer.parseInt(parts[1]);
FST<CharsRef> res = parseConversions(reader, num);
if (type.equals("ICONV")) {
iconv = res;
needsInputCleaning |= iconv != null;
} else {
oconv = res;
needsOutputCleaning |= oconv != null;
}
}
}
@ -291,6 +334,7 @@ public class Dictionary {
Map<String,Integer> seenPatterns) throws IOException, ParseException {
BytesRef scratch = new BytesRef();
StringBuilder sb = new StringBuilder();
String args[] = header.split("\\s+");
boolean crossProduct = args[2].equals("Y");
@ -300,9 +344,6 @@ public class Dictionary {
ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
for (int i = 0; i < numLines; i++) {
-if (currentAffix > Short.MAX_VALUE) {
-throw new UnsupportedOperationException("Too many affixes, please report this to dev@lucene.apache.org");
-}
assert affixWriter.getPosition() == currentAffix << 3;
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");
@ -345,6 +386,9 @@ public class Dictionary {
Integer patternIndex = seenPatterns.get(regex);
if (patternIndex == null) {
patternIndex = patterns.size();
if (patternIndex > Short.MAX_VALUE) {
throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org");
}
seenPatterns.put(regex, patternIndex);
Pattern pattern = Pattern.compile(regex);
patterns.add(pattern);
@ -355,6 +399,8 @@ public class Dictionary {
if (stripOrd < 0) {
// already exists in our hash
stripOrd = (-stripOrd)-1;
} else if (stripOrd > Character.MAX_VALUE) {
throw new UnsupportedOperationException("Too many unique strips, please report this to dev@lucene.apache.org");
}
if (appendFlags == null) {
@ -368,7 +414,7 @@ public class Dictionary {
appendFlagsOrd = (-appendFlagsOrd)-1;
} else if (appendFlagsOrd > Short.MAX_VALUE) {
// this limit is probably flexible, but its a good sanity check too
-throw new UnsupportedOperationException("Too many unique flags, please report this to dev@lucene.apache.org");
+throw new UnsupportedOperationException("Too many unique append flags, please report this to dev@lucene.apache.org");
}
affixWriter.writeShort((short)flag);
@ -378,6 +424,11 @@ public class Dictionary {
affixWriter.writeShort((short)patternOrd);
affixWriter.writeShort((short)appendFlagsOrd);
if (needsInputCleaning) {
CharSequence cleaned = cleanInput(affixArg, sb);
affixArg = cleaned.toString();
}
List<Character> list = affixes.get(affixArg);
if (list == null) {
list = new ArrayList<Character>();
@ -388,6 +439,31 @@ public class Dictionary {
currentAffix++;
}
}
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
Map<String,String> mappings = new TreeMap<>();
for (int i = 0; i < num; i++) {
String line = reader.readLine();
String parts[] = line.split("\\s+");
if (parts.length != 3) {
throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
}
if (mappings.put(parts[1], parts[2]) != null) {
throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
}
}
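// note (editor's annotation): Builder.add below requires its inputs in
// sorted order; the TreeMap above provides exactly that for String keys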
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
IntsRef scratchInts = new IntsRef();
for (Map.Entry<String,String> entry : mappings.entrySet()) {
Util.toUTF16(entry.getKey(), scratchInts);
builder.add(scratchInts, new CharsRef(entry.getValue()));
}
return builder.finish();
}
/**
 * Parses the encoding specified in the affix file readable through the provided InputStream
@ -485,6 +561,8 @@ public class Dictionary {
BytesRef flagsScratch = new BytesRef();
IntsRef scratchInts = new IntsRef();
StringBuilder sb = new StringBuilder();
File unsorted = File.createTempFile("unsorted", "dat", tempDir);
try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
for (InputStream dictionary : dictionaries) {
@ -492,16 +570,19 @@ public class Dictionary {
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
while ((line = lines.readLine()) != null) {
-if (ignoreCase) {
+if (needsInputCleaning) {
int flagSep = line.lastIndexOf('/');
if (flagSep == -1) {
-writer.write(line.toLowerCase(Locale.ROOT).getBytes(IOUtils.CHARSET_UTF_8));
+CharSequence cleansed = cleanInput(line, sb);
+writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
} else {
-StringBuilder sb = new StringBuilder();
-sb.append(line.substring(0, flagSep).toLowerCase(Locale.ROOT));
-if (flagSep < line.length()) {
-sb.append(line.substring(flagSep, line.length()));
+String text = line.substring(0, flagSep);
+CharSequence cleansed = cleanInput(text, sb);
+if (cleansed != sb) {
+sb.setLength(0);
+sb.append(cleansed);
}
+sb.append(line.substring(flagSep));
writer.write(sb.toString().getBytes(IOUtils.CHARSET_UTF_8));
}
} else {
@ -761,4 +842,76 @@ public class Dictionary {
static boolean hasFlag(char flags[], char flag) {
return Arrays.binarySearch(flags, flag) >= 0;
}
CharSequence cleanInput(CharSequence input, StringBuilder reuse) {
reuse.setLength(0);
for (int i = 0; i < input.length(); i++) {
char ch = input.charAt(i);
if (ignore != null && Arrays.binarySearch(ignore, ch) >= 0) {
continue;
}
if (ignoreCase && iconv == null) {
// if we have no input conversion mappings, do this on-the-fly
ch = Character.toLowerCase(ch);
}
reuse.append(ch);
}
if (iconv != null) {
try {
applyMappings(iconv, reuse);
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
if (ignoreCase) {
for (int i = 0; i < reuse.length(); i++) {
reuse.setCharAt(i, Character.toLowerCase(reuse.charAt(i)));
}
}
}
return reuse;
}
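// Worked example (editor's annotation; input taken from the ignore.aff and
// TestIgnore files in this commit): with "IGNORE '-" declared and no ICONV,
// cleanInput("dr'ink-able", sb) drops the ignored chars -> "drinkable".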
// TODO: this could be more efficient!
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
final FST.BytesReader bytesReader = fst.getBytesReader();
final FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>());
final CharsRef NO_OUTPUT = fst.outputs.getNoOutput();
// temporary stuff
final FST.Arc<CharsRef> arc = new FST.Arc<>();
int longestMatch;
CharsRef longestOutput;
for (int i = 0; i < sb.length(); i++) {
arc.copyFrom(firstArc);
CharsRef output = NO_OUTPUT;
longestMatch = -1;
longestOutput = null;
for (int j = i; j < sb.length(); j++) {
char ch = sb.charAt(j);
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
break;
} else {
output = fst.outputs.add(output, arc.output);
}
if (arc.isFinal()) {
longestOutput = fst.outputs.add(output, arc.nextFinalOutput);
longestMatch = j;
}
}
if (longestMatch >= 0) {
sb.delete(i, longestMatch+1);
sb.insert(i, longestOutput);
i += (longestOutput.length - 1);
}
}
}
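// Greedy longest-match illustration (editor's annotation, using the mappings
// from TestDictionary.testReplacements in this commit): given a->b, ab->c,
// c->de, def->gh, the longest key wins at each position, so
// applyMappings rewrites "abtestabcnother" to "ctestcdenother".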
}


@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell;
 * limitations under the License.
 */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -24,8 +25,8 @@ import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
@ -40,8 +41,11 @@ final class Stemmer {
private final BytesRef scratch = new BytesRef();
private final StringBuilder segment = new StringBuilder();
private final ByteArrayDataInput affixReader;
-private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT);
+// used for normalization
+private final StringBuilder scratchSegment = new StringBuilder();
+private char scratchBuffer[] = new char[32];
/**
 * Constructs a new Stemmer which will use the provided Dictionary to create its stems.
 *
@ -68,17 +72,25 @@ final class Stemmer {
 * @param word Word to find the stems for
 * @return List of stems for the word
 */
public List<CharsRef> stem(char word[], int length) {
-if (dictionary.ignoreCase) {
-charUtils.toLowerCase(word, 0, length);
+if (dictionary.needsInputCleaning) {
+scratchSegment.setLength(0);
+scratchSegment.append(word, 0, length);
+CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
+scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
+length = segment.length();
+segment.getChars(0, length, scratchBuffer, 0);
+word = scratchBuffer;
}
List<CharsRef> stems = new ArrayList<CharsRef>();
IntsRef forms = dictionary.lookupWord(word, 0, length);
if (forms != null) {
// TODO: some forms should not be added, e.g. ONLYINCOMPOUND
// just because it exists, does not make it valid...
for (int i = 0; i < forms.length; i++) {
-stems.add(new CharsRef(word, 0, length));
+stems.add(newStem(word, length));
}
}
stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false));
@ -106,6 +118,23 @@ final class Stemmer {
}
return deduped;
}
private CharsRef newStem(char buffer[], int length) {
if (dictionary.needsOutputCleaning) {
scratchSegment.setLength(0);
scratchSegment.append(buffer, 0, length);
try {
Dictionary.applyMappings(dictionary.oconv, scratchSegment);
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
char cleaned[] = new char[scratchSegment.length()];
scratchSegment.getChars(0, cleaned.length, cleaned, 0);
return new CharsRef(cleaned, 0, cleaned.length);
} else {
return new CharsRef(buffer, 0, length);
}
}
// ================================================= Helper Methods ================================================
@ -292,7 +321,7 @@ final class Stemmer {
continue;
}
}
-stems.add(new CharsRef(strippedWord, 0, length));
+stems.add(newStem(strippedWord, length));
}
}
}


@ -0,0 +1,219 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.InputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;
import org.junit.Ignore;
/**
* These thunderbird dictionaries can be retrieved via:
* https://addons.mozilla.org/en-US/thunderbird/language-tools/
* You must click and download every file: sorry!
*/
@Ignore("enable manually")
public class TestAllDictionaries2 extends LuceneTestCase {
// set this to the location of where you downloaded all the files
static final File DICTIONARY_HOME =
new File("/data/thunderbirdDicts");
final String tests[] = {
/* zip file */ /* dictionary */ /* affix */
"addon-0.4.5-an+fx+tb+fn+sm.xpi", "dictionaries/ru.dic", "dictionaries/ru.aff",
"addon-0.5.5-fx+tb.xpi", "dictionaries/ko-KR.dic", "dictionaries/ko-KR.aff",
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
"belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
"bengali_bangladesh_dictionary-0.08-sm+tb+fx.xpi", "dictionaries/bn-BD.dic", "dictionaries/bn-BD.aff",
"brazilian_portuguese_dictionary_former_spelling-28.20140203-tb+sm+fx.xpi", "dictionaries/pt-BR-antigo.dic", "dictionaries/pt-BR-antigo.aff",
"brazilian_portuguese_dictionary_new_spelling-28.20140203-fx+sm+tb.xpi", "dictionaries/pt-BR.dic", "dictionaries/pt-BR.aff",
"british_english_dictionary_updated-1.19.5-sm+fx+tb.xpi", "dictionaries/en-GB.dic", "dictionaries/en-GB.aff",
"bulgarian_dictionary-4.3-fx+tb+sm.xpi", "dictionaries/bg.dic", "dictionaries/bg.aff",
"canadian_english_dictionary-2.0.8-fx+sm+tb.xpi", "dictionaries/en-CA.dic", "dictionaries/en-CA.aff",
"ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff",
"chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff",
"corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff",
"corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
"corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff",
"croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff",
"croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff",
"dansk_ordbog_til_stavekontrollen-2.2.1-sm+tb+fx.xpi", "dictionaries/da.dic", "dictionaries/da.aff",
"deutsches_worterbuch_de_de_alte_rechtschreibung-2.1.8-sm.xpi", "dictionaries/de-DE-1901.dic", "dictionaries/de-DE-1901.aff",
"diccionario_de_espanolespana-1.7-sm+tb+fn+fx.xpi", "dictionaries/es-ES.dic", "dictionaries/es-ES.aff",
"diccionario_en_espanol_para_venezuela-1.1.17-sm+an+tb+fn+fx.xpi", "dictionaries/es_VE.dic", "dictionaries/es_VE.aff",
"diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
"diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
"diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
"difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
"dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
"eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
"english_australian_dictionary-2.1.2-tb+fx+sm.xpi", "dictionaries/en-AU.dic", "dictionaries/en-AU.aff",
"esperanta_vortaro-1.0.2-fx+tb+sm.xpi", "dictionaries/eo-EO.dic", "dictionaries/eo-EO.aff",
"european_portuguese_spellchecker-14.1.1.1-tb+fx.xpi", "dictionaries/pt-PT.dic", "dictionaries/pt-PT.aff",
"faroese_spell_checker_faroe_islands-2.0-tb+sm+fx+fn.xpi", "dictionaries/fo_FO.dic", "dictionaries/fo_FO.aff",
"frysk_wurdboek-2.1.1-fn+sm+fx+an+tb.xpi", "dictionaries/fy.dic", "dictionaries/fy.aff",
"geiriadur_cymraeg-1.08-tb+sm+fx.xpi", "dictionaries/cy_GB.dic", "dictionaries/cy_GB.aff",
"general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi", "dictionaries/ca.dic", "dictionaries/ca.aff",
"german_dictionary-2.0.3-fn+fx+sm+tb.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
"german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
"german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
"german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
"german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
"german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
"greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi", "dictionaries/el-GR.dic", "dictionaries/el-GR.aff",
"gujarati_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/gu_IN.dic", "dictionaries/gu_IN.aff",
"haitian_creole_spell_checker-0.08-tb+sm+fx.xpi", "dictionaries/ht-HT.dic", "dictionaries/ht-HT.aff",
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
"kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
"kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
"lao_spellchecking_dictionary-0-fx+tb+sm+fn+an.xpi", "dictionaries/lo_LA.dic", "dictionaries/lo_LA.aff",
"latviesu_valodas_pareizrakstibas_parbaudes_vardnica-1.0.0-fn+fx+tb+sm.xpi", "dictionaries/lv_LV.dic", "dictionaries/lv_LV.aff",
"lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
"litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
"litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
"marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
"ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
"nepali_dictionary-1.2-fx+tb.xpi", "dictionaries/ne_NP.dic", "dictionaries/ne_NP.aff",
"norsk_bokmal_ordliste-2.0.10.2-fx+tb+sm.xpi", "dictionaries/nb.dic", "dictionaries/nb.aff",
"norsk_nynorsk_ordliste-2.1.0-sm+fx+tb.xpi", "dictionaries/nn.dic", "dictionaries/nn.aff",
"northern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nso-ZA.dic", "dictionaries/nso-ZA.aff",
"oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
"polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
"punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
"scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Latn.dic", "dictionaries/sr-RS-Latn.aff",
"slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK.dic", "dictionaries/sk-SK.aff",
"slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK-ascii.dic", "dictionaries/sk-SK-ascii.aff",
"slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi", "dictionaries/sl.dic", "dictionaries/sl.aff",
"songhay_spell_checker-0.03-fx+tb+sm.xpi", "dictionaries/Songhay - Mali.dic", "dictionaries/Songhay - Mali.aff",
"southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/st-ZA.dic", "dictionaries/st-ZA.aff",
"sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff",
"sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi", "dictionaries/dsb.dic", "dictionaries/dsb.aff",
"srpska_latinica-0.1-fx+tb+sm.xpi", "dictionaries/Srpski_latinica.dic", "dictionaries/Srpski_latinica.aff",
"svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv.dic", "dictionaries/sv.aff",
"svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv_FI.dic", "dictionaries/sv_FI.aff",
"swati_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ss-ZA.dic", "dictionaries/ss-ZA.aff",
"tamil_spell_checker_for_firefox-0.4-tb+fx.xpi", "dictionaries/ta-TA.dic", "dictionaries/ta-TA.aff",
"telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
"upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
"valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
"venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
"zulu_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/zu-ZA.dic", "dictionaries/zu-ZA.aff"
};
public void test() throws Exception {
for (int i = 0; i < tests.length; i += 3) {
File f = new File(DICTIONARY_HOME, tests[i]);
assert f.exists();
try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
assert dicEntry != null;
ZipEntry affEntry = zip.getEntry(tests[i+2]);
assert affEntry != null;
try (InputStream dictionary = zip.getInputStream(dicEntry);
InputStream affix = zip.getInputStream(affEntry)) {
Dictionary dic = new Dictionary(affix, dictionary);
System.out.println(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" +
"words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " +
"flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " +
"strips=" + RamUsageEstimator.humanSizeOf(dic.stripLookup) + ", " +
"conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " +
"affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " +
"prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " +
"suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
}
}
}
}
public void testOneDictionary() throws Exception {
String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
for (int i = 0; i < tests.length; i++) {
if (tests[i].equals(toTest)) {
File f = new File(DICTIONARY_HOME, tests[i]);
assert f.exists();
try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
assert dicEntry != null;
ZipEntry affEntry = zip.getEntry(tests[i+2]);
assert affEntry != null;
try (InputStream dictionary = zip.getInputStream(dicEntry);
InputStream affix = zip.getInputStream(affEntry)) {
new Dictionary(affix, dictionary);
}
}
}
}
}
}


@ -0,0 +1,36 @@
package org.apache.lucene.analysis.hunspell;
import org.junit.BeforeClass;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestConv extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("conv.aff", "conv.dic");
}
public void testConversion() {
assertStemsTo("drink", "drInk");
assertStemsTo("drInk", "drInk");
assertStemsTo("drInkAble", "drInk");
assertStemsTo("drInkABle", "drInk");
assertStemsTo("drinkABle", "drInk");
}
}


@ -22,10 +22,15 @@ import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.CharSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
public class TestDictionary extends LuceneTestCase {
@ -123,4 +128,54 @@ public class TestDictionary extends LuceneTestCase {
assertTrue(affixStream.isClosed());
assertTrue(dictStream.isClosed());
}
public void testReplacements() throws Exception {
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
IntsRef scratchInts = new IntsRef();
// a -> b
Util.toUTF16("a", scratchInts);
builder.add(scratchInts, new CharsRef("b"));
// ab -> c
Util.toUTF16("ab", scratchInts);
builder.add(scratchInts, new CharsRef("c"));
// c -> de
Util.toUTF16("c", scratchInts);
builder.add(scratchInts, new CharsRef("de"));
// def -> gh
Util.toUTF16("def", scratchInts);
builder.add(scratchInts, new CharsRef("gh"));
FST<CharsRef> fst = builder.finish();
StringBuilder sb = new StringBuilder("atestanother");
Dictionary.applyMappings(fst, sb);
assertEquals("btestbnother", sb.toString());
sb = new StringBuilder("abtestanother");
Dictionary.applyMappings(fst, sb);
assertEquals("ctestbnother", sb.toString());
sb = new StringBuilder("atestabnother");
Dictionary.applyMappings(fst, sb);
assertEquals("btestcnother", sb.toString());
sb = new StringBuilder("abtestabnother");
Dictionary.applyMappings(fst, sb);
assertEquals("ctestcnother", sb.toString());
sb = new StringBuilder("abtestabcnother");
Dictionary.applyMappings(fst, sb);
assertEquals("ctestcdenother", sb.toString());
sb = new StringBuilder("defdefdefc");
Dictionary.applyMappings(fst, sb);
assertEquals("ghghghde", sb.toString());
}
}


@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -30,7 +31,6 @@ import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -94,4 +94,20 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
};
checkOneTerm(a, "", "");
}
public void testIgnoreCaseNoSideEffects() throws Exception {
final Dictionary d;
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
}
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d));
}
};
checkOneTerm(a, "NoChAnGy", "NoChAnGy");
}
}


@ -0,0 +1,36 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
public class TestIgnore extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("ignore.aff", "ignore.dic");
}
public void testExamples() {
assertStemsTo("drink", "drink");
assertStemsTo("drinkable", "drink");
assertStemsTo("dr'ink-able", "drink");
assertStemsTo("drank-able", "drank");
assertStemsTo("'-'-'-");
}
}


@ -0,0 +1,16 @@
SET UTF-8
ICONV 4
ICONV A a
ICONV B b
ICONV C c
ICONV I i
OCONV 4
OCONV a A
OCONV b B
OCONV c C
OCONV i I
SFX X Y 1
SFX X 0 able . +ABLE


@ -0,0 +1,2 @@
1
drink/X [VERB]


@ -0,0 +1,6 @@
SET UTF-8
IGNORE '-
SFX X Y 1
SFX X 0 able . +ABLE


@ -0,0 +1,3 @@
1
drink/X [VERB]
dr-ank/X [VERB]


@ -209,11 +209,6 @@ final class DocFieldProcessor extends DocConsumer {
final DocFieldProcessorPerField perField = fields[i];
perField.consumer.processFields(perField.fields, perField.fieldCount);
}
-if (docState.maxTermPrefix != null && docState.infoStream.isEnabled("IW")) {
-docState.infoStream.message("IW", "WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
-docState.maxTermPrefix = null;
-}
}
private DocFieldProcessorPerField processField(FieldInfos.Builder fieldInfos,


@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.util.IOUtils;
/**
 * Holds state for inverting all occurrences of a single
@ -182,6 +181,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
// when we come back around to the field...
fieldState.position += posIncrAttribute.getPositionIncrement();
fieldState.offset += offsetAttribute.endOffset();
if (docState.maxTermPrefix != null) {
final String msg = "Document contains at least one immense term in field=\"" + fieldInfo.name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'";
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", "ERROR: " + msg);
}
docState.maxTermPrefix = null;
throw new IllegalArgumentException(msg);
}
/* if success was false above there is an exception coming through and we won't get here.*/
succeededInProcessingField = true;
} finally {


@ -207,8 +207,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
/**
 * Absolute hard maximum length for a term, in bytes once
 * encoded as UTF8. If a term arrives from the analyzer
- * longer than this length, it is skipped and a message is
- * printed to infoStream, if set (see {@link
+ * longer than this length, an
+ * <code>IllegalArgumentException</code> is thrown
+ * and a message is printed to infoStream, if set (see {@link
 * IndexWriterConfig#setInfoStream(InfoStream)}).
 */
public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;
@ -1159,7 +1160,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
 * merge policy.
 *
 * <p>Note that each term in the document can be no longer
- * than 16383 characters, otherwise an
+ * than {@link #MAX_TERM_LENGTH} in bytes, otherwise an
 * IllegalArgumentException will be thrown.</p>
 *
 * <p>Note that it's possible to create an invalid Unicode


@ -179,12 +179,11 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
try {
termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
} catch (MaxBytesLengthExceededException e) {
-// Not enough room in current block
-// Just skip this term, to remain as robust as
-// possible during indexing. A TokenFilter
-// can be inserted into the analyzer chain if
-// other behavior is wanted (pruning the term
-// to a prefix, throwing an exception, etc).
+// Term is too large; record this here (can't throw an
+// exc because DocInverterPerField will then abort the
+// entire segment) and then throw an exc later in
+// DocInverterPerField.java. LengthFilter can always be
+// used to prune the term before indexing:
if (docState.maxTermPrefix == null) {
final int saved = termBytesRef.length;
try {


@ -202,8 +202,8 @@ public class Sort {
return 0x45aaf665 + Arrays.hashCode(fields);
}
-/** Whether the relevance score is needed to sort documents. */
-boolean needsScores() {
+/** Returns true if the relevance score is needed to sort documents. */
+public boolean needsScores() {
for (SortField sortField : fields) {
if (sortField.needsScores()) {
return true;
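
A minimal sketch of what making this method public enables (the collector
setup is an assumption, not part of this change): callers can skip score
tracking when the sort never consults scores.

boolean trackScores = sort.needsScores();
TopFieldCollector collector = TopFieldCollector.create(
    sort, 10, true /* fillFields */,
    trackScores /* trackDocScores */, trackScores /* trackMaxScore */,
    false /* docsScoredInOrder */);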


@ -0,0 +1,105 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import org.junit.Before;
import org.junit.After;
/**
* Tests that a useful exception is thrown when attempting to index a term that is
* too large
*
* @see IndexWriter#MAX_TERM_LENGTH
*/
public class TestExceedMaxTermLength extends LuceneTestCase {
private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1;
private final static int maxTestTermLength = IndexWriter.MAX_TERM_LENGTH * 2;
Directory dir = null;
@Before
public void createDir() {
dir = newDirectory();
}
@After
public void destroyDir() throws IOException {
dir.close();
dir = null;
}
public void test() throws Exception {
IndexWriter w = new IndexWriter
(dir, newIndexWriterConfig(random(),
TEST_VERSION_CURRENT,
new MockAnalyzer(random())));
try {
final FieldType ft = new FieldType();
ft.setIndexed(true);
ft.setStored(random().nextBoolean());
ft.freeze();
final Document doc = new Document();
if (random().nextBoolean()) {
// totally ok short field value
doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
TestUtil.randomSimpleString(random(), 1, 10),
ft));
}
// problematic field
final String name = TestUtil.randomSimpleString(random(), 1, 50);
final String value = TestUtil.randomSimpleString(random(),
minTestTermLength,
maxTestTermLength);
final Field f = new Field(name, value, ft);
if (random().nextBoolean()) {
// totally ok short field value
doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
TestUtil.randomSimpleString(random(), 1, 10),
ft));
}
doc.add(f);
try {
w.addDocument(doc);
fail("Did not get an exception from adding a monster term");
} catch (IllegalArgumentException e) {
final String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH);
final String msg = e.getMessage();
assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg,
msg.contains("immense term"));
assertTrue("IllegalArgumentException didn't mention max length ("+maxLengthMsg+"): " + msg,
msg.contains(maxLengthMsg));
assertTrue("IllegalArgumentException didn't mention field name ("+name+"): " + msg,
msg.contains(name));
}
} finally {
w.close();
}
}
}


@ -1660,32 +1660,32 @@ public class TestIndexWriter extends LuceneTestCase {
// This contents produces a too-long term:
String contents = "abc xyz x" + bigTerm + " another term";
doc.add(new TextField("content", contents, Field.Store.NO));
-w.addDocument(doc);
+try {
+w.addDocument(doc);
+fail("should have hit exception");
+} catch (IllegalArgumentException iae) {
+// expected
+}
// Make sure we can add another normal document
doc = new Document();
doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
w.addDocument(doc);
+// So we remove the deleted doc:
+w.forceMerge(1);
IndexReader reader = w.getReader();
w.close();
// Make sure all terms < max size were indexed
-assertEquals(2, reader.docFreq(new Term("content", "abc")));
+assertEquals(1, reader.docFreq(new Term("content", "abc")));
assertEquals(1, reader.docFreq(new Term("content", "bbb")));
-assertEquals(1, reader.docFreq(new Term("content", "term")));
+assertEquals(0, reader.docFreq(new Term("content", "term")));
-assertEquals(1, reader.docFreq(new Term("content", "another")));
-// Make sure position is still incremented when
-// massive term is skipped:
-DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another"));
-assertEquals(0, tps.nextDoc());
-assertEquals(1, tps.freq());
-assertEquals(3, tps.nextPosition());
-// Make sure the doc that has the massive term is in
+// Make sure the doc that has the massive term is NOT in
// the index:
-assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
+assertEquals("document with wicked long term is in the index!", 1, reader.numDocs());
reader.close();
dir.close();

View File

@ -0,0 +1,223 @@
package org.apache.lucene.index.sorter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher; // javadocs
import org.apache.lucene.search.Query; // javadocs
import org.apache.lucene.search.ScoreDoc; // javadocs
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.FixedBitSet;
/**
* Helper class to sort readers that contain blocks of documents.
* <p>
 * Note that this class is intended to be used with {@link SortingMergePolicy},
* and for other purposes has some limitations:
* <ul>
* <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
* <li>Filling sort field values is not yet supported.
* </ul>
* @lucene.experimental
*/
// TODO: can/should we clean this thing up (e.g. return a proper sort value)
// and move to the join/ module?
public class BlockJoinComparatorSource extends FieldComparatorSource {
final Filter parentsFilter;
final Sort parentSort;
final Sort childSort;
/**
* Create a new BlockJoinComparatorSource, sorting only blocks of documents
 * with {@code parentSort} and not reordering children within a block.
*
* @param parentsFilter Filter identifying parent documents
* @param parentSort Sort for parent documents
*/
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort) {
this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC));
}
/**
* Create a new BlockJoinComparatorSource, specifying the sort order for both
* blocks of documents and children within a block.
*
* @param parentsFilter Filter identifying parent documents
* @param parentSort Sort for parent documents
* @param childSort Sort for child documents in the same block
*/
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort, Sort childSort) {
this.parentsFilter = parentsFilter;
this.parentSort = parentSort;
this.childSort = childSort;
}
@Override
public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
// we keep parallel slots: the parent ids and the child ids
final int parentSlots[] = new int[numHits];
final int childSlots[] = new int[numHits];
SortField parentFields[] = parentSort.getSort();
final int parentReverseMul[] = new int[parentFields.length];
final FieldComparator<?> parentComparators[] = new FieldComparator[parentFields.length];
for (int i = 0; i < parentFields.length; i++) {
parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1;
parentComparators[i] = parentFields[i].getComparator(1, i);
}
SortField childFields[] = childSort.getSort();
final int childReverseMul[] = new int[childFields.length];
final FieldComparator<?> childComparators[] = new FieldComparator[childFields.length];
for (int i = 0; i < childFields.length; i++) {
childReverseMul[i] = childFields[i].getReverse() ? -1 : 1;
childComparators[i] = childFields[i].getComparator(1, i);
}
// NOTE: we could return parent ID as value but really our sort "value" is more complex...
// So we throw UOE for now. At the moment you really should only use this at indexing time.
return new FieldComparator<Integer>() {
int bottomParent;
int bottomChild;
FixedBitSet parentBits;
@Override
public int compare(int slot1, int slot2) {
try {
return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public void setBottom(int slot) {
bottomParent = parentSlots[slot];
bottomChild = childSlots[slot];
}
@Override
public void setTopValue(Integer value) {
// we don't have enough information (the docid is needed)
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
}
@Override
public int compareBottom(int doc) throws IOException {
return compare(bottomChild, bottomParent, doc, parent(doc));
}
@Override
public int compareTop(int doc) throws IOException {
// we don't have enough information (the docid is needed)
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
}
@Override
public void copy(int slot, int doc) throws IOException {
childSlots[slot] = doc;
parentSlots[slot] = parent(doc);
}
@Override
public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
final DocIdSet parents = parentsFilter.getDocIdSet(context, null);
if (parents == null) {
throw new IllegalStateException("AtomicReader " + context.reader() + " contains no parents!");
}
if (!(parents instanceof FixedBitSet)) {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
parentBits = (FixedBitSet) parents;
for (int i = 0; i < parentComparators.length; i++) {
parentComparators[i] = parentComparators[i].setNextReader(context);
}
for (int i = 0; i < childComparators.length; i++) {
childComparators[i] = childComparators[i].setNextReader(context);
}
return this;
}
@Override
public Integer value(int slot) {
// really our sort "value" is more complex...
throw new UnsupportedOperationException("filling sort field values is not yet supported");
}
@Override
public void setScorer(Scorer scorer) {
super.setScorer(scorer);
for (FieldComparator<?> comp : parentComparators) {
comp.setScorer(scorer);
}
for (FieldComparator<?> comp : childComparators) {
comp.setScorer(scorer);
}
}
int parent(int doc) {
return parentBits.nextSetBit(doc);
}
int compare(int docID1, int parent1, int docID2, int parent2) throws IOException {
if (parent1 == parent2) { // both are in the same block
if (docID1 == parent1 || docID2 == parent2) {
// keep parents at the end of blocks
return docID1 - docID2;
} else {
return compare(docID1, docID2, childComparators, childReverseMul);
}
} else {
int cmp = compare(parent1, parent2, parentComparators, parentReverseMul);
if (cmp == 0) {
return parent1 - parent2;
} else {
return cmp;
}
}
}
int compare(int docID1, int docID2, FieldComparator<?> comparators[], int reverseMul[]) throws IOException {
for (int i = 0; i < comparators.length; i++) {
// TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
// the segments are always the same here...
comparators[i].copy(0, docID1);
comparators[i].setBottom(0);
int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
if (comp != 0) {
return comp;
}
}
return 0; // no need to docid tiebreak
}
};
}
@Override
public String toString() {
return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")";
}
}
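A usage sketch, not part of this change: the comparator source plugs into an ordinary SortField, which can then drive a SortingMergePolicy so document blocks are sorted as units at merge time. The field names ("parent", "parent_val", "child_val") and the IndexWriterConfig variable iwc are assumptions; the wiring mirrors the updated TestBlockJoinSorter later in this diff.

Filter parentsFilter = new FixedBitSetCachingWrapperFilter(
    new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
Sort sort = new Sort(new SortField("custom",
    new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
// iwc: an existing IndexWriterConfig
iwc.setMergePolicy(new SortingMergePolicy(new TieredMergePolicy(), sort));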

View File

@ -1,88 +0,0 @@
package org.apache.lucene.index.sorter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.FixedBitSet;
/**
* Helper class to sort readers that contain blocks of documents.
*/
public abstract class BlockJoinSorter extends Sorter {
protected final Filter parentsFilter;
/** Sole constructor. */
public BlockJoinSorter(Filter parentsFilter) {
this.parentsFilter = parentsFilter;
}
/** Return a {@link Sorter.DocComparator} instance that will be called on
* parent doc IDs. */
protected abstract DocComparator getParentComparator(AtomicReader reader);
/** Return a {@link Sorter.DocComparator} instance that will be called on
* children of the same parent. By default, children of the same parent are
* not reordered. */
protected DocComparator getChildComparator(AtomicReader reader) {
return INDEX_ORDER_COMPARATOR;
}
@Override
public final DocMap sort(AtomicReader reader) throws IOException {
final DocIdSet parents = parentsFilter.getDocIdSet(reader.getContext(), null);
if (parents == null) {
throw new IllegalStateException("AtomicReader " + reader + " contains no parents!");
}
if (!(parents instanceof FixedBitSet)) {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
final FixedBitSet parentBits = (FixedBitSet) parents;
final DocComparator parentComparator = getParentComparator(reader);
final DocComparator childComparator = getChildComparator(reader);
final DocComparator comparator = new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
final int parent1 = parentBits.nextSetBit(docID1);
final int parent2 = parentBits.nextSetBit(docID2);
if (parent1 == parent2) { // both are in the same block
if (docID1 == parent1 || docID2 == parent2) {
// keep parents at the end of blocks
return docID1 - docID2;
} else {
return childComparator.compare(docID1, docID2);
}
} else {
int cmp = parentComparator.compare(parent1, parent2);
if (cmp == 0) {
cmp = parent1 - parent2;
}
return cmp;
}
}
};
return sort(reader.maxDoc(), comparator);
}
}

View File

@ -24,50 +24,53 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TopDocsCollector;
 import org.apache.lucene.search.TotalHitCountCollector;

 /**
  * A {@link Collector} that early terminates collection of documents on a
  * per-segment basis, if the segment was sorted according to the given
- * {@link Sorter}.
+ * {@link Sort}.
  *
  * <p>
- * <b>NOTE:</b> the {@link Collector} detects sorted segments according to
+ * <b>NOTE:</b> the {@code Collector} detects sorted segments according to
  * {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
- * it collects up to a specified num docs from each segment, and therefore is
- * mostly suitable for use in conjunction with collectors such as
+ * it collects up to a specified {@code numDocsToCollect} from each segment,
+ * and therefore is mostly suitable for use in conjunction with collectors such as
  * {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
  * <p>
- * <b>NOTE</b>: If you wrap a {@link TopDocsCollector} that sorts in the same
- * order as the index order, the returned {@link TopDocsCollector#topDocs()}
+ * <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
+ * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
  * will be correct. However the total of {@link TopDocsCollector#getTotalHits()
  * hit count} will be underestimated since not all matching documents will have
  * been collected.
  * <p>
- * <b>NOTE</b>: This {@link Collector} uses {@link Sorter#getID()} to detect
- * whether a segment was sorted with the same {@link Sorter} as the one given in
- * {@link #EarlyTerminatingSortingCollector(Collector, Sorter, int)}. This has
+ * <b>NOTE</b>: This {@code Collector} uses {@link Sort#toString()} to detect
+ * whether a segment was sorted with the same {@code Sort}. This has
  * two implications:
  * <ul>
- * <li>if {@link Sorter#getID()} is not implemented correctly and returns
- * different identifiers for equivalent {@link Sorter}s, this collector will not
+ * <li>if a custom comparator is not implemented correctly and returns
+ * different identifiers for equivalent instances, this collector will not
  * detect sorted segments,</li>
  * <li>if you suddenly change the {@link IndexWriter}'s
- * {@link SortingMergePolicy} to sort according to another criterion and if both
- * the old and the new {@link Sorter}s have the same identifier, this
- * {@link Collector} will incorrectly detect sorted segments.</li>
+ * {@code SortingMergePolicy} to sort according to another criterion and if both
+ * the old and the new {@code Sort}s have the same identifier, this
+ * {@code Collector} will incorrectly detect sorted segments.</li>
  * </ul>
  *
  * @lucene.experimental
  */
 public class EarlyTerminatingSortingCollector extends Collector {

+  /** The wrapped Collector */
   protected final Collector in;
-  protected final Sorter sorter;
+  /** Sort used to sort the search results */
+  protected final Sort sort;
+  /** Number of documents to collect in each segment */
   protected final int numDocsToCollect;
+  /** Number of documents to collect in the current segment being processed */
   protected int segmentTotalCollect;
+  /** True if the current segment being processed is sorted by {@link #sort} */
   protected boolean segmentSorted;

   private int numCollected;

@ -77,20 +80,19 @@ public class EarlyTerminatingSortingCollector extends Collector {
    *
    * @param in
    *          the collector to wrap
-   * @param sorter
-   *          the same sorter as the one which is used by {@link IndexWriter}'s
-   *          {@link SortingMergePolicy}
+   * @param sort
+   *          the sort you are sorting the search results on
    * @param numDocsToCollect
    *          the number of documents to collect on each segment. When wrapping
    *          a {@link TopDocsCollector}, this number should be the number of
    *          hits.
    */
-  public EarlyTerminatingSortingCollector(Collector in, Sorter sorter, int numDocsToCollect) {
+  public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
     if (numDocsToCollect <= 0) {
       throw new IllegalStateException("numDocsToCollect must always be > 0, got " + segmentTotalCollect);
     }
     this.in = in;
-    this.sorter = sorter;
+    this.sort = sort;
     this.numDocsToCollect = numDocsToCollect;
   }

@ -110,7 +112,7 @@ public class EarlyTerminatingSortingCollector extends Collector {
   @Override
   public void setNextReader(AtomicReaderContext context) throws IOException {
     in.setNextReader(context);
-    segmentSorted = SortingMergePolicy.isSorted(context.reader(), sorter);
+    segmentSorted = SortingMergePolicy.isSorted(context.reader(), sort);
     segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE;
     numCollected = 0;
   }
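A fragment in the style of the class javadoc showing the new Sort-based constructor; searcher, query and the "ndv1" field are assumptions. The wrapped collector still returns the correct top N, while getTotalHits() may undercount on sorted segments.

Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
// numHits and numDocsToCollect are both 10 here
TopFieldCollector topN = TopFieldCollector.create(sort, 10, true, false, false, false);
searcher.search(query, new EarlyTerminatingSortingCollector(topN, sort, 10));
TopDocs hits = topN.topDocs(); // correct top 10; hits.totalHits may be underestimated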

View File

@ -1,81 +0,0 @@
package org.apache.lucene.index.sorter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
/**
* A {@link Sorter} which sorts documents according to their
* {@link NumericDocValues}. One can specify ascending or descending sort order.
*
* @lucene.experimental
*/
public class NumericDocValuesSorter extends Sorter {
private final String fieldName;
private final boolean ascending;
/** Constructor over the given field name, and ascending sort order. */
public NumericDocValuesSorter(final String fieldName) {
this(fieldName, true);
}
/**
* Constructor over the given field name, and whether sorting should be
* ascending ({@code true}) or descending ({@code false}).
*/
public NumericDocValuesSorter(final String fieldName, boolean ascending) {
this.fieldName = fieldName;
this.ascending = ascending;
}
@Override
public Sorter.DocMap sort(final AtomicReader reader) throws IOException {
final NumericDocValues ndv = reader.getNumericDocValues(fieldName);
final DocComparator comparator;
if (ascending) {
comparator = new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
final long v1 = ndv.get(docID1);
final long v2 = ndv.get(docID2);
return v1 < v2 ? -1 : v1 == v2 ? 0 : 1;
}
};
} else {
comparator = new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
final long v1 = ndv.get(docID1);
final long v2 = ndv.get(docID2);
return v1 > v2 ? -1 : v1 == v2 ? 0 : 1;
}
};
}
return sort(reader.maxDoc(), comparator);
}
@Override
public String getID() {
return "DocValues(" + fieldName + "," + (ascending ? "ascending" : "descending") + ")";
}
}
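The migration implied by this removal, as the updated tests below show (the "ndv" field name is illustrative): a NumericDocValuesSorter becomes a plain Sort over the numeric DocValues field.

// was: new NumericDocValuesSorter("ndv")        -- ascending
Sort ascending = new Sort(new SortField("ndv", SortField.Type.LONG));
// was: new NumericDocValuesSorter("ndv", false) -- descending
Sort descending = new Sort(new SortField("ndv", SortField.Type.LONG, true));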

View File

@ -22,47 +22,47 @@ import java.util.Comparator;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.TimSorter;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

 /**
  * Sorts documents of a given index by returning a permutation on the document
  * IDs.
- * <p><b>NOTE</b>: A {@link Sorter} implementation can be easily written from
- * a {@link DocComparator document comparator} by using the
- * {@link #sort(int, DocComparator)} helper method. This is especially useful
- * when documents are directly comparable by their field values.
  * @lucene.experimental
  */
-public abstract class Sorter {
+final class Sorter {

-  /** A comparator that keeps documents in index order. */
-  public static final DocComparator INDEX_ORDER_COMPARATOR = new DocComparator() {
-    @Override
-    public int compare(int docID1, int docID2) {
-      return docID1 - docID2;
-    }
-  };
+  final Sort sort;
+
+  /** Creates a new Sorter to sort the index with {@code sort} */
+  Sorter(Sort sort) {
+    if (sort.needsScores()) {
+      throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
+    }
+    this.sort = sort;
+  }

   /**
    * A permutation of doc IDs. For every document ID between <tt>0</tt> and
    * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
    * return <code>docID</code>.
    */
-  public static abstract class DocMap {
+  static abstract class DocMap {

     /** Given a doc ID from the original index, return its ordinal in the
      *  sorted index. */
-    public abstract int oldToNew(int docID);
+    abstract int oldToNew(int docID);

     /** Given the ordinal of a doc ID, return its doc ID in the original index. */
-    public abstract int newToOld(int docID);
+    abstract int newToOld(int docID);

     /** Return the number of documents in this map. This must be equal to the
      *  {@link AtomicReader#maxDoc() number of documents} of the
      *  {@link AtomicReader} which is sorted. */
-    public abstract int size();
+    abstract int size();
   }

   /** Check consistency of a {@link DocMap}, useful for assertions. */

@ -81,7 +81,7 @@ public abstract class Sorter {
   }

   /** A comparator of doc IDs. */
-  public static abstract class DocComparator {
+  static abstract class DocComparator {

     /** Compare docID1 against docID2. The contract for the return value is the
      *  same as {@link Comparator#compare(Object, Object)}. */

@ -89,45 +89,13 @@ public abstract class Sorter {
   }

-  /**
-   * Sorts documents in reverse order. <b>NOTE</b>: This {@link Sorter} is not
-   * idempotent. Sorting an {@link AtomicReader} once or twice will return two
-   * different {@link AtomicReader} views. This {@link Sorter} should not be
-   * used with {@link SortingMergePolicy}.
-   */
-  public static final Sorter REVERSE_DOCS = new Sorter() {
-    @Override
-    public DocMap sort(final AtomicReader reader) throws IOException {
-      final int maxDoc = reader.maxDoc();
-      return new DocMap() {
-        @Override
-        public int oldToNew(int docID) {
-          return maxDoc - docID - 1;
-        }
-        @Override
-        public int newToOld(int docID) {
-          return maxDoc - docID - 1;
-        }
-        @Override
-        public int size() {
-          return maxDoc;
-        }
-      };
-    }
-    @Override
-    public String getID() {
-      return "ReverseDocs";
-    }
-  };

   private static final class DocValueSorter extends TimSorter {

     private final int[] docs;
     private final Sorter.DocComparator comparator;
     private final int[] tmp;

-    public DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
+    DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
       super(docs.length / 64);
       this.docs = docs;
       this.comparator = comparator;

@ -168,7 +136,7 @@ public abstract class Sorter {
   }

   /** Computes the old-to-new permutation over the given comparator. */
-  protected static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
+  private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
     // check if the index is sorted
     boolean sorted = true;
     for (int i = 1; i < maxDoc; ++i) {

@ -242,20 +210,75 @@ public abstract class Sorter {
    * <b>NOTE:</b> deleted documents are expected to appear in the mapping as
    * well, they will however be marked as deleted in the sorted view.
    */
-  public abstract DocMap sort(AtomicReader reader) throws IOException;
+  DocMap sort(AtomicReader reader) throws IOException {
+    SortField fields[] = sort.getSort();
+    final int reverseMul[] = new int[fields.length];
+    final FieldComparator<?> comparators[] = new FieldComparator[fields.length];
+
+    for (int i = 0; i < fields.length; i++) {
+      reverseMul[i] = fields[i].getReverse() ? -1 : 1;
+      comparators[i] = fields[i].getComparator(1, i);
+      comparators[i].setNextReader(reader.getContext());
+      comparators[i].setScorer(FAKESCORER);
+    }
+    final DocComparator comparator = new DocComparator() {
+      @Override
+      public int compare(int docID1, int docID2) {
+        try {
+          for (int i = 0; i < comparators.length; i++) {
+            // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
+            // the segments are always the same here...
+            comparators[i].copy(0, docID1);
+            comparators[i].setBottom(0);
+            int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
+            if (comp != 0) {
+              return comp;
+            }
+          }
+          return Integer.compare(docID1, docID2); // docid order tiebreak
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+    };
+    return sort(reader.maxDoc(), comparator);
+  }

   /**
    * Returns the identifier of this {@link Sorter}.
    * <p>This identifier is similar to {@link Object#hashCode()} and should be
    * chosen so that two instances of this class that sort documents likewise
    * will have the same identifier. On the contrary, this identifier should be
-   * different on different {@link Sorter sorters}.
+   * different on different {@link Sort sorts}.
    */
-  public abstract String getID();
+  public String getID() {
+    return sort.toString();
+  }

   @Override
   public String toString() {
     return getID();
   }

+  static final Scorer FAKESCORER = new Scorer(null) {
+    @Override
+    public float score() throws IOException { throw new UnsupportedOperationException(); }
+    @Override
+    public int freq() throws IOException { throw new UnsupportedOperationException(); }
+    @Override
+    public int docID() { throw new UnsupportedOperationException(); }
+    @Override
+    public int nextDoc() throws IOException { throw new UnsupportedOperationException(); }
+    @Override
+    public int advance(int target) throws IOException { throw new UnsupportedOperationException(); }
+    @Override
+    public long cost() { throw new UnsupportedOperationException(); }
+  };
 }
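Because the constructor now rejects score-based sorts up front, misuse fails fast. A fragment mirroring the new testBadSort tests elsewhere in this diff (reader is an existing AtomicReader):

try {
  SortingAtomicReader.wrap(reader, Sort.RELEVANCE); // relevance requires scores
  // not reached
} catch (IllegalArgumentException e) {
  // "Cannot sort an index with a Sort that refers to the relevance score"
}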

View File

@ -35,6 +35,7 @@ import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Sort;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMFile;

@ -48,13 +49,13 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
 /**
  * An {@link AtomicReader} which supports sorting documents by a given
- * {@link Sorter}. You can use this class to sort an index as follows:
+ * {@link Sort}. You can use this class to sort an index as follows:
  *
  * <pre class="prettyprint">
  * IndexWriter writer; // writer to which the sorted index will be added
  * DirectoryReader reader; // reader on the input index
- * Sorter sorter; // determines how the documents are sorted
- * AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
+ * Sort sort; // determines how the documents are sorted
+ * AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
  * writer.addIndexes(sortingReader);
  * writer.close();
  * reader.close();

@ -480,7 +481,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
   static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {

     /**
-     * A {@link Sorter} which sorts two parallel arrays of doc IDs and
+     * A {@link TimSorter} which sorts two parallel arrays of doc IDs and
      * offsets in one go. Every time a doc ID is 'swapped', its corresponding offset
      * is swapped too.
      */

@ -708,14 +709,14 @@ public class SortingAtomicReader extends FilterAtomicReader {
   }

   /** Return a sorted view of <code>reader</code> according to the order
-   *  defined by <code>sorter</code>. If the reader is already sorted, this
+   *  defined by <code>sort</code>. If the reader is already sorted, this
    *  method might return the reader as-is. */
-  public static AtomicReader wrap(AtomicReader reader, Sorter sorter) throws IOException {
-    return wrap(reader, sorter.sort(reader));
+  public static AtomicReader wrap(AtomicReader reader, Sort sort) throws IOException {
+    return wrap(reader, new Sorter(sort).sort(reader));
   }

-  /** Expert: same as {@link #wrap(AtomicReader, Sorter)} but operates directly on a {@link Sorter.DocMap}. */
-  public static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
+  /** Expert: same as {@link #wrap(AtomicReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */
+  static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
     if (docMap == null) {
       // the reader is already sorted
       return reader;
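An expanded version of the javadoc fragment above, as an offline re-sort sketch; the paths, the "timestamp" field and the choice of KeywordAnalyzer (the analyzer is unused during addIndexes) are assumptions.

DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/idx/in")));
IndexWriter writer = new IndexWriter(FSDirectory.open(new File("/idx/out")),
    new IndexWriterConfig(Version.LUCENE_47, new KeywordAnalyzer()));
Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true)); // newest first
AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
writer.addIndexes(sortingReader); // writes the documents out in sorted order
writer.close();
reader.close();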

View File

@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;

+import org.apache.lucene.analysis.Analyzer; // javadocs
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;

@ -33,22 +34,23 @@ import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.search.Sort;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

-/** A {@link MergePolicy} that reorders documents according to a {@link Sorter}
+/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
  *  before merging them. As a consequence, all segments resulting from a merge
  *  will be sorted while segments resulting from a flush will be in the order
  *  in which documents have been added.
- *  <p><b>NOTE</b>: Never use this {@link MergePolicy} if you rely on
- *  {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)}
+ *  <p><b>NOTE</b>: Never use this policy if you rely on
+ *  {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
  *  to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
- *  <p><b>NOTE</b>: This {@link MergePolicy} should only be used with idempotent
- *  {@link Sorter}s so that the order of segments is predictable. For example,
- *  using {@link SortingMergePolicy} with {@link Sorter#REVERSE_DOCS} (which is
- *  not idempotent) will make the order of documents in a segment depend on the
- *  number of times the segment has been merged.
+ *  <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
+ *  so that the order of segments is predictable. For example, using
+ *  {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
+ *  the order of documents in a segment depend on the number of times the segment
+ *  has been merged.
  *  @lucene.experimental */
 public final class SortingMergePolicy extends MergePolicy {

@ -147,12 +149,12 @@ public final class SortingMergePolicy extends MergePolicy {
   }

-  /** Returns true if the given reader is sorted by the given sorter. */
-  public static boolean isSorted(AtomicReader reader, Sorter sorter) {
+  /** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
+  public static boolean isSorted(AtomicReader reader, Sort sort) {
     if (reader instanceof SegmentReader) {
       final SegmentReader segReader = (SegmentReader) reader;
       final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
-      if (diagnostics != null && sorter.getID().equals(diagnostics.get(SORTER_ID_PROP))) {
+      if (diagnostics != null && sort.toString().equals(diagnostics.get(SORTER_ID_PROP))) {
         return true;
       }
     }

@ -172,11 +174,13 @@ public final class SortingMergePolicy extends MergePolicy {
   final MergePolicy in;
   final Sorter sorter;
+  final Sort sort;

-  /** Create a new {@link MergePolicy} that sorts documents with <code>sorter</code>. */
-  public SortingMergePolicy(MergePolicy in, Sorter sorter) {
+  /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
+  public SortingMergePolicy(MergePolicy in, Sort sort) {
     this.in = in;
-    this.sorter = sorter;
+    this.sorter = new Sorter(sort);
+    this.sort = sort;
   }

   @Override

@ -200,7 +204,7 @@ public final class SortingMergePolicy extends MergePolicy {
   @Override
   public MergePolicy clone() {
-    return new SortingMergePolicy(in.clone(), sorter);
+    return new SortingMergePolicy(in.clone(), sort);
   }

   @Override
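A construction sketch for the new Sort-based policy; dir, analyzer, the "timestamp" field and the Version constant are assumptions:

Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
iwc.setMergePolicy(new SortingMergePolicy(new TieredMergePolicy(), sort));
IndexWriter writer = new IndexWriter(dir, iwc); // merged segments come out sorted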

View File

@ -17,19 +17,16 @@
 -->
 <html>
 <body>
-<p>Provides index sorting capabilities. The application can use one of the
-pre-existing Sorter implementations, e.g. to sort by
-{@link org.apache.lucene.index.sorter.NumericDocValuesSorter}
-or {@link org.apache.lucene.index.sorter.Sorter#REVERSE_DOCS reverse} the order
-of the documents. Additionally, the application can implement a custom
-{@link org.apache.lucene.index.sorter.Sorter} which returns a permutation on
-a source {@link org.apache.lucene.index.AtomicReader}'s document IDs, to sort
-the input documents by additional criteria.
+<p>Provides index sorting capabilities. The application can use any
+Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
+reverse the order of the documents (by using SortField.Type.DOC in reverse).
+Multi-level sorts can be specified the same way you would when searching, by
+building Sort from multiple SortFields.

 <p>{@link org.apache.lucene.index.sorter.SortingMergePolicy} can be used to
 make Lucene sort segments before merging them. This will ensure that every
 segment resulting from a merge will be sorted according to the provided
-{@link org.apache.lucene.index.sorter.Sorter}. This however makes merging and
+{@link org.apache.lucene.search.Sort}. This however makes merging and
 thus indexing slower.

 <p>Sorted segments allow for early query termination when the sort order

View File

@ -24,6 +24,8 @@ import java.util.List;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.TestUtil;

@ -31,9 +33,9 @@ import org.junit.BeforeClass;
 public class IndexSortingTest extends SorterTestBase {

-  private static final Sorter[] SORTERS = new Sorter[] {
-    new NumericDocValuesSorter(NUMERIC_DV_FIELD, true),
-    Sorter.REVERSE_DOCS,
+  private static final Sort[] SORT = new Sort[] {
+    new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)),
+    new Sort(new SortField(null, SortField.Type.DOC, true))
   };

   @BeforeClass

@ -47,13 +49,14 @@ public class IndexSortingTest extends SorterTestBase {
         values.add(Integer.valueOf(reader.document(i).get(ID_FIELD)));
       }
     }
-    Sorter sorter = SORTERS[random().nextInt(SORTERS.length)];
-    if (sorter == Sorter.REVERSE_DOCS) {
+    int idx = random().nextInt(SORT.length);
+    Sort sorter = SORT[idx];
+    if (idx == 1) { // reverse doc sort
       Collections.reverse(values);
     } else {
       Collections.sort(values);
-      if (sorter instanceof NumericDocValuesSorter && random().nextBoolean()) {
-        sorter = new NumericDocValuesSorter(NUMERIC_DV_FIELD, false); // descending
+      if (random().nextBoolean()) {
+        sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending
         Collections.reverse(values);
       }
     }

View File

@ -17,56 +17,37 @@ package org.apache.lucene.index.sorter;
  * limitations under the License.
  */

-import java.io.IOException;
 import java.util.Arrays;

-import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.TestUtil;
 import org.junit.BeforeClass;

 public class SortingAtomicReaderTest extends SorterTestBase {

   @BeforeClass
   public static void beforeClassSortingAtomicReaderTest() throws Exception {
-    // build the mapping from the reader, since we deleted documents, some of
-    // them might have disappeared from the index (e.g. if an entire segment is
-    // dropped b/c all its docs are deleted)
-    final int[] values = new int[reader.maxDoc()];
-    for (int i = 0; i < reader.maxDoc(); i++) {
-      values[i] = Integer.valueOf(reader.document(i).get(ID_FIELD));
-    }
-    final Sorter.DocComparator comparator = new Sorter.DocComparator() {
-      @Override
-      public int compare(int docID1, int docID2) {
-        final int v1 = values[docID1];
-        final int v2 = values[docID2];
-        return v1 < v2 ? -1 : v1 == v2 ? 0 : 1;
-      }
-    };
-    final Sorter.DocMap docMap = Sorter.sort(reader.maxDoc(), comparator);
+    // sort the index by id (as integer, in NUMERIC_DV_FIELD)
+    Sort sort = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT));
+    final Sorter.DocMap docMap = new Sorter(sort).sort(reader);

     // Sorter.compute also sorts the values
+    NumericDocValues dv = reader.getNumericDocValues(NUMERIC_DV_FIELD);
     sortedValues = new Integer[reader.maxDoc()];
     for (int i = 0; i < reader.maxDoc(); ++i) {
-      sortedValues[docMap.oldToNew(i)] = values[i];
+      sortedValues[docMap.oldToNew(i)] = (int)dv.get(i);
     }
     if (VERBOSE) {
       System.out.println("docMap: " + docMap);
       System.out.println("sortedValues: " + Arrays.toString(sortedValues));
     }

-    reader = SortingAtomicReader.wrap(reader, new Sorter() {
-      @Override
-      public Sorter.DocMap sort(AtomicReader reader) throws IOException {
-        return docMap;
-      }
-      @Override
-      public String getID() {
-        return ID_FIELD;
-      }
-    });
+    // sort the index by id (as integer, in NUMERIC_DV_FIELD)
+    reader = SortingAtomicReader.wrap(reader, sort);

     if (VERBOSE) {
       System.out.print("mapped-deleted-docs: ");

@ -81,5 +62,14 @@ public class SortingAtomicReaderTest extends SorterTestBase {
     TestUtil.checkReader(reader);
   }

+  public void testBadSort() throws Exception {
+    try {
+      SortingAtomicReader.wrap(reader, Sort.RELEVANCE);
+      fail("Didn't get expected exception");
+    } catch (IllegalArgumentException e) {
+      assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
+    }
+  }
 }

View File

@ -37,6 +37,8 @@ import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.FixedBitSet;

@ -89,47 +91,14 @@ public class TestBlockJoinSorter extends LuceneTestCase {
     final AtomicReader reader = getOnlySegmentReader(indexReader);
     final Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
     final FixedBitSet parentBits = (FixedBitSet) parentsFilter.getDocIdSet(reader.getContext(), null);
     final NumericDocValues parentValues = reader.getNumericDocValues("parent_val");
-    final Sorter.DocComparator parentComparator = new Sorter.DocComparator() {
-      @Override
-      public int compare(int docID1, int docID2) {
-        assertTrue(parentBits.get(docID1));
-        assertTrue(parentBits.get(docID2));
-        return Long.compare(parentValues.get(docID1), parentValues.get(docID2));
-      }
-    };
     final NumericDocValues childValues = reader.getNumericDocValues("child_val");
-    final Sorter.DocComparator childComparator = new Sorter.DocComparator() {
-      @Override
-      public int compare(int docID1, int docID2) {
-        assertFalse(parentBits.get(docID1));
-        assertFalse(parentBits.get(docID2));
-        return Long.compare(childValues.get(docID1), childValues.get(docID2));
-      }
-    };

-    final Sorter sorter = new BlockJoinSorter(parentsFilter) {
-      @Override
-      public String getID() {
-        return "Dummy";
-      }
-      @Override
-      protected DocComparator getParentComparator(AtomicReader r) {
-        assertEquals(reader, r);
-        return parentComparator;
-      }
-      @Override
-      protected DocComparator getChildComparator(AtomicReader r) {
-        assertEquals(reader, r);
-        return childComparator;
-      }
-    };
+    final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
+    final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
+
+    final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
+    final Sorter sorter = new Sorter(sort);
     final Sorter.DocMap docMap = sorter.sort(reader);
     assertEquals(reader.maxDoc(), docMap.size());

View File

@ -51,14 +51,14 @@ public class TestEarlyTermination extends LuceneTestCase {
   private int numDocs;
   private List<String> terms;
   private Directory dir;
-  private Sorter sorter;
+  private Sort sort;
   private RandomIndexWriter iw;
   private IndexReader reader;

   @Override
   public void setUp() throws Exception {
     super.setUp();
-    sorter = new NumericDocValuesSorter("ndv1");
+    sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
   }

   private Document randomDocument() {

@ -80,7 +80,7 @@ public class TestEarlyTermination extends LuceneTestCase {
     terms = new ArrayList<String>(randomTerms);
     final long seed = random().nextLong();
     final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
-    iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sorter));
+    iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);
     for (int i = 0; i < numDocs; ++i) {
       final Document doc = randomDocument();

@ -120,7 +120,7 @@ public class TestEarlyTermination extends LuceneTestCase {
     for (int i = 0; i < iters; ++i) {
       final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
       searcher.search(query, collector1);
-      searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sorter, numHits));
+      searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
     }
     assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
     assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);

@ -144,7 +144,8 @@ public class TestEarlyTermination extends LuceneTestCase {
     for (int i = 0; i < iters; ++i) {
       final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
       searcher.search(query, collector1);
-      searcher.search(query, new EarlyTerminatingSortingCollector(collector2, new NumericDocValuesSorter("ndv2"), numHits) {
+      Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
+      searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
         @Override
         public void setNextReader(AtomicReaderContext context) throws IOException {
           super.setNextReader(context);

View File

@ -40,6 +40,8 @@ import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TieredMergePolicy;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;

@ -50,14 +52,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
   private List<String> terms;
   private Directory dir1, dir2;
-  private Sorter sorter;
+  private Sort sort;
   private IndexReader reader;
   private IndexReader sortedReader;

   @Override
   public void setUp() throws Exception {
     super.setUp();
-    sorter = new NumericDocValuesSorter("ndv");
+    sort = new Sort(new SortField("ndv", SortField.Type.LONG));
     createRandomIndexes();
   }

@ -68,7 +70,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
     return doc;
   }

-  static MergePolicy newSortingMergePolicy(Sorter sorter) {
+  static MergePolicy newSortingMergePolicy(Sort sort) {
     // create a MP with a low merge factor so that many merges happen
     MergePolicy mp;
     if (random().nextBoolean()) {

@ -83,7 +85,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
       mp = lmp;
     }
     // wrap it with a sorting mp
-    return new SortingMergePolicy(mp, sorter);
+    return new SortingMergePolicy(mp, sort);
   }

   private void createRandomIndexes() throws IOException {

@ -99,7 +101,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
     final long seed = random().nextLong();
     final IndexWriterConfig iwc1 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
     final IndexWriterConfig iwc2 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
-    iwc2.setMergePolicy(newSortingMergePolicy(sorter));
+    iwc2.setMergePolicy(newSortingMergePolicy(sort));
     final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
     final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
     for (int i = 0; i < numDocs; ++i) {

@ -162,7 +164,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
   }

   public void testSortingMP() throws IOException {
-    final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
+    final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
     final AtomicReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);

     assertSorted(sortedReader1);

@ -170,5 +172,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
     assertReaderEquals("", sortedReader1, sortedReader2);
   }

+  public void testBadSort() throws Exception {
+    try {
+      new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE);
+      fail("Didn't get expected exception");
+    } catch (IllegalArgumentException e) {
+      assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
+    }
+  }
 }

View File

@ -46,17 +46,12 @@ import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FilterAtomicReader;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.MultiDocValues;
-import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
-import org.apache.lucene.index.sorter.Sorter;
-import org.apache.lucene.index.sorter.SortingAtomicReader;
 import org.apache.lucene.index.sorter.SortingMergePolicy;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;

@ -117,9 +112,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   /** Analyzer used at index time */
   protected final Analyzer indexAnalyzer;
   final Version matchVersion;
-  private final File indexPath;
+  private final Directory dir;
   final int minPrefixChars;
-  private Directory dir;

   /** Used for ongoing NRT additions/updates. */
   private IndexWriter writer;

@ -131,16 +125,19 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    *  PrefixQuery is used (4). */
   public static final int DEFAULT_MIN_PREFIX_CHARS = 4;

-  private Sorter sorter;
+  /** How we sort the postings and search results. */
+  private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));

   /** Create a new instance, loading from a previously built
-   *  directory, if it exists. */
-  public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
-    this(matchVersion, indexPath, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
+   *  directory, if it exists. Note that {@link #close}
+   *  will also close the provided directory. */
+  public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
+    this(matchVersion, dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
   }

   /** Create a new instance, loading from a previously built
-   *  directory, if it exists.
+   *  directory, if it exists. Note that {@link #close}
+   *  will also close the provided directory.
    *
    *  @param minPrefixChars Minimum number of leading characters
    *     before PrefixQuery is used (default 4).

@ -148,7 +145,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
    *     ngrams (increasing index size but making lookups
    *     faster).
    */
-  public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
+  public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {

     if (minPrefixChars < 0) {
       throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);

@ -157,33 +154,29 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     this.queryAnalyzer = queryAnalyzer;
     this.indexAnalyzer = indexAnalyzer;
     this.matchVersion = matchVersion;
-    this.indexPath = indexPath;
+    this.dir = dir;
     this.minPrefixChars = minPrefixChars;
-    dir = getDirectory(indexPath);

     if (DirectoryReader.indexExists(dir)) {
       // Already built; open it:
-      initSorter();
       writer = new IndexWriter(dir,
-                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), sorter, IndexWriterConfig.OpenMode.APPEND));
+                               getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
searcherMgr = new SearcherManager(writer, true, null); searcherMgr = new SearcherManager(writer, true, null);
} }
} }
/** Override this to customize index settings, e.g. which /** Override this to customize index settings, e.g. which
* codec to use. Sorter is null if this config is for * codec to use. */
* the first pass writer. */ protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sorter sorter, IndexWriterConfig.OpenMode openMode) {
IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer); IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
iwc.setCodec(new Lucene46Codec()); iwc.setCodec(new Lucene46Codec());
iwc.setOpenMode(openMode); iwc.setOpenMode(openMode);
if (sorter != null) { // This way all merged segments will be sorted at
// This way all merged segments will be sorted at // merge time, allow for per-segment early termination
// merge time, allow for per-segment early termination // when those segments are searched:
// when those segments are searched: iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));
iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sorter));
}
return iwc; return iwc;
} }
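Since the File-based constructors above are replaced by Directory-based ones, and close() now also closes that Directory, a typical lifecycle looks like this sketch (the path, analyzer, and iterator are placeholders; the version constant is assumed):

    import java.io.File;
    import java.util.List;
    import org.apache.lucene.search.suggest.Lookup.LookupResult;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    Directory dir = FSDirectory.open(new File("/path/to/suggest-index")); // placeholder path
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(Version.LUCENE_47, dir, analyzer);
    suggester.build(iterator);  // e.g. an InputArrayIterator, as in the tests below
    List<LookupResult> results = suggester.lookup("ear", 10, true, true);
    suggester.close();          // note: now also closes dir, per the javadoc change above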
@ -206,16 +199,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
writer = null; writer = null;
} }
Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
IndexWriter w = null;
AtomicReader r = null; AtomicReader r = null;
boolean success = false; boolean success = false;
try { try {
// First pass: build a temporary normal Lucene index, // First pass: build a temporary normal Lucene index,
// just indexing the suggestions as they iterate: // just indexing the suggestions as they iterate:
w = new IndexWriter(dirTmp, writer = new IndexWriter(dir,
getIndexWriterConfig(matchVersion, getGramAnalyzer(), null, IndexWriterConfig.OpenMode.CREATE)); getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
BytesRef text; BytesRef text;
Document doc = new Document(); Document doc = new Document();
FieldType ft = getTextFieldType(); FieldType ft = getTextFieldType();
@ -253,37 +243,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
if (iter.hasPayloads()) { if (iter.hasPayloads()) {
payloadField.setBytesValue(iter.payload()); payloadField.setBytesValue(iter.payload());
} }
w.addDocument(doc); writer.addDocument(doc);
} }
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec"); //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
// Second pass: sort the entire index:
r = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(w, false));
//long t1 = System.nanoTime();
// We can rollback the first pass, now that have have
// the reader open, because we will discard it anyway
// (no sense in fsync'ing it):
w.rollback();
initSorter();
r = SortingAtomicReader.wrap(r, sorter);
writer = new IndexWriter(dir,
getIndexWriterConfig(matchVersion, getGramAnalyzer(), sorter, IndexWriterConfig.OpenMode.CREATE));
writer.addIndexes(new IndexReader[] {r});
r.close();
//System.out.println("sort time: " + ((System.nanoTime()-t1)/1000000) + " msec");
searcherMgr = new SearcherManager(writer, true, null); searcherMgr = new SearcherManager(writer, true, null);
success = true; success = true;
} finally { } finally {
if (success) { if (success) {
IOUtils.close(w, r, dirTmp); IOUtils.close(r);
} else { } else {
IOUtils.closeWhileHandlingException(w, writer, r, dirTmp); IOUtils.closeWhileHandlingException(writer, r);
writer = null; writer = null;
} }
} }
@ -359,39 +329,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
searcherMgr.maybeRefreshBlocking(); searcherMgr.maybeRefreshBlocking();
} }
private void initSorter() {
sorter = new Sorter() {
@Override
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
final NumericDocValues weights = reader.getNumericDocValues("weight");
final Sorter.DocComparator comparator = new Sorter.DocComparator() {
@Override
public int compare(int docID1, int docID2) {
final long v1 = weights.get(docID1);
final long v2 = weights.get(docID2);
// Reverse sort (highest weight first);
// java7 only:
//return Long.compare(v2, v1);
if (v1 > v2) {
return -1;
} else if (v1 < v2) {
return 1;
} else {
return 0;
}
}
};
return Sorter.sort(reader.maxDoc(), comparator);
}
@Override
public String getID() {
return "BySuggestWeight";
}
};
}
/** /**
* Subclass can override this method to change the field type of the text field * Subclass can override this method to change the field type of the text field
* e.g. to change the index options * e.g. to change the index options
@ -497,12 +434,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
//System.out.println("finalQuery=" + query); //System.out.println("finalQuery=" + query);
// Sort by weight, descending: // Sort by weight, descending:
TopFieldCollector c = TopFieldCollector.create(new Sort(new SortField("weight", SortField.Type.LONG, true)), TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);
num, true, false, false, false);
// We sorted postings by weight during indexing, so we // We sorted postings by weight during indexing, so we
// only retrieve the first num hits now: // only retrieve the first num hits now:
Collector c2 = new EarlyTerminatingSortingCollector(c, sorter, num); Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
IndexSearcher searcher = searcherMgr.acquire(); IndexSearcher searcher = searcherMgr.acquire();
List<LookupResult> results = null; List<LookupResult> results = null;
try { try {
@ -512,7 +448,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
TopFieldDocs hits = (TopFieldDocs) c.topDocs(); TopFieldDocs hits = (TopFieldDocs) c.topDocs();
// Slower way if postings are not pre-sorted by weight: // Slower way if postings are not pre-sorted by weight:
// hits = searcher.search(query, null, num, new Sort(new SortField("weight", SortField.Type.LONG, true))); // hits = searcher.search(query, null, num, SORT);
results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken); results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
} finally { } finally {
searcherMgr.release(searcher); searcherMgr.release(searcher);
@ -676,11 +612,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
} }
if (writer != null) { if (writer != null) {
writer.close(); writer.close();
writer = null;
}
if (dir != null) {
dir.close(); dir.close();
dir = null; writer = null;
} }
} }

@ -17,7 +17,6 @@ package org.apache.lucene.search.suggest.analyzing;
* limitations under the License. * limitations under the License.
*/ */
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
@ -38,6 +37,7 @@ import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
@ -92,8 +92,8 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
* Create a new instance, loading from a previously built * Create a new instance, loading from a previously built
* directory, if it exists. * directory, if it exists.
*/ */
public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException { public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
super(matchVersion, indexPath, analyzer); super(matchVersion, dir, analyzer);
this.blenderType = BlenderType.POSITION_LINEAR; this.blenderType = BlenderType.POSITION_LINEAR;
this.numFactor = DEFAULT_NUM_FACTOR; this.numFactor = DEFAULT_NUM_FACTOR;
} }
@ -106,9 +106,9 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
* @param numFactor Factor to multiply the number of searched elements before ponderate * @param numFactor Factor to multiply the number of searched elements before ponderate
* @throws IOException If there are problems opening the underlying Lucene index. * @throws IOException If there are problems opening the underlying Lucene index.
*/ */
public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer, public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
int minPrefixChars, BlenderType blenderType, int numFactor) throws IOException { int minPrefixChars, BlenderType blenderType, int numFactor) throws IOException {
super(matchVersion, indexPath, indexAnalyzer, queryAnalyzer, minPrefixChars); super(matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars);
this.blenderType = blenderType; this.blenderType = blenderType;
this.numFactor = numFactor; this.numFactor = numFactor;
} }

@ -40,6 +40,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup; import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup; import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup; import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.*; import org.apache.lucene.util.*;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Ignore; import org.junit.Ignore;
@ -161,7 +162,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
} catch (InstantiationException e) { } catch (InstantiationException e) {
Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false); Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
if (cls == AnalyzingInfixSuggester.class) { if (cls == AnalyzingInfixSuggester.class) {
lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, TestUtil.getTempDir("LookupBenchmarkTest"), a); lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, FSDirectory.open(TestUtil.getTempDir("LookupBenchmarkTest")), a);
} else { } else {
Constructor<? extends Lookup> ctor = cls.getConstructor(Analyzer.class); Constructor<? extends Lookup> ctor = cls.getConstructor(Analyzer.class);
lookup = ctor.newInstance(a); lookup = ctor.newInstance(a);

@ -21,7 +21,6 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
@ -39,7 +38,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -55,15 +53,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
@ -106,22 +97,12 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
assertEquals(2, suggester.getCount()); assertEquals(2, suggester.getCount());
suggester.close(); suggester.close();
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size()); assertEquals(2, results.size());
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key); assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
@ -159,15 +140,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
@Override @Override
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException { protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) { try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
@ -239,17 +213,11 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("lend me your ear", 8, new BytesRef("foobar")), new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
int minPrefixLength = random().nextInt(10); int minPrefixLength = random().nextInt(10);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
for(int i=0;i<2;i++) { for(int i=0;i<2;i++) {
@ -306,12 +274,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
// Make sure things still work after close and reopen: // Make sure things still work after close and reopen:
suggester.close(); suggester.close();
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) { suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
} }
suggester.close(); suggester.close();
} }
@ -321,15 +284,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size()); assertEquals(1, results.size());
@ -342,15 +298,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")), new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size()); assertEquals(1, results.size());
@ -359,18 +308,13 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
// Try again, but overriding addPrefixMatch to highlight // Try again, but overriding addPrefixMatch to highlight
// the entire hit: // the entire hit:
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
@Override @Override
protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) { protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
sb.append("<b>"); sb.append("<b>");
sb.append(surface); sb.append(surface);
sb.append("</b>"); sb.append("</b>");
} }
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
}; };
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true); results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
@ -384,15 +328,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
suggester.close(); suggester.close();
suggester.close(); suggester.close();
@ -418,14 +355,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
} }
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), indexAnalyzer, queryAnalyzer, 3);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, indexAnalyzer, queryAnalyzer, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
Input keys[] = new Input[] { Input keys[] = new Input[] {
new Input("a bob for apples", 10, new BytesRef("foobaz")), new Input("a bob for apples", 10, new BytesRef("foobaz")),
@ -439,14 +369,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
} }
public void testEmptyAtStart() throws Exception { public void testEmptyAtStart() throws Exception {
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(new Input[0])); suggester.build(new InputArrayIterator(new Input[0]));
suggester.add(new BytesRef("a penny saved is a penny earned"), 10, new BytesRef("foobaz")); suggester.add(new BytesRef("a penny saved is a penny earned"), 10, new BytesRef("foobaz"));
suggester.add(new BytesRef("lend me your ear"), 8, new BytesRef("foobar")); suggester.add(new BytesRef("lend me your ear"), 8, new BytesRef("foobar"));
@ -483,14 +407,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
} }
public void testBothExactAndPrefix() throws Exception { public void testBothExactAndPrefix() throws Exception {
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(new Input[0])); suggester.build(new InputArrayIterator(new Input[0]));
suggester.add(new BytesRef("the pen is pretty"), 10, new BytesRef("foobaz")); suggester.add(new BytesRef("the pen is pretty"), 10, new BytesRef("foobaz"));
suggester.refresh(); suggester.refresh();
@ -563,12 +481,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
System.out.println(" minPrefixChars=" + minPrefixChars); System.out.println(" minPrefixChars=" + minPrefixChars);
} }
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
// Initial suggester built with nothing: // Initial suggester built with nothing:
suggester.build(new InputArrayIterator(new Input[0])); suggester.build(new InputArrayIterator(new Input[0]));
@ -648,12 +561,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
} }
lookupThread.finish(); lookupThread.finish();
suggester.close(); suggester.close();
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) { suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
lookupThread = new LookupThread(suggester); lookupThread = new LookupThread(suggester);
lookupThread.start(); lookupThread.start();
@ -824,15 +732,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("lend me your ear", 8, new BytesRef("foobar")), new Input("lend me your ear", 8, new BytesRef("foobar")),
}; };
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);

@ -23,7 +23,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
@ -49,15 +48,10 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
File tempDir = TestUtil.getTempDir("BlendedInfixSuggesterTest"); File tempDir = TestUtil.getTempDir("BlendedInfixSuggesterTest");
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
BlendedInfixSuggester.BlenderType.POSITION_LINEAR, BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR) { BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
// we query for star wars and check that the weight // we query for star wars and check that the weight
@ -94,12 +88,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
// BlenderType.LINEAR is used by default (remove position*10%) // BlenderType.LINEAR is used by default (remove position*10%)
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a) { BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
assertEquals(w, getInResults(suggester, "top", pl, 1)); assertEquals(w, getInResults(suggester, "top", pl, 1));
@ -109,13 +98,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
suggester.close(); suggester.close();
// BlenderType.RECIPROCAL is using 1/(1+p) * w where w is weight and p the position of the word // BlenderType.RECIPROCAL is using 1/(1+p) * w where w is weight and p the position of the word
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) { AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
assertEquals(w, getInResults(suggester, "top", pl, 1)); assertEquals(w, getInResults(suggester, "top", pl, 1));
@ -145,13 +129,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
// if factor is small, we don't get the expected element // if factor is small, we don't get the expected element
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) { AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
@ -169,13 +148,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
suggester.close(); suggester.close();
// if we increase the factor we have it // if we increase the factor we have it
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2) { AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));
// we have it // we have it
@ -205,14 +179,9 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
// if factor is small, we don't get the expected element // if factor is small, we don't get the expected element
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR) { BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester.build(new InputArrayIterator(keys)); suggester.build(new InputArrayIterator(keys));

@ -440,8 +440,10 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
if (dir instanceof BaseDirectoryWrapper) { if (dir instanceof BaseDirectoryWrapper) {
((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves. ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
} }
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setInfoStream(new FailOnNonBulkMergesInfoStream()); analyzer).setInfoStream(new FailOnNonBulkMergesInfoStream());
if (LuceneTestCase.TEST_NIGHTLY) { if (LuceneTestCase.TEST_NIGHTLY) {
// newIWConfig makes smallish max seg size, which // newIWConfig makes smallish max seg size, which

@ -76,7 +76,16 @@ Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1 Apache UIMA 2.3.1
Apache ZooKeeper 3.4.5 Apache ZooKeeper 3.4.5
Upgrading from Solr 4.7
----------------------
* In previous versions of Solr, terms that exceeded Lucene's MAX_TERM_LENGTH were
silently ignored when indexing documents. Beginning with Solr 4.8, an error
will be generated when attempting to index a document with a term
that is too large. If you wish to continue to have large terms ignored,
use "solr.LengthFilterFactory" in all of your Analyzers. See LUCENE-5472 for
more details.
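As a sketch of that workaround at the Lucene level, an analyzer that silently drops over-long terms instead of letting IndexWriter throw (the 4.x LengthFilter constructor signature is assumed here, and the min/max bounds are illustrative):

    import java.io.Reader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.miscellaneous.LengthFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    Analyzer lenient = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new StandardTokenizer(Version.LUCENE_47, reader);
        // Silently discard tokens outside [1, 255] chars, as pre-4.8 indexing effectively did:
        TokenStream sink = new LengthFilter(Version.LUCENE_47, source, 1, 255);
        return new TokenStreamComponents(source, sink);
      }
    };

In Solr schemas the same effect comes from adding a filter of class "solr.LengthFilterFactory" with min/max attributes to each field type's analyzer chain.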
Detailed Change List Detailed Change List
---------------------- ----------------------
@ -88,6 +97,13 @@ New Features
* SOLR-5183: JSON updates now support nested child documents using a * SOLR-5183: JSON updates now support nested child documents using a
"_childDocument_" object key. (Varun Thacker, hossman) "_childDocument_" object key. (Varun Thacker, hossman)
* SOLR-5714: You can now use one pool of memory for the HDFS block cache
that all collections share. (Mark Miller, Gregory Chanan)
* SOLR-5720: Add ExpandComponent to expand results collapsed by the
CollapsingQParserPlugin. (Joel Bernstein)
Bug Fixes Bug Fixes
---------------------- ----------------------
@ -113,6 +129,15 @@ Bug Fixes
* SOLR-5761: HttpSolrServer has a few fields that can be set via setters but * SOLR-5761: HttpSolrServer has a few fields that can be set via setters but
are not volatile. (Mark Miller, Gregory Chanan) are not volatile. (Mark Miller, Gregory Chanan)
* SOLR-5811: The Overseer will retry work items until success, which is a serious
problem if you hit a bad work item. (Mark Miller)
* SOLR-5796: Increase how long we are willing to wait for a core to see the ZK
advertised leader in its local state. (Timothy Potter, Mark Miller)
* SOLR-5818: Distributed search with a custom sort comparator did not work correctly.
(Ryan Ernst)
Optimizations Optimizations
---------------------- ----------------------
* SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY * SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
@ -148,6 +173,15 @@ Other Changes
registration exists, wait a short time to see if it goes away. registration exists, wait a short time to see if it goes away.
(Mark Miller) (Mark Miller)
* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException
if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous
behavior of silently ignoring these terms, use LengthFilter in your Analyzer.
(hossman, Mike McCandless, Varun Thacker)
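A sketch of the new failure mode this entry describes (the writer and field name are placeholders):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.IndexWriter;

    // Build a single token longer than IndexWriter.MAX_TERM_LENGTH:
    char[] chars = new char[IndexWriter.MAX_TERM_LENGTH + 1];
    java.util.Arrays.fill(chars, 'a');
    Document doc = new Document();
    doc.add(new StringField("id", new String(chars), Field.Store.NO)); // indexed as one token
    try {
      writer.addDocument(doc); // 4.8: throws IllegalArgumentException; earlier releases dropped the term
    } catch (IllegalArgumentException e) {
      // reject the document, or re-analyze with LengthFilter as in the upgrade note above
    }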
* SOLR-5796: Make how long we are willing to wait for a core to see the ZK
advertised leader in its local state configurable.
(Timothy Potter via Mark Miller)
================== 4.7.0 ================== ================== 4.7.0 ==================
Versions of Major Components Versions of Major Components

@ -123,7 +123,16 @@ public class Overseer {
else if (LeaderStatus.YES == isLeader) { else if (LeaderStatus.YES == isLeader) {
final ZkNodeProps message = ZkNodeProps.load(head); final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message.getStr(QUEUE_OPERATION); final String operation = message.getStr(QUEUE_OPERATION);
clusterState = processMessage(clusterState, message, operation); try {
clusterState = processMessage(clusterState, message, operation);
} catch (Exception e) {
// generally there is nothing we can do - in most cases, we have
// an issue that will fail again on retry or we cannot communicate with
// ZooKeeper in which case another Overseer should take over
// TODO: if ordering for the message is not important, we could
// track retries and put it back on the end of the queue
log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e);
}
zkClient.setData(ZkStateReader.CLUSTER_STATE, zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(clusterState), true); ZkStateReader.toJSON(clusterState), true);
@ -189,8 +198,16 @@ public class Overseer {
while (head != null) { while (head != null) {
final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
final String operation = message.getStr(QUEUE_OPERATION); final String operation = message.getStr(QUEUE_OPERATION);
try {
clusterState = processMessage(clusterState, message, operation); clusterState = processMessage(clusterState, message, operation);
} catch (Exception e) {
// generally there is nothing we can do - in most cases, we have
// an issue that will fail again on retry or we cannot communicate with
// ZooKeeper in which case another Overseer should take over
// TODO: if ordering for the message is not important, we could
// track retries and put it back on the end of the queue
log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e);
}
workQueue.offer(head.getBytes()); workQueue.offer(head.getBytes());
stateUpdateQueue.poll(); stateUpdateQueue.poll();
@ -294,6 +311,7 @@ public class Overseer {
private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) { private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) {
log.info("createReplica() {} ", message); log.info("createReplica() {} ", message);
String coll = message.getStr(ZkStateReader.COLLECTION_PROP); String coll = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String slice = message.getStr(ZkStateReader.SHARD_ID_PROP); String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
Slice sl = clusterState.getSlice(coll, slice); Slice sl = clusterState.getSlice(coll, slice);
if(sl == null){ if(sl == null){
@ -334,6 +352,7 @@ public class Overseer {
private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) { private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP); String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
log.info("Update shard state invoked for collection: " + collection + " with message: " + message); log.info("Update shard state invoked for collection: " + collection + " with message: " + message);
for (String key : message.keySet()) { for (String key : message.keySet()) {
if (ZkStateReader.COLLECTION_PROP.equals(key)) continue; if (ZkStateReader.COLLECTION_PROP.equals(key)) continue;
@ -358,6 +377,7 @@ public class Overseer {
private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) { private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP); String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String shard = message.getStr(ZkStateReader.SHARD_ID_PROP);
String routeKey = message.getStr("routeKey"); String routeKey = message.getStr("routeKey");
String range = message.getStr("range"); String range = message.getStr("range");
@ -397,8 +417,22 @@ public class Overseer {
return clusterState; return clusterState;
} }
private boolean checkCollectionKeyExistence(ZkNodeProps message) {
return checkKeyExistence(message, ZkStateReader.COLLECTION_PROP);
}
private boolean checkKeyExistence(ZkNodeProps message, String key) {
String value = message.getStr(key);
if (value == null || value.trim().length() == 0) {
log.error("Skipping invalid Overseer message because it has no " + key + " specified: " + message);
return false;
}
return true;
}
private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) { private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP); String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String shard = message.getStr(ZkStateReader.SHARD_ID_PROP);
String routeKeyStr = message.getStr("routeKey"); String routeKeyStr = message.getStr("routeKey");
@ -424,6 +458,7 @@ public class Overseer {
private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) { private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP); String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP); String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP);
Slice slice = clusterState.getSlice(collection, shardId); Slice slice = clusterState.getSlice(collection, shardId);
if (slice == null) { if (slice == null) {
@ -470,6 +505,7 @@ public class Overseer {
private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) { private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP); String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP);
if(collection==null || sliceName == null){ if(collection==null || sliceName == null){
@ -488,32 +524,30 @@ public class Overseer {
/** /**
* Try to assign core to the cluster. * Try to assign core to the cluster.
*/ */
private ClusterState updateState(ClusterState state, final ZkNodeProps message) { private ClusterState updateState(ClusterState clusterState, final ZkNodeProps message) {
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
assert collection.length() > 0 : message; if (!checkCollectionKeyExistence(message)) return clusterState;
Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null); Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null);
log.info("Update state numShards={} message={}", numShards, message); log.info("Update state numShards={} message={}", numShards, message);
List<String> shardNames = new ArrayList<String>(); List<String> shardNames = new ArrayList<String>();
//collection does not yet exist, create placeholders if num shards is specified //collection does not yet exist, create placeholders if num shards is specified
boolean collectionExists = state.hasCollection(collection); boolean collectionExists = clusterState.hasCollection(collection);
if (!collectionExists && numShards!=null) { if (!collectionExists && numShards!=null) {
getShardNames(numShards, shardNames); getShardNames(numShards, shardNames);
state = createCollection(state, collection, shardNames, message); clusterState = createCollection(clusterState, collection, shardNames, message);
} }
String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP);
String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
if (coreNodeName == null) { if (coreNodeName == null) {
coreNodeName = getAssignedCoreNodeName(state, message); coreNodeName = getAssignedCoreNodeName(clusterState, message);
if (coreNodeName != null) { if (coreNodeName != null) {
log.info("node=" + coreNodeName + " is already registered"); log.info("node=" + coreNodeName + " is already registered");
} else { } else {
// if coreNodeName is null, auto assign one // if coreNodeName is null, auto assign one
coreNodeName = Assign.assignNode(collection, state); coreNodeName = Assign.assignNode(collection, clusterState);
} }
message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP, message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP,
coreNodeName); coreNodeName);
@ -522,7 +556,7 @@ public class Overseer {
// use the provided non null shardId // use the provided non null shardId
if (sliceName == null) { if (sliceName == null) {
//get shardId from ClusterState //get shardId from ClusterState
sliceName = getAssignedId(state, coreNodeName, message); sliceName = getAssignedId(clusterState, coreNodeName, message);
if (sliceName != null) { if (sliceName != null) {
log.info("shard=" + sliceName + " is already registered"); log.info("shard=" + sliceName + " is already registered");
} }
@ -531,14 +565,14 @@ public class Overseer {
//request new shardId //request new shardId
if (collectionExists) { if (collectionExists) {
// use existing numShards // use existing numShards
numShards = state.getCollection(collection).getSlices().size(); numShards = clusterState.getCollection(collection).getSlices().size();
log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards); log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards);
} }
sliceName = Assign.assignShard(collection, state, numShards); sliceName = Assign.assignShard(collection, clusterState, numShards);
log.info("Assigning new node to shard shard=" + sliceName); log.info("Assigning new node to shard shard=" + sliceName);
} }
Slice slice = state.getSlice(collection, sliceName); Slice slice = clusterState.getSlice(collection, sliceName);
Map<String,Object> replicaProps = new LinkedHashMap<String,Object>(); Map<String,Object> replicaProps = new LinkedHashMap<String,Object>();
@ -584,9 +618,9 @@ public class Overseer {
Map<String,Replica> replicas; Map<String,Replica> replicas;
if (slice != null) { if (slice != null) {
state = checkAndCompleteShardSplit(state, collection, coreNodeName, sliceName, replicaProps); clusterState = checkAndCompleteShardSplit(clusterState, collection, coreNodeName, sliceName, replicaProps);
// get the current slice again because it may have been updated due to checkAndCompleteShardSplit method // get the current slice again because it may have been updated due to checkAndCompleteShardSplit method
slice = state.getSlice(collection, sliceName); slice = clusterState.getSlice(collection, sliceName);
sliceProps = slice.getProperties(); sliceProps = slice.getProperties();
replicas = slice.getReplicasCopy(); replicas = slice.getReplicasCopy();
} else { } else {
@ -600,7 +634,7 @@ public class Overseer {
replicas.put(replica.getName(), replica); replicas.put(replica.getName(), replica);
slice = new Slice(sliceName, replicas, sliceProps); slice = new Slice(sliceName, replicas, sliceProps);
ClusterState newClusterState = updateSlice(state, collection, slice); ClusterState newClusterState = updateSlice(clusterState, collection, slice);
return newClusterState; return newClusterState;
} }
@ -849,13 +883,9 @@ public class Overseer {
* Remove collection from cloudstate * Remove collection from cloudstate
*/ */
private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) { private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) {
final String collection = message.getStr("name"); final String collection = message.getStr("name");
if (!checkKeyExistence(message, "name")) return clusterState;
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
// newCollections.remove(collection);
// ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections);
return clusterState.copyWith(singletonMap(collection, (DocCollection)null)); return clusterState.copyWith(singletonMap(collection, (DocCollection)null));
} }
@ -863,32 +893,28 @@ public class Overseer {
* Remove collection slice from cloudstate * Remove collection slice from cloudstate
*/ */
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) { private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP); final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate"); log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
DocCollection coll = clusterState.getCollection(collection); DocCollection coll = clusterState.getCollection(collection);
Map<String, Slice> newSlices = new LinkedHashMap<String, Slice>(coll.getSlicesMap()); Map<String, Slice> newSlices = new LinkedHashMap<String, Slice>(coll.getSlicesMap());
newSlices.remove(sliceId); newSlices.remove(sliceId);
DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter()); DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter());
// newCollections.put(newCollection.getName(), newCollection);
return newState(clusterState, singletonMap(collection,newCollection)); return newState(clusterState, singletonMap(collection,newCollection));
// return new ClusterState(clusterState.getLiveNodes(), newCollections);
} }
/* /*
* Remove core from cloudstate * Remove core from cloudstate
*/ */
private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) { private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) {
final String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy // final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
// DocCollection coll = newCollections.get(collection); // DocCollection coll = newCollections.get(collection);
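The removeCollection change above leans on ClusterState.copyWith treating a null DocCollection as a deletion: the copy carries every entry except the one mapped to null. A minimal sketch of that copy-on-write idiom, assuming a simplified map-backed state holder (StateSketch is illustrative, not the real ClusterState):

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Immutable holder: copyWith(changes) returns a new instance; a null value
    // in the change map removes that key, which is the contract the diff uses.
    final class StateSketch {
      private final Map<String, String> collections;

      StateSketch(Map<String, String> collections) {
        this.collections = collections;
      }

      StateSketch copyWith(Map<String, String> changes) {
        Map<String, String> copy = new LinkedHashMap<String, String>(collections); // shallow copy
        for (Map.Entry<String, String> e : changes.entrySet()) {
          if (e.getValue() == null) {
            copy.remove(e.getKey());        // null value == delete the entry
          } else {
            copy.put(e.getKey(), e.getValue());
          }
        }
        return new StateSketch(copy);
      }
    }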

View File

@ -1627,7 +1627,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
if (!created) if (!created)
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create collection: " + message.getStr("name")); throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create collection: " + message.getStr("name"));
log.info("going to create cores replicas shardNames {} , repFactor : {}", shardNames, repFactor); log.info("Creating SolrCores for new collection, shardNames {} , replicationFactor : {}", shardNames, repFactor);
Map<String ,ShardRequest> coresToCreate = new LinkedHashMap<String, ShardRequest>(); Map<String ,ShardRequest> coresToCreate = new LinkedHashMap<String, ShardRequest>();
for (int i = 1; i <= shardNames.size(); i++) { for (int i = 1; i <= shardNames.size(); i++) {
String sliceName = shardNames.get(i-1); String sliceName = shardNames.get(i-1);
@ -1671,14 +1671,17 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
sreq.actualShards = sreq.shards; sreq.actualShards = sreq.shards;
sreq.params = params; sreq.params = params;
if(isLegacyCloud) shardHandler.submit(sreq, sreq.shards[0], sreq.params); if(isLegacyCloud) {
else coresToCreate.put(coreName, sreq); shardHandler.submit(sreq, sreq.shards[0], sreq.params);
} else {
coresToCreate.put(coreName, sreq);
}
} }
} }
if(!isLegacyCloud) { if(!isLegacyCloud) {
//wait for all replica entries to be created // wait for all replica entries to be created
Map<String, Replica> replicas = lookupReplicas(collectionName, coresToCreate.keySet()); Map<String, Replica> replicas = waitToSeeReplicasInState(collectionName, coresToCreate.keySet());
for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) { for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) {
ShardRequest sreq = e.getValue(); ShardRequest sreq = e.getValue();
sreq.params.set(CoreAdminParams.CORE_NODE_NAME, replicas.get(e.getKey()).getName()); sreq.params.set(CoreAdminParams.CORE_NODE_NAME, replicas.get(e.getKey()).getName());
@ -1704,37 +1707,35 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
} }
} }
private Map<String, Replica> lookupReplicas(String collectionName, Collection<String> coreNames) throws InterruptedException { private Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
Map<String, Replica> result = new HashMap<String, Replica>(); Map<String, Replica> result = new HashMap<String, Replica>();
long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS); long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
for(;;) { while (true) {
DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName); DocCollection coll = zkStateReader.getClusterState().getCollection(
for (String coreName : coreNames) { collectionName);
if(result.containsKey(coreName)) continue; for (String coreName : coreNames) {
if (result.containsKey(coreName)) continue;
for (Slice slice : coll.getSlices()) { for (Slice slice : coll.getSlices()) {
for (Replica replica : slice.getReplicas()) { for (Replica replica : slice.getReplicas()) {
if(coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) { if (coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
result.put(coreName,replica); result.put(coreName, replica);
break; break;
} }
} }
} }
} }
if(result.size() == coreNames.size()) { if (result.size() == coreNames.size()) {
return result; return result;
} }
if( System.nanoTime() > endTime) { if (System.nanoTime() > endTime) {
//time up . throw exception and go out throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas in cluster state.");
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to create replica entries in ZK");
} }
Thread.sleep(100); Thread.sleep(100);
} }
} }
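waitToSeeReplicasInState above is a standard deadline-polling loop: compute an absolute deadline from System.nanoTime (monotonic, so wall-clock adjustments cannot shorten or stretch the wait), poll, sleep briefly, and fail once the deadline passes. A distilled sketch of the same shape, assuming a hypothetical tryOnce() check:

    import java.util.concurrent.TimeUnit;

    final class DeadlinePoll {
      // Polls until tryOnce() yields a result or ~30s elapse, sleeping 100ms
      // between checks, mirroring the loop in the diff above.
      static String awaitOrFail() throws InterruptedException {
        long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
        while (true) {
          String result = tryOnce();        // hypothetical single check
          if (result != null) {
            return result;
          }
          if (System.nanoTime() > endTime) {
            throw new IllegalStateException("Timed out waiting for condition");
          }
          Thread.sleep(100);
        }
      }

      private static String tryOnce() {
        return null; // placeholder: return non-null once the watched state appears
      }
    }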
private void addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException { private void addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException {
String collection = message.getStr(COLLECTION_PROP); String collection = message.getStr(COLLECTION_PROP);
String node = message.getStr("node"); String node = message.getStr("node");
@ -1789,7 +1790,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
ZkStateReader.STATE_PROP, ZkStateReader.DOWN, ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
ZkStateReader.BASE_URL_PROP,zkStateReader.getBaseUrlForNodeName(node)); ZkStateReader.BASE_URL_PROP,zkStateReader.getBaseUrlForNodeName(node));
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(props)); Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(props));
params.set(CoreAdminParams.CORE_NODE_NAME, lookupReplicas(collection, Collections.singletonList(coreName)).get(coreName).getName()); params.set(CoreAdminParams.CORE_NODE_NAME, waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
} }

View File

@ -31,6 +31,7 @@ import java.util.Enumeration;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
@ -160,16 +161,19 @@ public final class ZkController {
protected volatile Overseer overseer; protected volatile Overseer overseer;
private int leaderVoteWait; private int leaderVoteWait;
private int leaderConflictResolveWait;
private boolean genericCoreNodeNames; private boolean genericCoreNodeNames;
private int clientTimeout; private int clientTimeout;
private volatile boolean isClosed; private volatile boolean isClosed;
public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort, public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort,
String localHostContext, int leaderVoteWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException, String localHostContext, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect)
TimeoutException, IOException { throws InterruptedException, TimeoutException, IOException
{
if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null."); if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null.");
this.cc = cc; this.cc = cc;
this.genericCoreNodeNames = genericCoreNodeNames; this.genericCoreNodeNames = genericCoreNodeNames;
@ -188,6 +192,8 @@ public final class ZkController {
this.localHostContext); this.localHostContext);
this.leaderVoteWait = leaderVoteWait; this.leaderVoteWait = leaderVoteWait;
this.leaderConflictResolveWait = leaderConflictResolveWait;
this.clientTimeout = zkClientTimeout; this.clientTimeout = zkClientTimeout;
zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout, zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout,
zkClientConnectTimeout, new DefaultConnectionStrategy(), zkClientConnectTimeout, new DefaultConnectionStrategy(),
@ -850,19 +856,28 @@ public final class ZkController {
shardId, timeoutms * 2); // since we found it in zk, we are willing to shardId, timeoutms * 2); // since we found it in zk, we are willing to
// wait a while to find it in state // wait a while to find it in state
int tries = 0; int tries = 0;
final long msInSec = 1000L;
int maxTries = (int)Math.floor(leaderConflictResolveWait/msInSec);
while (!leaderUrl.equals(clusterStateLeaderUrl)) { while (!leaderUrl.equals(clusterStateLeaderUrl)) {
if (tries == 60) { if (tries > maxTries) {
throw new SolrException(ErrorCode.SERVER_ERROR, throw new SolrException(ErrorCode.SERVER_ERROR,
"There is conflicting information about the leader of shard: " "There is conflicting information about the leader of shard: "
+ cloudDesc.getShardId() + " our state says:" + cloudDesc.getShardId() + " our state says:"
+ clusterStateLeaderUrl + " but zookeeper says:" + leaderUrl); + clusterStateLeaderUrl + " but zookeeper says:" + leaderUrl);
} }
Thread.sleep(1000); Thread.sleep(msInSec);
tries++; tries++;
clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId, clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId,
timeoutms); timeoutms);
leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms) leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms)
.getCoreUrl(); .getCoreUrl();
if (tries % 30 == 0) {
String warnMsg = String.format(Locale.ENGLISH, "Still seeing conflicting information about the leader "
+ "of shard %s for collection %s after %d seconds; our state says %s, but ZooKeeper says %s",
cloudDesc.getShardId(), collection, tries, clusterStateLeaderUrl, leaderUrl);
log.warn(warnMsg);
}
} }
} catch (Exception e) { } catch (Exception e) {
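The conflict-resolution loop above derives its retry budget from the configured leaderConflictResolveWait (milliseconds) and logs a warning every 30 iterations instead of spinning silently. A condensed sketch of that bookkeeping, assuming a hypothetical viewsAgree() check in place of re-reading the two leader URLs:

    final class LeaderConflictRetry {
      static void resolve(int leaderConflictResolveWaitMs) throws InterruptedException {
        final long msInSec = 1000L;
        int maxTries = (int) Math.floor(leaderConflictResolveWaitMs / msInSec);
        int tries = 0;
        while (!viewsAgree()) {             // hypothetical: do ZK and cluster state agree?
          if (tries > maxTries) {
            throw new IllegalStateException("Conflicting leader information did not resolve");
          }
          Thread.sleep(msInSec);
          tries++;
          if (tries % 30 == 0) {
            System.err.printf("Still seeing conflicting leader info after %d seconds%n", tries);
          }
        }
      }

      private static boolean viewsAgree() {
        return true; // placeholder for comparing the two leader URLs
      }
    }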
@ -1013,7 +1028,8 @@ public final class ZkController {
core.close(); core.close();
} }
} }
log.info("publishing core={} state={}", cd.getName(), state); String collection = cd.getCloudDescriptor().getCollectionName();
log.info("publishing core={} state={} collection={}", cd.getName(), state, collection);
//System.out.println(Thread.currentThread().getStackTrace()[3]); //System.out.println(Thread.currentThread().getStackTrace()[3]);
Integer numShards = cd.getCloudDescriptor().getNumShards(); Integer numShards = cd.getCloudDescriptor().getNumShards();
if (numShards == null) { //XXX sys prop hack if (numShards == null) { //XXX sys prop hack
@ -1021,8 +1037,7 @@ public final class ZkController {
numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP); numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
} }
assert cd.getCloudDescriptor().getCollectionName() != null && cd.getCloudDescriptor() assert collection != null && collection.length() > 0;
.getCollectionName().length() > 0;
String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
//assert cd.getCloudDescriptor().getShardId() != null; //assert cd.getCloudDescriptor().getShardId() != null;
@ -1033,12 +1048,9 @@ public final class ZkController {
ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles(), ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles(),
ZkStateReader.NODE_NAME_PROP, getNodeName(), ZkStateReader.NODE_NAME_PROP, getNodeName(),
ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId(), ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId(),
ZkStateReader.COLLECTION_PROP, cd.getCloudDescriptor() ZkStateReader.COLLECTION_PROP, collection,
.getCollectionName(), ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() : null,
ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName : null);
: null,
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName
: null);
if (updateLastState) { if (updateLastState) {
cd.getCloudDescriptor().lastPublished = state; cd.getCloudDescriptor().lastPublished = state;
} }
@ -1064,6 +1076,12 @@ public final class ZkController {
final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
final String collection = cd.getCloudDescriptor().getCollectionName(); final String collection = cd.getCloudDescriptor().getCollectionName();
assert collection != null; assert collection != null;
if (collection == null || collection.trim().length() == 0) {
log.error("No collection was specified.");
return;
}
ElectionContext context = electionContexts.remove(new ContextKey(collection, coreNodeName)); ElectionContext context = electionContexts.remove(new ContextKey(collection, coreNodeName));
if (context != null) { if (context != null) {
@ -1362,7 +1380,6 @@ public final class ZkController {
CloudDescriptor cloudDesc = cd.getCloudDescriptor(); CloudDescriptor cloudDesc = cd.getCloudDescriptor();
// make sure the node name is set on the descriptor // make sure the node name is set on the descriptor
if (cloudDesc.getCoreNodeName() == null) { if (cloudDesc.getCoreNodeName() == null) {
cloudDesc.setCoreNodeName(coreNodeName); cloudDesc.setCoreNodeName(coreNodeName);

View File

@ -138,6 +138,7 @@ public abstract class ConfigSolr {
private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000; private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000;
private static final int DEFAULT_LEADER_VOTE_WAIT = 180000; // 3 minutes private static final int DEFAULT_LEADER_VOTE_WAIT = 180000; // 3 minutes
private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000;
private static final int DEFAULT_CORE_LOAD_THREADS = 3; private static final int DEFAULT_CORE_LOAD_THREADS = 3;
protected static final String DEFAULT_CORE_ADMIN_PATH = "/admin/cores"; protected static final String DEFAULT_CORE_ADMIN_PATH = "/admin/cores";
@ -157,6 +158,10 @@ public abstract class ConfigSolr {
public int getLeaderVoteWait() { public int getLeaderVoteWait() {
return getInt(CfgProp.SOLR_LEADERVOTEWAIT, DEFAULT_LEADER_VOTE_WAIT); return getInt(CfgProp.SOLR_LEADERVOTEWAIT, DEFAULT_LEADER_VOTE_WAIT);
} }
public int getLeaderConflictResolveWait() {
return getInt(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT);
}
public boolean getGenericCoreNodeNames() { public boolean getGenericCoreNodeNames() {
return getBool(CfgProp.SOLR_GENERICCORENODENAMES, false); return getBool(CfgProp.SOLR_GENERICCORENODENAMES, false);
@ -255,6 +260,7 @@ public abstract class ConfigSolr {
SOLR_GENERICCORENODENAMES, SOLR_GENERICCORENODENAMES,
SOLR_ZKCLIENTTIMEOUT, SOLR_ZKCLIENTTIMEOUT,
SOLR_ZKHOST, SOLR_ZKHOST,
SOLR_LEADERCONFLICTRESOLVEWAIT,
//TODO: Remove all of these elements for 5.0 //TODO: Remove all of these elements for 5.0
SOLR_PERSISTENT, SOLR_PERSISTENT,
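The new leaderConflictResolveWait setting follows the existing ConfigSolr pattern: an enum key, a compiled-in default (180000 ms), and a typed getter that falls back to the default when solr.xml omits the value. A self-contained sketch of that lookup shape, assuming a simple map of parsed properties (the real ConfigSolr reads them via XPath):

    import java.util.Map;

    final class ConfigSketch {
      private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000; // ms

      private final Map<String, String> props; // parsed from solr.xml in the real class

      ConfigSketch(Map<String, String> props) {
        this.props = props;
      }

      int getInt(String key, int def) {
        String v = props.get(key);
        return v == null ? def : Integer.parseInt(v);
      }

      int getLeaderConflictResolveWait() {
        return getInt("leaderConflictResolveWait", DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT);
      }
    }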

View File

@ -67,6 +67,7 @@ public class ConfigSolrXml extends ConfigSolr {
failIfFound("solr/cores/@hostContext"); failIfFound("solr/cores/@hostContext");
failIfFound("solr/cores/@hostPort"); failIfFound("solr/cores/@hostPort");
failIfFound("solr/cores/@leaderVoteWait"); failIfFound("solr/cores/@leaderVoteWait");
failIfFound("solr/cores/@leaderConflictResolveWait");
failIfFound("solr/cores/@genericCoreNodeNames"); failIfFound("solr/cores/@genericCoreNodeNames");
failIfFound("solr/cores/@managementPath"); failIfFound("solr/cores/@managementPath");
failIfFound("solr/cores/@shareSchema"); failIfFound("solr/cores/@shareSchema");
@ -113,6 +114,7 @@ public class ConfigSolrXml extends ConfigSolr {
propMap.put(CfgProp.SOLR_HOSTCONTEXT, doSub("solr/solrcloud/str[@name='hostContext']")); propMap.put(CfgProp.SOLR_HOSTCONTEXT, doSub("solr/solrcloud/str[@name='hostContext']"));
propMap.put(CfgProp.SOLR_HOSTPORT, doSub("solr/solrcloud/int[@name='hostPort']")); propMap.put(CfgProp.SOLR_HOSTPORT, doSub("solr/solrcloud/int[@name='hostPort']"));
propMap.put(CfgProp.SOLR_LEADERVOTEWAIT, doSub("solr/solrcloud/int[@name='leaderVoteWait']")); propMap.put(CfgProp.SOLR_LEADERVOTEWAIT, doSub("solr/solrcloud/int[@name='leaderVoteWait']"));
propMap.put(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, doSub("solr/solrcloud/int[@name='leaderConflictResolveWait']"));
propMap.put(CfgProp.SOLR_GENERICCORENODENAMES, doSub("solr/solrcloud/bool[@name='genericCoreNodeNames']")); propMap.put(CfgProp.SOLR_GENERICCORENODENAMES, doSub("solr/solrcloud/bool[@name='genericCoreNodeNames']"));
propMap.put(CfgProp.SOLR_MANAGEMENTPATH, doSub("solr/str[@name='managementPath']")); propMap.put(CfgProp.SOLR_MANAGEMENTPATH, doSub("solr/str[@name='managementPath']"));
propMap.put(CfgProp.SOLR_SHAREDLIB, doSub("solr/str[@name='sharedLib']")); propMap.put(CfgProp.SOLR_SHAREDLIB, doSub("solr/str[@name='sharedLib']"));

View File

@ -51,6 +51,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
public static final String BLOCKCACHE_SLAB_COUNT = "solr.hdfs.blockcache.slab.count"; public static final String BLOCKCACHE_SLAB_COUNT = "solr.hdfs.blockcache.slab.count";
public static final String BLOCKCACHE_DIRECT_MEMORY_ALLOCATION = "solr.hdfs.blockcache.direct.memory.allocation"; public static final String BLOCKCACHE_DIRECT_MEMORY_ALLOCATION = "solr.hdfs.blockcache.direct.memory.allocation";
public static final String BLOCKCACHE_ENABLED = "solr.hdfs.blockcache.enabled"; public static final String BLOCKCACHE_ENABLED = "solr.hdfs.blockcache.enabled";
public static final String BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global";
public static final String BLOCKCACHE_READ_ENABLED = "solr.hdfs.blockcache.read.enabled"; public static final String BLOCKCACHE_READ_ENABLED = "solr.hdfs.blockcache.read.enabled";
public static final String BLOCKCACHE_WRITE_ENABLED = "solr.hdfs.blockcache.write.enabled"; public static final String BLOCKCACHE_WRITE_ENABLED = "solr.hdfs.blockcache.write.enabled";
@ -72,6 +73,8 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
private String hdfsDataDir; private String hdfsDataDir;
private String confDir; private String confDir;
private static BlockCache globalBlockCache;
public static Metrics metrics; public static Metrics metrics;
private static Boolean kerberosInit; private static Boolean kerberosInit;
@ -102,6 +105,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
} }
boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true); boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true);
boolean blockCacheGlobal = params.getBool(BLOCKCACHE_GLOBAL, false); // default to false for back compat
boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED, boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED,
true); true);
boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true); boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true);
@ -117,8 +121,6 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
boolean directAllocation = params.getBool( boolean directAllocation = params.getBool(
BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true); BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true);
BlockCache blockCache;
int slabSize = numberOfBlocksPerBank * blockSize; int slabSize = numberOfBlocksPerBank * blockSize;
LOG.info( LOG.info(
"Number of slabs of block cache [{}] with direct memory allocation set to [{}]", "Number of slabs of block cache [{}] with direct memory allocation set to [{}]",
@ -131,22 +133,13 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128); int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128);
int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128); int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128);
BufferStore.initNewBuffer(bufferSize, bufferCount); BlockCache blockCache = getBlockDirectoryCache(path, numberOfBlocksPerBank,
long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank blockSize, bankCount, directAllocation, slabSize,
* (long) blockSize; bufferSize, bufferCount, blockCacheGlobal);
try {
blockCache = new BlockCache(metrics, directAllocation, totalMemory, Cache cache = new BlockDirectoryCache(blockCache, path, metrics);
slabSize, blockSize);
} catch (OutOfMemoryError e) {
throw new RuntimeException(
"The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your containers startup args)"
+ " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
+ " your java heap size might not be large enough."
+ " Failed allocating ~" + totalMemory / 1000000.0 + " MB.", e);
}
Cache cache = new BlockDirectoryCache(blockCache, metrics);
HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf); HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf);
dir = new BlockDirectory("solrcore", hdfsDirectory, cache, null, dir = new BlockDirectory(path, hdfsDirectory, cache, null,
blockCacheReadEnabled, blockCacheWriteEnabled); blockCacheReadEnabled, blockCacheWriteEnabled);
} else { } else {
dir = new HdfsDirectory(new Path(path), conf); dir = new HdfsDirectory(new Path(path), conf);
@ -164,6 +157,45 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
} }
return dir; return dir;
} }
private BlockCache getBlockDirectoryCache(String path,
int numberOfBlocksPerBank, int blockSize, int bankCount,
boolean directAllocation, int slabSize, int bufferSize, int bufferCount, boolean staticBlockCache) {
if (!staticBlockCache) {
LOG.info("Creating new single instance HDFS BlockCache");
return createBlockCache(numberOfBlocksPerBank, blockSize, bankCount, directAllocation, slabSize, bufferSize, bufferCount);
}
LOG.info("Creating new global HDFS BlockCache");
synchronized (HdfsDirectoryFactory.class) {
if (globalBlockCache == null) {
globalBlockCache = createBlockCache(numberOfBlocksPerBank, blockSize, bankCount,
directAllocation, slabSize, bufferSize, bufferCount);
}
}
return globalBlockCache;
}
private BlockCache createBlockCache(int numberOfBlocksPerBank, int blockSize,
int bankCount, boolean directAllocation, int slabSize, int bufferSize,
int bufferCount) {
BufferStore.initNewBuffer(bufferSize, bufferCount);
long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank
* (long) blockSize;
BlockCache blockCache;
try {
blockCache = new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize);
} catch (OutOfMemoryError e) {
throw new RuntimeException(
"The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your containers startup args)"
+ " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
+ " your java heap size might not be large enough."
+ " Failed allocating ~" + totalMemory / 1000000.0 + " MB.",
e);
}
return blockCache;
}
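getBlockDirectoryCache above creates the process-wide cache lazily, guarding initialization with a class-level lock so concurrent core loads cannot race and build two caches. The shape, reduced to its essentials (createCache stands in for the expensive BlockCache construction):

    final class GlobalCacheHolder {
      private static Object globalCache; // shared across all instances

      static Object get(boolean global) {
        if (!global) {
          return createCache();            // per-directory cache, no locking needed
        }
        synchronized (GlobalCacheHolder.class) {
          if (globalCache == null) {       // first caller creates it, later callers reuse
            globalCache = createCache();
          }
          return globalCache;              // read under the lock for safe publication
        }
      }

      private static Object createCache() {
        return new Object(); // placeholder for the real cache construction
      }
    }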
@Override @Override
public boolean exists(String path) { public boolean exists(String path) {

View File

@ -66,11 +66,12 @@ public class ZkContainer {
initZooKeeper(cc, solrHome, initZooKeeper(cc, solrHome,
config.getZkHost(), config.getZkClientTimeout(), config.getZkHostPort(), config.getZkHostContext(), config.getZkHost(), config.getZkClientTimeout(), config.getZkHostPort(), config.getZkHostContext(),
config.getHost(), config.getLeaderVoteWait(), config.getGenericCoreNodeNames()); config.getHost(), config.getLeaderVoteWait(), config.getLeaderConflictResolveWait(), config.getGenericCoreNodeNames());
} }
public void initZooKeeper(final CoreContainer cc, String solrHome, String zkHost, int zkClientTimeout, String hostPort, public void initZooKeeper(final CoreContainer cc, String solrHome, String zkHost, int zkClientTimeout, String hostPort,
String hostContext, String host, int leaderVoteWait, boolean genericCoreNodeNames) { String hostContext, String host, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames) {
ZkController zkController = null; ZkController zkController = null;
// if zkHost sys property is not set, we are not using ZooKeeper // if zkHost sys property is not set, we are not using ZooKeeper
@ -135,7 +136,7 @@ public class ZkContainer {
} }
zkController = new ZkController(cc, zookeeperHost, zkClientTimeout, zkController = new ZkController(cc, zookeeperHost, zkClientTimeout,
zkClientConnectTimeout, host, hostPort, hostContext, zkClientConnectTimeout, host, hostPort, hostContext,
leaderVoteWait, genericCoreNodeNames, leaderVoteWait, leaderConflictResolveWait, genericCoreNodeNames,
new CurrentCoreDescriptorProvider() { new CurrentCoreDescriptorProvider() {
@Override @Override

View File

@ -25,12 +25,14 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup; import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
@ -500,12 +502,32 @@ public class QueryComponent extends SearchComponent
// sort ids from lowest to highest so we can access them in order // sort ids from lowest to highest so we can access them in order
int nDocs = docList.size(); int nDocs = docList.size();
long[] sortedIds = new long[nDocs]; final long[] sortedIds = new long[nDocs];
DocIterator it = rb.getResults().docList.iterator(); final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
DocList docs = rb.getResults().docList;
DocIterator it = docs.iterator();
for (int i=0; i<nDocs; i++) { for (int i=0; i<nDocs; i++) {
sortedIds[i] = (((long)it.nextDoc()) << 32) | i; sortedIds[i] = (((long)it.nextDoc()) << 32) | i;
scores[i] = docs.hasScores() ? it.score() : Float.NaN;
} }
Arrays.sort(sortedIds);
// sort ids and scores together
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
long tmpId = sortedIds[i];
float tmpScore = scores[i];
sortedIds[i] = sortedIds[j];
scores[i] = scores[j];
sortedIds[j] = tmpId;
scores[j] = tmpScore;
}
@Override
protected int compare(int i, int j) {
return Long.compare(sortedIds[i], sortedIds[j]);
}
}.sort(0, sortedIds.length);
SortSpec sortSpec = rb.getSortSpec(); SortSpec sortSpec = rb.getSortSpec();
Sort sort = searcher.weightSort(sortSpec.getSort()); Sort sort = searcher.weightSort(sortSpec.getSort());
@ -527,7 +549,9 @@ public class QueryComponent extends SearchComponent
int lastIdx = -1; int lastIdx = -1;
int idx = 0; int idx = 0;
for (long idAndPos : sortedIds) { for (int i = 0; i < sortedIds.length; ++i) {
long idAndPos = sortedIds[i];
float score = scores[i];
int doc = (int)(idAndPos >>> 32); int doc = (int)(idAndPos >>> 32);
int position = (int)idAndPos; int position = (int)idAndPos;
@ -546,6 +570,7 @@ public class QueryComponent extends SearchComponent
} }
doc -= currentLeaf.docBase; // adjust for what segment this is in doc -= currentLeaf.docBase; // adjust for what segment this is in
comparator.setScorer(new FakeScorer(doc, score));
comparator.copy(0, doc); comparator.copy(0, doc);
Object val = comparator.value(0); Object val = comparator.value(0);
if (null != ft) val = ft.marshalSortValue(val); if (null != ft) val = ft.marshalSortValue(val);
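The sortedIds trick above packs each docid into the high 32 bits of a long and the document's original result position into the low 32 bits, so sorting the longs visits documents in index order while both values stay recoverable; the InPlaceMergeSorter swap keeps the parallel scores array aligned. A small sketch of the pack/unpack arithmetic:

    final class PackedIdSort {
      static void demo() {
        int doc = 42;     // lucene docid
        int pos = 7;      // position of this doc in the original result list

        long packed = (((long) doc) << 32) | pos;

        int docOut = (int) (packed >>> 32); // high 32 bits: the docid
        int posOut = (int) packed;          // low 32 bits: the original position

        assert docOut == doc && posOut == pos;
      }
    }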
@ -1157,4 +1182,50 @@ public class QueryComponent extends SearchComponent
public URL[] getDocs() { public URL[] getDocs() {
return null; return null;
} }
/**
* Fake scorer for a single document
*
* TODO: when SOLR-5595 is fixed, this won't be needed, as we don't need to recompute sort values here from the comparator
*/
private static class FakeScorer extends Scorer {
final int docid;
final float score;
FakeScorer(int docid, float score) {
super(null);
this.docid = docid;
this.score = score;
}
@Override
public int docID() {
return docid;
}
@Override
public float score() throws IOException {
return score;
}
@Override
public int freq() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return 1;
}
}
} }
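FakeScorer exists only so FieldComparator.setScorer has something to call score() on when sort values are recomputed per document; every traversal method deliberately throws. A compressed sketch of the per-hit loop it supports, assuming a comparator-like stand-in for Lucene's FieldComparator:

    interface SortValueComparator {
      void setScorer(SingleDocScore scorer);
      void copy(int slot, int doc);
      Object value(int slot);
    }

    // Minimal scorer-like holder for one (docid, score) pair, as in FakeScorer.
    final class SingleDocScore {
      final int docid;
      final float score;

      SingleDocScore(int docid, float score) {
        this.docid = docid;
        this.score = score;
      }
    }

    final class SortValueLoop {
      static void fill(long[] sortedIds, float[] scores, SortValueComparator cmp) {
        for (int i = 0; i < sortedIds.length; i++) {
          int doc = (int) (sortedIds[i] >>> 32);
          cmp.setScorer(new SingleDocScore(doc, scores[i])); // score now visible to the comparator
          cmp.copy(0, doc);
          Object val = cmp.value(0);  // marshalled further by the real component
        }
      }
    }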

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
@ -90,7 +91,8 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {
try { try {
return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion, return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion,
new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars); FSDirectory.open(new File(indexPath)), indexAnalyzer,
queryAnalyzer, minPrefixChars);
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
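Both suggester factories now hand the suggester an FSDirectory instead of a raw File, matching the constructor change on the Lucene side. A hedged sketch of opening the on-disk suggester index this way (the path is a placeholder):

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.store.FSDirectory;

    final class SuggesterDirSketch {
      // Opens (or creates) the directory that will hold the suggester's side index.
      static FSDirectory openSuggesterDir(String indexPath) throws IOException {
        return FSDirectory.open(new File(indexPath));
      }
    }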

View File

@ -23,8 +23,9 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester.BlenderType; import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester.BlenderType;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
@ -94,7 +95,9 @@ public class BlendedInfixLookupFactory extends AnalyzingInfixLookupFactory {
try { try {
return new BlendedInfixSuggester(core.getSolrConfig().luceneMatchVersion, return new BlendedInfixSuggester(core.getSolrConfig().luceneMatchVersion,
new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor); FSDirectory.open(new File(indexPath)),
indexAnalyzer, queryAnalyzer, minPrefixChars,
blenderType, numFactor);
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(); throw new RuntimeException();
} }

View File

@ -24,6 +24,9 @@ import java.util.concurrent.atomic.AtomicInteger;
import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap; import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap;
import com.googlecode.concurrentlinkedhashmap.EvictionListener; import com.googlecode.concurrentlinkedhashmap.EvictionListener;
/**
* @lucene.experimental
*/
public class BlockCache { public class BlockCache {
public static final int _128M = 134217728; public static final int _128M = 134217728;

View File

@ -16,12 +16,23 @@ package org.apache.solr.store.blockcache;
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/**
* @lucene.experimental
*/
public class BlockCacheKey implements Cloneable { public class BlockCacheKey implements Cloneable {
private long block; private long block;
private int file; private int file;
private String path;
public String getPath() {
return path;
}
public void setPath(String path) {
this.path = path;
}
public long getBlock() { public long getBlock() {
return block; return block;
} }
@ -44,9 +55,10 @@ public class BlockCacheKey implements Cloneable {
int result = 1; int result = 1;
result = prime * result + (int) (block ^ (block >>> 32)); result = prime * result + (int) (block ^ (block >>> 32));
result = prime * result + file; result = prime * result + file;
result = prime * result + ((path == null) ? 0 : path.hashCode());
return result; return result;
} }
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) return true; if (this == obj) return true;
@ -55,9 +67,12 @@ public class BlockCacheKey implements Cloneable {
BlockCacheKey other = (BlockCacheKey) obj; BlockCacheKey other = (BlockCacheKey) obj;
if (block != other.block) return false; if (block != other.block) return false;
if (file != other.file) return false; if (file != other.file) return false;
if (path == null) {
if (other.path != null) return false;
} else if (!path.equals(other.path)) return false;
return true; return true;
} }
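Folding path into BlockCacheKey's hashCode and equals is what makes a single global cache safe: two directories can now cache the same (file, block) pair without colliding, because the directory path participates in key identity. A tiny illustration using the accessors this diff adds:

    import org.apache.solr.store.blockcache.BlockCacheKey;

    final class KeyIdentityDemo {
      static void demo() {
        BlockCacheKey a = new BlockCacheKey();
        a.setPath("/core1/index");
        a.setFile(0);
        a.setBlock(0);

        BlockCacheKey b = new BlockCacheKey();
        b.setPath("/core2/index");
        b.setFile(0);
        b.setBlock(0);

        // Same block and file id, different directory: distinct cache entries now.
        assert !a.equals(b);
      }
    }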
@Override @Override
public BlockCacheKey clone() { public BlockCacheKey clone() {
try { try {

View File

@ -19,6 +19,9 @@ package org.apache.solr.store.blockcache;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
/**
* @lucene.experimental
*/
public class BlockCacheLocation { public class BlockCacheLocation {
private int block; private int block;

View File

@ -34,6 +34,9 @@ import org.apache.solr.store.hdfs.HdfsDirectory;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
/**
* @lucene.experimental
*/
public class BlockDirectory extends Directory { public class BlockDirectory extends Directory {
public static Logger LOG = LoggerFactory.getLogger(BlockDirectory.class); public static Logger LOG = LoggerFactory.getLogger(BlockDirectory.class);
@ -82,11 +85,11 @@ public class BlockDirectory extends Directory {
private Directory directory; private Directory directory;
private int blockSize; private int blockSize;
private String dirName; private String dirName;
private Cache cache; private final Cache cache;
private Set<String> blockCacheFileTypes; private Set<String> blockCacheFileTypes;
private final boolean blockCacheReadEnabled; private final boolean blockCacheReadEnabled;
private final boolean blockCacheWriteEnabled; private final boolean blockCacheWriteEnabled;
public BlockDirectory(String dirName, Directory directory, Cache cache, public BlockDirectory(String dirName, Directory directory, Cache cache,
Set<String> blockCacheFileTypes, boolean blockCacheReadEnabled, Set<String> blockCacheFileTypes, boolean blockCacheReadEnabled,
boolean blockCacheWriteEnabled) throws IOException { boolean blockCacheWriteEnabled) throws IOException {
@ -265,6 +268,15 @@ public class BlockDirectory extends Directory {
return dirName + "/" + name; return dirName + "/" + name;
} }
/**
* Expert: mostly for tests
*
* @lucene.experimental
*/
public Cache getCache() {
return cache;
}
@Override @Override
public void copy(Directory to, String src, String dest, IOContext context) public void copy(Directory to, String src, String dest, IOContext context)
throws IOException { throws IOException {
@ -383,4 +395,13 @@ public class BlockDirectory extends Directory {
return directory; return directory;
} }
public boolean isBlockCacheReadEnabled() {
return blockCacheReadEnabled;
}
public boolean isBlockCacheWriteEnabled() {
return blockCacheWriteEnabled;
}
} }

View File

@ -21,17 +21,31 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
/**
* @lucene.experimental
*/
public class BlockDirectoryCache implements Cache { public class BlockDirectoryCache implements Cache {
private BlockCache blockCache; private final BlockCache blockCache;
private AtomicInteger counter = new AtomicInteger(); private AtomicInteger counter = new AtomicInteger();
private Map<String,Integer> names = new ConcurrentHashMap<String,Integer>(); private Map<String,Integer> names = new ConcurrentHashMap<String,Integer>();
private String path;
private Metrics metrics; private Metrics metrics;
public BlockDirectoryCache(BlockCache blockCache, Metrics metrics) { public BlockDirectoryCache(BlockCache blockCache, String path, Metrics metrics) {
this.blockCache = blockCache; this.blockCache = blockCache;
this.path = path;
this.metrics = metrics; this.metrics = metrics;
} }
/**
* Expert: mostly for tests
*
* @lucene.experimental
*/
public BlockCache getBlockCache() {
return blockCache;
}
@Override @Override
public void delete(String name) { public void delete(String name) {
names.remove(name); names.remove(name);
@ -46,6 +60,7 @@ public class BlockDirectoryCache implements Cache {
names.put(name, file); names.put(name, file);
} }
BlockCacheKey blockCacheKey = new BlockCacheKey(); BlockCacheKey blockCacheKey = new BlockCacheKey();
blockCacheKey.setPath(path);
blockCacheKey.setBlock(blockId); blockCacheKey.setBlock(blockId);
blockCacheKey.setFile(file); blockCacheKey.setFile(file);
blockCache.store(blockCacheKey, blockOffset, buffer, offset, length); blockCache.store(blockCacheKey, blockOffset, buffer, offset, length);
@ -59,6 +74,7 @@ public class BlockDirectoryCache implements Cache {
return false; return false;
} }
BlockCacheKey blockCacheKey = new BlockCacheKey(); BlockCacheKey blockCacheKey = new BlockCacheKey();
blockCacheKey.setPath(path);
blockCacheKey.setBlock(blockId); blockCacheKey.setBlock(blockId);
blockCacheKey.setFile(file); blockCacheKey.setFile(file);
boolean fetch = blockCache.fetch(blockCacheKey, b, blockOffset, off, boolean fetch = blockCache.fetch(blockCacheKey, b, blockOffset, off,
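Because BlockDirectoryCache now stamps every key with its directory path before store and fetch, multiple directories can share one BlockCache without cross-talk. A condensed sketch of the store-side keying, assuming a string stand-in for BlockCacheKey:

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.atomic.AtomicInteger;

    final class PathScopedKeys {
      private final Map<String, Integer> names = new ConcurrentHashMap<String, Integer>();
      private final AtomicInteger counter = new AtomicInteger();
      private final String path; // identifies this directory within the shared cache

      PathScopedKeys(String path) {
        this.path = path;
      }

      // Builds the (path, file, block) triple used to key the shared cache.
      String keyFor(String name, long blockId) {
        Integer file = names.get(name);
        if (file == null) {                 // assign a stable per-file id on first use
          file = counter.incrementAndGet();
          names.put(name, file);
        }
        return path + "#" + file + "#" + blockId;
      }
    }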

View File

@ -21,6 +21,9 @@ import java.util.concurrent.atomic.AtomicLongArray;
import org.apache.lucene.util.LongBitSet; import org.apache.lucene.util.LongBitSet;
/**
* @lucene.experimental
*/
public class BlockLocks { public class BlockLocks {
private AtomicLongArray bits; private AtomicLongArray bits;

View File

@ -22,7 +22,9 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentMap;
/**
* @lucene.experimental
*/
public class BufferStore implements Store { public class BufferStore implements Store {
private static final Store EMPTY = new Store() { private static final Store EMPTY = new Store() {

View File

@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache;
* limitations under the License. * limitations under the License.
*/ */
/**
* @lucene.experimental
*/
public interface Cache { public interface Cache {
/** /**

View File

@ -21,10 +21,11 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
/* /**
* Cache the blocks as they are written. The cache file name is the name of * Cache the blocks as they are written. The cache file name is the name of
* the file until the file is closed, at which point the cache is updated * the file until the file is closed, at which point the cache is updated
* to include the last modified date (which is unknown until that point). * to include the last modified date (which is unknown until that point).
* @lucene.experimental
*/ */
public class CachedIndexOutput extends ReusedBufferedIndexOutput { public class CachedIndexOutput extends ReusedBufferedIndexOutput {
private final BlockDirectory directory; private final BlockDirectory directory;

View File

@ -23,6 +23,9 @@ import java.io.IOException;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
/**
* @lucene.experimental
*/
public abstract class CustomBufferedIndexInput extends IndexInput { public abstract class CustomBufferedIndexInput extends IndexInput {
public static final int BUFFER_SIZE = 32768; public static final int BUFFER_SIZE = 32768;

View File

@ -29,6 +29,9 @@ import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater; import org.apache.hadoop.metrics.Updater;
import org.apache.hadoop.metrics.jvm.JvmMetrics; import org.apache.hadoop.metrics.jvm.JvmMetrics;
/**
* @lucene.experimental
*/
public class Metrics implements Updater { public class Metrics implements Updater {
public static class MethodCall { public static class MethodCall {

View File

@ -21,6 +21,9 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
/**
* @lucene.experimental
*/
public abstract class ReusedBufferedIndexOutput extends IndexOutput { public abstract class ReusedBufferedIndexOutput extends IndexOutput {
public static final int BUFFER_SIZE = 1024; public static final int BUFFER_SIZE = 1024;

View File

@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache;
* limitations under the License. * limitations under the License.
*/ */
/**
* @lucene.experimental
*/
public interface Store { public interface Store {
byte[] takeBuffer(int bufferSize); byte[] takeBuffer(int bufferSize);

View File

@ -28,6 +28,9 @@ import org.apache.lucene.store.DataInput;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
/**
* @lucene.experimental
*/
public class HdfsFileReader extends DataInput { public class HdfsFileReader extends DataInput {
public static Logger LOG = LoggerFactory.getLogger(HdfsFileReader.class); public static Logger LOG = LoggerFactory.getLogger(HdfsFileReader.class);

View File

@ -32,6 +32,9 @@ import org.apache.lucene.store.DataOutput;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
/**
* @lucene.experimental
*/
public class HdfsFileWriter extends DataOutput implements Closeable { public class HdfsFileWriter extends DataOutput implements Closeable {
public static Logger LOG = LoggerFactory.getLogger(HdfsFileWriter.class); public static Logger LOG = LoggerFactory.getLogger(HdfsFileWriter.class);

View File

@ -21,6 +21,9 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
/**
* @lucene.experimental
*/
public class NullIndexOutput extends IndexOutput { public class NullIndexOutput extends IndexOutput {
private long pos; private long pos;

View File

@ -0,0 +1,41 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test-custom-comparator" version="1.5">
<types>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldType class="org.apache.solr.schema.WrappedIntField" name="wrapped_int"/>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="payload" type="wrapped_int" indexed="false"
stored="true" multiValued="false" docValues="true" required="true"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -287,6 +287,16 @@ valued. -->
class="solr.ExternalFileField"/> class="solr.ExternalFileField"/>
<fieldType name="text_no_analyzer" stored="false" indexed="true" class="solr.TextField" /> <fieldType name="text_no_analyzer" stored="false" indexed="true" class="solr.TextField" />
<fieldtype name="text_length" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="32768"/>
</analyzer>
</fieldtype>
</types> </types>
@ -324,6 +334,9 @@ valued. -->
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" /> <field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
<field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="cat_length" type="text_length" indexed="true" stored="true" multiValued="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields <!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns. will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have RESTRICTION: the glob-like pattern in the name attribute must have

View File

@ -28,6 +28,7 @@
<int name="solr.hdfs.blockcache.blocksperbank">${solr.hdfs.blockcache.blocksperbank:1024}</int> <int name="solr.hdfs.blockcache.blocksperbank">${solr.hdfs.blockcache.blocksperbank:1024}</int>
<str name="solr.hdfs.home">${solr.hdfs.home:}</str> <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str> <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:false}</str>
</directoryFactory> </directoryFactory>
<dataDir>${solr.data.dir:}</dataDir> <dataDir>${solr.data.dir:}</dataDir>

View File

@ -46,7 +46,8 @@
<double name="maxWriteMBPerSecMerge">3000000</double> <double name="maxWriteMBPerSecMerge">3000000</double>
<double name="maxWriteMBPerSecRead">4000000</double> <double name="maxWriteMBPerSecRead">4000000</double>
<str name="solr.hdfs.home">${solr.hdfs.home:}</str> <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<bool name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</bool> <bool name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</bool>
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:false}</str>
</directoryFactory> </directoryFactory>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion> <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>

View File

@ -131,7 +131,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
int threadCount = 1; int threadCount = 1;
int i = 0; int i = 0;
for (i = 0; i < threadCount; i++) { for (i = 0; i < threadCount; i++) {
StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true); StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
threads.add(indexThread); threads.add(indexThread);
indexThread.start(); indexThread.start();
} }
@ -270,7 +270,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
public FullThrottleStopableIndexingThread(List<SolrServer> clients, public FullThrottleStopableIndexingThread(List<SolrServer> clients,
String id, boolean doDeletes) { String id, boolean doDeletes) {
super(id, doDeletes); super(controlClient, cloudClient, id, doDeletes);
setName("FullThrottleStopableIndexingThread"); setName("FullThrottleStopableIndexingThread");
setDaemon(true); setDaemon(true);
this.clients = clients; this.clients = clients;

View File

@ -108,7 +108,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>(); List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>();
int threadCount = 2; int threadCount = 2;
for (int i = 0; i < threadCount; i++) { for (int i = 0; i < threadCount; i++) {
StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true); StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
threads.add(indexThread); threads.add(indexThread);
indexThread.start(); indexThread.start();
} }

View File

@ -203,14 +203,14 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
testCollectionsAPI(); testCollectionsAPI();
testCollectionsAPIAddRemoveStress(); testCollectionsAPIAddRemoveStress();
testErrorHandling(); testErrorHandling();
testNoCollectionSpecified();
deletePartiallyCreatedCollection(); deletePartiallyCreatedCollection();
deleteCollectionRemovesStaleZkCollectionsNode(); deleteCollectionRemovesStaleZkCollectionsNode();
clusterPropTest(); clusterPropTest();
addReplicaTest(); addReplicaTest();
// last // last
deleteCollectionWithDownNodes(); deleteCollectionWithDownNodes();
if (DEBUG) { if (DEBUG) {
super.printLayout(); super.printLayout();
} }
@ -578,6 +578,40 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
String val2 = failure.getVal(0).toString(); String val2 = failure.getVal(0).toString();
assertTrue(val1.contains("SolrException") || val2.contains("SolrException")); assertTrue(val1.contains("SolrException") || val2.contains("SolrException"));
} }
private void testNoCollectionSpecified() throws Exception {
cloudClient.getZkStateReader().updateClusterState(true);
assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection"));
assertFalse(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2"));
// try and create a SolrCore with no collection name
Create createCmd = new Create();
createCmd.setCoreName("corewithnocollection");
createCmd.setCollection("");
String dataDir = SolrTestCaseJ4.dataDir.getAbsolutePath() + File.separator
+ System.currentTimeMillis() + "corewithnocollection" + "_1v";
createCmd.setDataDir(dataDir);
createCmd.setNumShards(1);
if (secondConfigSet) {
createCmd.setCollectionConfigName("conf1");
}
createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1)))
.request(createCmd);
// try and create a SolrCore with no collection name
createCmd.setCollection(null);
createCmd.setCoreName("corewithnocollection2");
createNewSolrServer("", getBaseUrl((HttpSolrServer) clients.get(1)))
.request(createCmd);
// in both cases, the collection should have defaulted to the core name
cloudClient.getZkStateReader().updateClusterState(true);
assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection"));
assertTrue(cloudClient.getZkStateReader().getAllCollections().contains("corewithnocollection2"));
}
private void testNodesUsedByCreate() throws Exception { private void testNodesUsedByCreate() throws Exception {
// we can use this client because we just want base url // we can use this client because we just want base url
@ -631,7 +665,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
boolean disableLegacy = random().nextBoolean(); boolean disableLegacy = random().nextBoolean();
CloudSolrServer client1 = null; CloudSolrServer client1 = null;
if(disableLegacy) { if (disableLegacy) {
log.info("legacyCloud=false"); log.info("legacyCloud=false");
client1 = createCloudClient(null); client1 = createCloudClient(null);
setClusterProp(client1, ZkStateReader.LEGACY_CLOUD, "false"); setClusterProp(client1, ZkStateReader.LEGACY_CLOUD, "false");

View File

@ -64,18 +64,18 @@ public class OverseerTest extends SolrTestCaseJ4 {
private List<Overseer> overseers = new ArrayList<Overseer>(); private List<Overseer> overseers = new ArrayList<Overseer>();
private List<ZkStateReader> readers = new ArrayList<ZkStateReader>(); private List<ZkStateReader> readers = new ArrayList<ZkStateReader>();
private String collection = "collection1";
public static class MockZKController{ public static class MockZKController{
private final SolrZkClient zkClient; private final SolrZkClient zkClient;
private final ZkStateReader zkStateReader; private final ZkStateReader zkStateReader;
private final String nodeName; private final String nodeName;
private final String collection;
private final LeaderElector elector; private final LeaderElector elector;
private final Map<String, ElectionContext> electionContext = Collections.synchronizedMap(new HashMap<String, ElectionContext>()); private final Map<String, ElectionContext> electionContext = Collections.synchronizedMap(new HashMap<String, ElectionContext>());
public MockZKController(String zkAddress, String nodeName, String collection) throws InterruptedException, TimeoutException, IOException, KeeperException { public MockZKController(String zkAddress, String nodeName) throws InterruptedException, TimeoutException, IOException, KeeperException {
this.nodeName = nodeName; this.nodeName = nodeName;
this.collection = collection;
zkClient = new SolrZkClient(zkAddress, TIMEOUT); zkClient = new SolrZkClient(zkAddress, TIMEOUT);
zkStateReader = new ZkStateReader(zkClient); zkStateReader = new ZkStateReader(zkClient);
zkStateReader.createClusterStateWatchersAndUpdate(); zkStateReader.createClusterStateWatchersAndUpdate();
@ -105,7 +105,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
zkClient.close(); zkClient.close();
} }
public String publishState(String coreName, String coreNodeName, String stateName, int numShards) public String publishState(String collection, String coreName, String coreNodeName, String stateName, int numShards)
throws KeeperException, InterruptedException, IOException { throws KeeperException, InterruptedException, IOException {
if (stateName == null) { if (stateName == null) {
ElectionContext ec = electionContext.remove(coreName); ElectionContext ec = electionContext.remove(coreName);
@ -134,41 +134,40 @@ public class OverseerTest extends SolrTestCaseJ4 {
        q.offer(ZkStateReader.toJSON(m));
      }

-     for (int i = 0; i < 120; i++) {
-       String shardId = getShardId("http://" + nodeName + "/solr/", coreName);
-       if (shardId != null) {
-         try {
-           zkClient.makePath("/collections/" + collection + "/leader_elect/"
-               + shardId + "/election", true);
-         } catch (NodeExistsException nee) {}
-         ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
-             "http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP,
-             nodeName, ZkStateReader.CORE_NAME_PROP, coreName,
-             ZkStateReader.SHARD_ID_PROP, shardId,
-             ZkStateReader.COLLECTION_PROP, collection,
-             ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
-         ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
-             elector, shardId, collection, nodeName + "_" + coreName, props,
-             zkStateReader);
-         elector.setup(ctx);
-         elector.joinElection(ctx, false);
-         return shardId;
-       }
-       Thread.sleep(500);
-     }
+     if (collection.length() > 0) {
+       for (int i = 0; i < 120; i++) {
+         String shardId = getShardId(collection, coreNodeName);
+         if (shardId != null) {
+           try {
+             zkClient.makePath("/collections/" + collection + "/leader_elect/"
+                 + shardId + "/election", true);
+           } catch (NodeExistsException nee) {}
+           ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
+               "http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP,
+               nodeName, ZkStateReader.CORE_NAME_PROP, coreName,
+               ZkStateReader.SHARD_ID_PROP, shardId,
+               ZkStateReader.COLLECTION_PROP, collection,
+               ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
+           ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
+               elector, shardId, collection, nodeName + "_" + coreName, props,
+               zkStateReader);
+           elector.setup(ctx);
+           elector.joinElection(ctx, false);
+           return shardId;
+         }
+         Thread.sleep(500);
+       }
+     }
      return null;
    }

-   private String getShardId(final String baseUrl, final String coreName) {
-     Map<String,Slice> slices = zkStateReader.getClusterState().getSlicesMap(
-         collection);
+   private String getShardId(String collection, String coreNodeName) {
+     Map<String,Slice> slices = zkStateReader.getClusterState().getSlicesMap(collection);
      if (slices != null) {
        for (Slice slice : slices.values()) {
          for (Replica replica : slice.getReplicas()) {
-           // TODO: for really large clusters, we could 'index' on this
-           String rbaseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
-           String rcore = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-           if (baseUrl.equals(rbaseUrl) && coreName.equals(rcore)) {
+           String cnn = replica.getName();
+           if (coreNodeName.equals(cnn)) {
              return slice.getName();
            }
          }
        }
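Taken together, the hunk above decouples MockZKController from any one collection: the collection is now named on every publish, and shard lookup keys on the replica's core node name rather than its base URL. A minimal sketch of the changed call pattern (to be read inside OverseerTest, where MockZKController lives; zkAddress is a placeholder):

    // before: the collection was fixed at construction time
    MockZKController before = new MockZKController(zkAddress, "node1", "collection1");
    before.publishState("core1", "core_node1", ZkStateReader.ACTIVE, 1);

    // after: the collection travels with each call, so one controller can publish
    // state for several collections, including a deliberately bad, empty name
    // (as the new testBadQueueItem below exercises)
    MockZKController after = new MockZKController(zkAddress, "node1");
    after.publishState("collection1", "core1", "core_node1", ZkStateReader.ACTIVE, 1);
    after.publishState("collection2", "core1", "core_node1", ZkStateReader.ACTIVE, 1);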
@ -226,17 +225,17 @@ public class OverseerTest extends SolrTestCaseJ4 {
      ZkStateReader reader = new ZkStateReader(zkClient);
      reader.createClusterStateWatchersAndUpdate();

-     zkController = new MockZKController(server.getZkAddress(), "127.0.0.1", "collection1");
+     zkController = new MockZKController(server.getZkAddress(), "127.0.0.1");

      final int numShards=6;

      for (int i = 0; i < numShards; i++) {
-       assertNotNull("shard got no id?", zkController.publishState("core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
+       assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
      }

-     assertEquals(2, reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap().size());
-     assertEquals(2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size());
-     assertEquals(2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size());
+     Map<String,Replica> rmap = reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap();
+     assertEquals(rmap.toString(), 2, rmap.size());
+     assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size());
+     assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size());

      //make sure leaders are in cloud state
      assertNotNull(reader.getLeaderUrl("collection1", "shard1", 15000));
@ -258,6 +257,81 @@ public class OverseerTest extends SolrTestCaseJ4 {
    }
  }
@Test
public void testBadQueueItem() throws Exception {
String zkDir = dataDir.getAbsolutePath() + File.separator
+ "zookeeper/server1/data";
ZkTestServer server = new ZkTestServer(zkDir);
MockZKController zkController = null;
SolrZkClient zkClient = null;
SolrZkClient overseerClient = null;
try {
server.run();
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
overseerClient = electNewOverseer(server.getZkAddress());
ZkStateReader reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
zkController = new MockZKController(server.getZkAddress(), "127.0.0.1");
final int numShards=3;
for (int i = 0; i < numShards; i++) {
assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
}
assertEquals(1, reader.getClusterState().getSlice(collection, "shard1").getReplicasMap().size());
assertEquals(1, reader.getClusterState().getSlice(collection, "shard2").getReplicasMap().size());
assertEquals(1, reader.getClusterState().getSlice(collection, "shard3").getReplicasMap().size());
//make sure leaders are in cloud state
assertNotNull(reader.getLeaderUrl(collection, "shard1", 15000));
assertNotNull(reader.getLeaderUrl(collection, "shard2", 15000));
assertNotNull(reader.getLeaderUrl(collection, "shard3", 15000));
// publish a bad queue item
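      // (an empty collection name is a bogus message; per the assertions below,
      // the Overseer should skip it rather than stall the state-update queue)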
String emptyCollectionName = "";
zkController.publishState(emptyCollectionName, "core0", "node0", ZkStateReader.ACTIVE, 1);
zkController.publishState(emptyCollectionName, "core0", "node0", null, 1);
// make sure the Overseer is still processing items
for (int i = 0; i < numShards; i++) {
assertNotNull("shard got no id?", zkController.publishState("collection2", "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
}
assertEquals(1, reader.getClusterState().getSlice("collection2", "shard1").getReplicasMap().size());
assertEquals(1, reader.getClusterState().getSlice("collection2", "shard2").getReplicasMap().size());
assertEquals(1, reader.getClusterState().getSlice("collection2", "shard3").getReplicasMap().size());
//make sure leaders are in cloud state
assertNotNull(reader.getLeaderUrl("collection2", "shard1", 15000));
assertNotNull(reader.getLeaderUrl("collection2", "shard2", 15000));
assertNotNull(reader.getLeaderUrl("collection2", "shard3", 15000));
} finally {
if (DEBUG) {
if (zkController != null) {
zkClient.printLayoutToStdOut();
}
}
close(zkClient);
if (zkController != null) {
zkController.close();
}
close(overseerClient);
server.shutdown();
}
}
  @Test
  public void testShardAssignmentBigger() throws Exception {
    String zkDir = dataDir.getAbsolutePath() + File.separator
@ -289,7 +363,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
      reader.createClusterStateWatchersAndUpdate();

      for (int i = 0; i < nodeCount; i++) {
-       controllers[i] = new MockZKController(server.getZkAddress(), "node" + i, "collection1");
+       controllers[i] = new MockZKController(server.getZkAddress(), "node" + i);
      }
      for (int i = 0; i < nodeCount; i++) {
        nodeExecutors[i] = Executors.newFixedThreadPool(1, new DefaultSolrThreadFactory("testShardAssignment"));
@ -306,7 +380,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
            final String coreName = "core" + slot;
            try {
-             ids[slot]=controllers[slot % nodeCount].publishState(coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount);
+             ids[slot]=controllers[slot % nodeCount].publishState(collection, coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount);
            } catch (Throwable e) {
              e.printStackTrace();
              fail("register threw exception:" + e.getClass());
@ -551,21 +625,20 @@ public class OverseerTest extends SolrTestCaseJ4 {
      reader = new ZkStateReader(zkClient);
      reader.createClusterStateWatchersAndUpdate();

-     mockController = new MockZKController(server.getZkAddress(), "node1",
-         "collection1");
+     mockController = new MockZKController(server.getZkAddress(), "node1");

      overseerClient = electNewOverseer(server.getZkAddress());

      Thread.sleep(1000);
-     mockController.publishState("core1", "core_node1",
+     mockController.publishState(collection, "core1", "core_node1",
          ZkStateReader.RECOVERING, 1);
-     waitForCollections(reader, "collection1");
+     waitForCollections(reader, collection);
      verifyStatus(reader, ZkStateReader.RECOVERING);

      int version = getClusterStateVersion(zkClient);
-     mockController.publishState("core1", "core_node1", ZkStateReader.ACTIVE,
+     mockController.publishState(collection, "core1", "core_node1", ZkStateReader.ACTIVE,
          1);

      while (version == getClusterStateVersion(zkClient));
@ -575,7 +648,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
      overseerClient.close();
      Thread.sleep(1000); // wait for overseer to get killed

-     mockController.publishState("core1", "core_node1",
+     mockController.publishState(collection, "core1", "core_node1",
          ZkStateReader.RECOVERING, 1);

      version = getClusterStateVersion(zkClient);
@ -588,13 +661,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
assertEquals("Live nodes count does not match", 1, reader assertEquals("Live nodes count does not match", 1, reader
.getClusterState().getLiveNodes().size()); .getClusterState().getLiveNodes().size());
assertEquals("Shard count does not match", 1, reader.getClusterState() assertEquals("Shard count does not match", 1, reader.getClusterState()
.getSlice("collection1", "shard1").getReplicasMap().size()); .getSlice(collection, "shard1").getReplicasMap().size());
version = getClusterStateVersion(zkClient); version = getClusterStateVersion(zkClient);
mockController.publishState("core1", "core_node1", null, 1); mockController.publishState(collection, "core1", "core_node1", null, 1);
while (version == getClusterStateVersion(zkClient)); while (version == getClusterStateVersion(zkClient));
Thread.sleep(500); Thread.sleep(500);
assertFalse("collection1 should be gone after publishing the null state", assertFalse("collection1 should be gone after publishing the null state",
reader.getClusterState().getCollections().contains("collection1")); reader.getClusterState().getCollections().contains(collection));
} finally { } finally {
close(mockController); close(mockController);
close(overseerClient); close(overseerClient);
@ -676,17 +749,17 @@ public class OverseerTest extends SolrTestCaseJ4 {
      for (int i = 0; i < atLeast(4); i++) {
        killCounter.incrementAndGet(); //for each round allow 1 kill

-       mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
+       mockController = new MockZKController(server.getZkAddress(), "node1");

-       mockController.publishState("core1", "node1", "state1",1);
+       mockController.publishState(collection, "core1", "node1", "state1",1);

        if(mockController2!=null) {
          mockController2.close();
          mockController2 = null;
        }

-       mockController.publishState("core1", "node1","state2",1);
+       mockController.publishState(collection, "core1", "node1","state2",1);

-       mockController2 = new MockZKController(server.getZkAddress(), "node2", "collection1");
+       mockController2 = new MockZKController(server.getZkAddress(), "node2");

-       mockController.publishState("core1", "node1", "state1",1);
+       mockController.publishState(collection, "core1", "node1", "state1",1);

        verifyShardLeader(reader, "collection1", "shard1", "core1");

-       mockController2.publishState("core4", "node2", "state2" ,1);
+       mockController2.publishState(collection, "core4", "node2", "state2" ,1);

        mockController.close();
        mockController = null;
        verifyShardLeader(reader, "collection1", "shard1", "core4");
@ -729,11 +802,11 @@ public class OverseerTest extends SolrTestCaseJ4 {
      reader = new ZkStateReader(controllerClient);
      reader.createClusterStateWatchersAndUpdate();

-     mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
+     mockController = new MockZKController(server.getZkAddress(), "node1");

      overseerClient = electNewOverseer(server.getZkAddress());

-     mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1);
+     mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1);

      waitForCollections(reader, "collection1");
@ -743,8 +816,8 @@ public class OverseerTest extends SolrTestCaseJ4 {
      int version = getClusterStateVersion(controllerClient);

-     mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
+     mockController = new MockZKController(server.getZkAddress(), "node1");

-     mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1);
+     mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1);

      while (version == getClusterStateVersion(controllerClient));
@ -794,11 +867,11 @@ public class OverseerTest extends SolrTestCaseJ4 {
      reader = new ZkStateReader(controllerClient);
      reader.createClusterStateWatchersAndUpdate();

-     mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
+     mockController = new MockZKController(server.getZkAddress(), "node1");

      overseerClient = electNewOverseer(server.getZkAddress());

-     mockController.publishState("core1", "node1", ZkStateReader.RECOVERING, 12);
+     mockController.publishState(collection, "core1", "node1", ZkStateReader.RECOVERING, 12);

      waitForCollections(reader, "collection1");


@ -66,10 +66,10 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase {
    int maxDoc = maxDocList[random().nextInt(maxDocList.length - 1)];

-   indexThread = new StopableIndexingThread("1", true, maxDoc);
+   indexThread = new StopableIndexingThread(controlClient, cloudClient, "1", true, maxDoc);
    indexThread.start();

-   indexThread2 = new StopableIndexingThread("2", true, maxDoc);
+   indexThread2 = new StopableIndexingThread(controlClient, cloudClient, "2", true, maxDoc);
    indexThread2.start();
@ -100,7 +100,7 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase {
    Thread.sleep(1000);

-   waitForThingsToLevelOut(45);
+   waitForThingsToLevelOut(90);

    Thread.sleep(2000);


@ -103,6 +103,8 @@ public class TestDistribDocBasedVersion extends AbstractFullDistribZkTestBase {
      doTestDocVersions();
      doTestHardFail();

+     commit(); // work around SOLR-5628

      testFinished = true;
    } finally {
      if (!testFinished) {


@ -190,7 +190,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
      cc = getCoreContainer();

      ZkController zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
-         "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+         "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {

            @Override
            public List<CoreDescriptor> getCurrentDescriptors() {
@ -230,7 +230,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
      cc = getCoreContainer();

      zkController = new ZkController(cc, server.getZkAddress(),
-         TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+         TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {

            @Override
            public List<CoreDescriptor> getCurrentDescriptors() {
@ -284,7 +284,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
    try {
      zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
-         "http://127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+         "http://127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {

            @Override
            public List<CoreDescriptor> getCurrentDescriptors() {


@ -64,6 +64,8 @@ public class HdfsTestUtil {
System.setProperty("solr.hdfs.home", "/solr_hdfs_home"); System.setProperty("solr.hdfs.home", "/solr_hdfs_home");
System.setProperty("solr.hdfs.blockcache.global", Boolean.toString(LuceneTestCase.random().nextBoolean()));
final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null); final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
dfsCluster.waitActive(); dfsCluster.waitActive();
@ -92,6 +94,7 @@ public class HdfsTestUtil {
System.clearProperty("test.build.data"); System.clearProperty("test.build.data");
System.clearProperty("test.cache.data"); System.clearProperty("test.cache.data");
System.clearProperty("solr.hdfs.home"); System.clearProperty("solr.hdfs.home");
System.clearProperty("solr.hdfs.blockcache.global");
if (dfsCluster != null) { if (dfsCluster != null) {
timers.remove(dfsCluster); timers.remove(dfsCluster);
dfsCluster.shutdown(); dfsCluster.shutdown();


@ -0,0 +1,170 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud.hdfs;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.LuceneTestCase.Nightly;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.cloud.BasicDistributedZkTest;
import org.apache.solr.cloud.StopableIndexingThread;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.HdfsDirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.servlet.SolrDispatchFilter;
import org.apache.solr.store.blockcache.BlockCache;
import org.apache.solr.store.blockcache.BlockDirectory;
import org.apache.solr.store.blockcache.BlockDirectoryCache;
import org.apache.solr.store.blockcache.Cache;
import org.apache.solr.util.RefCounted;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
@Slow
@Nightly
@ThreadLeakScope(Scope.NONE) // hdfs client currently leaks thread(s)
public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest {
private static final String SOLR_HDFS_HOME = "solr.hdfs.home";
private static final String SOLR_HDFS_BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global";
private static final String ACOLLECTION = "acollection";
private static MiniDFSCluster dfsCluster;
@BeforeClass
public static void setupClass() throws Exception {
schemaString = "schema15.xml"; // we need a string id
dfsCluster = HdfsTestUtil.setupClass(new File(TEMP_DIR,
HdfsBasicDistributedZk2Test.class.getName() + "_"
+ System.currentTimeMillis()).getAbsolutePath());
System.setProperty(SOLR_HDFS_HOME, dfsCluster.getURI().toString() + "/solr");
}
@AfterClass
public static void teardownClass() throws Exception {
HdfsTestUtil.teardownClass(dfsCluster);
System.clearProperty(SOLR_HDFS_HOME);
dfsCluster = null;
}
@Override
protected String getDataDir(String dataDir) throws IOException {
return HdfsTestUtil.getDataDir(dfsCluster, dataDir);
}
public HdfsWriteToMultipleCollectionsTest() {
super();
sliceCount = 1;
shardCount = 3;
}
protected String getSolrXml() {
return "solr-no-core.xml";
}
@Override
public void doTest() throws Exception {
int docCount = random().nextInt(1313) + 1;
int cnt = random().nextInt(4) + 1;
for (int i = 0; i < cnt; i++) {
createCollection(ACOLLECTION + i, 2, 2, 9);
}
for (int i = 0; i < cnt; i++) {
waitForRecoveriesToFinish(ACOLLECTION + i, false);
}
List<CloudSolrServer> cloudServers = new ArrayList<CloudSolrServer>();
List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>();
for (int i = 0; i < cnt; i++) {
CloudSolrServer server = new CloudSolrServer(zkServer.getZkAddress());
server.setDefaultCollection(ACOLLECTION + i);
cloudServers.add(server);
StopableIndexingThread indexThread = new StopableIndexingThread(null, server, "1", true, docCount);
threads.add(indexThread);
indexThread.start();
}
int addCnt = 0;
for (StopableIndexingThread thread : threads) {
thread.join();
addCnt += thread.getNumAdds() - thread.getNumDeletes();
}
long collectionsCount = 0;
for (CloudSolrServer server : cloudServers) {
server.commit();
collectionsCount += server.query(new SolrQuery("*:*")).getResults().getNumFound();
}
for (CloudSolrServer server : cloudServers) {
server.shutdown();
}
assertEquals(addCnt, collectionsCount);
BlockCache lastBlockCache = null;
// assert that we are using the block directory and that write and read caching are being used
for (JettySolrRunner jetty : jettys) {
CoreContainer cores = ((SolrDispatchFilter) jetty.getDispatchFilter()
.getFilter()).getCores();
Collection<SolrCore> solrCores = cores.getCores();
for (SolrCore core : solrCores) {
if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName()
.startsWith(ACOLLECTION)) {
assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory);
RefCounted<IndexWriter> iwRef = core.getUpdateHandler()
.getSolrCoreState().getIndexWriter(core);
try {
IndexWriter iw = iwRef.get();
NRTCachingDirectory directory = (NRTCachingDirectory) iw
.getDirectory();
BlockDirectory blockDirectory = (BlockDirectory) directory
.getDelegate();
assertTrue(blockDirectory.isBlockCacheReadEnabled());
assertTrue(blockDirectory.isBlockCacheWriteEnabled());
Cache cache = blockDirectory.getCache();
            // we know it's a BlockDirectoryCache, but be future proof
assertTrue(cache instanceof BlockDirectoryCache);
BlockCache blockCache = ((BlockDirectoryCache) cache)
.getBlockCache();
if (lastBlockCache != null) {
if (Boolean.getBoolean(SOLR_HDFS_BLOCKCACHE_GLOBAL)) {
assertEquals(lastBlockCache, blockCache);
} else {
assertNotSame(lastBlockCache, blockCache);
}
}
lastBlockCache = blockCache;
} finally {
iwRef.decref();
}
}
}
}
}
}


@ -30,8 +30,12 @@ import org.apache.solr.util.RefCounted;
import org.junit.AfterClass;
import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

public class TestNonNRTOpen extends SolrTestCaseJ4 {
+ private static final Logger log = LoggerFactory.getLogger(TestNonNRTOpen.class);

  @BeforeClass
  public static void beforeClass() throws Exception {
    // use a filesystem, because we need to create an index, then "start up solr"
@ -80,6 +84,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {
    // core reload
    String core = h.getCore().getName();
+   log.info("Reloading core: " + h.getCore().toString());
    h.getCoreContainer().reload(core);
    assertNotNRT(1);
@ -90,6 +95,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {
    // add a doc and core reload
    assertU(adoc("bazz", "doc2"));
+   log.info("Reloading core: " + h.getCore().toString());
    h.getCoreContainer().reload(core);
    assertNotNRT(3);
  }
@ -127,11 +133,15 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {
  }

  static void assertNotNRT(int maxDoc) {
-   RefCounted<SolrIndexSearcher> searcher = h.getCore().getSearcher();
+   SolrCore core = h.getCore();
+   log.info("Checking notNRT & maxDoc=" + maxDoc + " of core=" + core.toString());
+   RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
    try {
-     DirectoryReader ir = searcher.get().getIndexReader();
-     assertEquals(maxDoc, ir.maxDoc());
-     assertFalse("expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt"));
+     SolrIndexSearcher s = searcher.get();
+     DirectoryReader ir = s.getIndexReader();
+     assertEquals("SOLR-5815? : wrong maxDoc: core=" + core.toString() +" searcher=" + s.toString(),
+         maxDoc, ir.maxDoc());
+     assertFalse("SOLR-5815? : expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt"));
    } finally {
      searcher.decref();
    }


@ -0,0 +1,46 @@
package org.apache.solr.schema;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.SortField;
/**
* Custom field wrapping an int, to test sorting via a custom comparator.
*/
public class WrappedIntField extends TrieIntField {
Expression expr;
public WrappedIntField() {
try {
expr = JavascriptCompiler.compile("payload % 3");
} catch (Exception e) {
throw new RuntimeException("impossible?", e);
}
}
@Override
public SortField getSortField(final SchemaField field, final boolean reverse) {
field.checkSortability();
SimpleBindings bindings = new SimpleBindings();
bindings.add(super.getSortField(field, reverse));
return expr.getSortField(bindings, reverse);
}
}
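For readers new to the expressions module, the same three calls work outside a FieldType. A minimal self-contained sketch (the class name and helper method are illustrative, not part of this patch):

    import java.text.ParseException;
    import org.apache.lucene.expressions.Expression;
    import org.apache.lucene.expressions.SimpleBindings;
    import org.apache.lucene.expressions.js.JavascriptCompiler;
    import org.apache.lucene.search.SortField;

    class PayloadSortExample {
      // builds a SortField that orders documents by (payload % 3)
      static SortField payloadMod3(boolean reverse) throws ParseException {
        Expression expr = JavascriptCompiler.compile("payload % 3");
        SimpleBindings bindings = new SimpleBindings();
        bindings.add(new SortField("payload", SortField.Type.INT)); // binds the name "payload"
        return expr.getSortField(bindings, reverse);
      }
    }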


@ -0,0 +1,53 @@
package org.apache.solr.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
/**
* Test QueryComponent.doFieldSortValues
*/
@SuppressCodecs({"Lucene3x"})
public class TestFieldSortValues extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-minimal.xml", "schema-field-sort-values.xml");
}
public void testCustomComparator() throws Exception {
clearIndex();
assertU(adoc(sdoc("id", "1", "payload", "2")));
assertU(adoc(sdoc("id", "2", "payload", "3")));
assertU(adoc(sdoc("id", "3", "payload", "1")));
assertU(adoc(sdoc("id", "4", "payload", "5")));
assertU(adoc(sdoc("id", "5", "payload", "4")));
assertU(commit());
// payload is backed by a custom sort field which returns the payload value mod 3
assertQ(req("q", "*:*", "fl", "id", "sort", "payload asc, id asc", "fsv", "true")
, "//result/doc[int='2' and position()=1]"
, "//result/doc[int='3' and position()=2]"
, "//result/doc[int='5' and position()=3]"
, "//result/doc[int='1' and position()=4]"
, "//result/doc[int='4' and position()=5]");
}
}
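Working the assertion by hand: payloads 2, 3, 1, 5, 4 (for ids 1 through 5) map under mod 3 to 2, 0, 1, 2, 1; sorting ascending on that value, with id as the tiebreaker, yields ids 2, 3, 5, 1, 4, which is exactly the position list the assertQ above checks.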


@ -51,6 +51,7 @@ public class BlockCacheTest extends LuceneTestCase {
      int file = 0;
      blockCacheKey.setBlock(block);
      blockCacheKey.setFile(file);
+     blockCacheKey.setPath("/");

      if (blockCache.fetch(blockCacheKey, buffer)) {
        hitsInCache.incrementAndGet();
@ -91,6 +92,7 @@ public class BlockCacheTest extends LuceneTestCase {
    BlockCacheKey blockCacheKey = new BlockCacheKey();
    blockCacheKey.setBlock(0);
    blockCacheKey.setFile(0);
+   blockCacheKey.setPath("/");

    byte[] newData = new byte[blockSize*3];
    byte[] testData = testData(random, blockSize, newData);
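The new path component appears to be what lets one cache hold entries from several directories once the global block cache (the solr.hdfs.blockcache.global setting shown further down) is enabled. A sketch of a fully populated key; the path value is illustrative:

    BlockCacheKey key = new BlockCacheKey();
    key.setBlock(0);
    key.setFile(0);
    key.setPath("/collection1/core_node1"); // hypothetical; distinguishes otherwise-identical block/file ids across directories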


@ -0,0 +1,153 @@
package org.apache.solr.update;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.SolrTestCaseJ4;
import java.util.Locale;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestExceedMaxTermLength extends SolrTestCaseJ4 {
public final static String TEST_SOLRCONFIG_NAME = "solrconfig.xml";
public final static String TEST_SCHEMAXML_NAME = "schema11.xml";
  private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1;
  private final static int maxTestTermLength = IndexWriter.MAX_TERM_LENGTH * 2;
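  // for reference: IndexWriter.MAX_TERM_LENGTH is 32766 (UTF-8 bytes), so values built from these bounds exceed the limit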
@BeforeClass
public static void beforeTests() throws Exception {
initCore(TEST_SOLRCONFIG_NAME, TEST_SCHEMAXML_NAME);
}
@After
public void cleanup() throws Exception {
assertU(delQ("*:*"));
assertU(commit());
}
@Test
public void testExceededMaxTermLength(){
// problematic field
final String longFieldName = "cat";
final String longFieldValue = TestUtil.randomSimpleString(random(),
minTestTermLength,
        maxTestTermLength);
final String okayFieldName = TestUtil.randomSimpleString(random(), 1, 50) + "_sS" ; //Dynamic field
final String okayFieldValue = TestUtil.randomSimpleString(random(),
minTestTermLength,
        maxTestTermLength);
boolean includeOkayFields = random().nextBoolean();
if(random().nextBoolean()) {
//Use XML
String doc;
if(includeOkayFields) {
doc = adoc("id", "1", longFieldName, longFieldValue, okayFieldName, okayFieldValue);
} else {
doc = adoc("id", "1", longFieldName, longFieldValue);
}
assertFailedU(doc);
} else {
//Use JSON
try {
if(includeOkayFields) {
String jsonStr = "[{'id':'1','%s':'%s', '%s': '%s'}]";
jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue,
okayFieldName, okayFieldValue);
updateJ(json(jsonStr), null);
} else {
String jsonStr = "[{'id':'1','%s':'%s'}]";
jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue);
updateJ(json(jsonStr), null);
}
} catch (Exception e) {
//expected
String msg= e.getCause().getMessage();
assertTrue(msg.contains("one immense term in field=\"cat\""));
}
}
assertU(commit());
assertQ(req("q", "*:*"), "//*[@numFound='0']");
}
@Test
public void testExceededMaxTermLengthWithLimitingFilter(){
// problematic field
final String longFieldName = "cat_length";
final String longFieldValue = TestUtil.randomSimpleString(random(),
minTestTermLength,
        maxTestTermLength);
final String okayFieldName = TestUtil.randomSimpleString(random(), 1, 50) + "_sS" ; //Dynamic field
final String okayFieldValue = TestUtil.randomSimpleString(random(),
minTestTermLength,
        maxTestTermLength);
boolean includeOkayFields = random().nextBoolean();
if(random().nextBoolean()) {
//Use XML
String doc;
if(includeOkayFields) {
doc = adoc("id", "1", longFieldName, longFieldValue, okayFieldName, okayFieldValue);
} else {
doc = adoc("id", "1", longFieldName, longFieldValue);
}
assertU(doc);
} else {
//Use JSON
String jsonStr = null;
try {
if(includeOkayFields) {
jsonStr = "[{'id':'1','%s':'%s', '%s': '%s'}]";
jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue,
okayFieldName, okayFieldValue);
updateJ(json(jsonStr), null);
} else {
jsonStr = "[{'id':'1','%s':'%s'}]";
jsonStr = String.format(Locale.ROOT, jsonStr, longFieldName, longFieldValue);
updateJ(json(jsonStr), null);
}
      } catch (Exception e) {
        // not expected: the limiting filter should truncate the over-long term
        fail("Should not have failed adding doc " + jsonStr);
      }
}
assertU(commit());
assertQ(req("q", "*:*"), "//*[@numFound='1']");
}
}


@ -129,6 +129,9 @@
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str> <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<!-- Enable/Disable the hdfs cache. --> <!-- Enable/Disable the hdfs cache. -->
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str> <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
<!-- Enable/Disable using one global cache for all SolrCores.
The settings used will be from the first HdfsDirectoryFactory created. -->
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
</directoryFactory> </directoryFactory>


@ -626,7 +626,7 @@ public class ZkStateReader {
  }

  /**
-  * Returns the baseURL corrisponding to a given node's nodeName --
+  * Returns the baseURL corresponding to a given node's nodeName --
   * NOTE: does not (currently) imply that the nodeName (or resulting
   * baseURL) exists in the cluster.
   * @lucene.experimental


@ -1428,122 +1428,13 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
    return rsp;
  }

- abstract class StopableThread extends Thread {
+ static abstract class StopableThread extends Thread {
    public StopableThread(String name) {
      super(name);
    }
    public abstract void safeStop();
  }
- class StopableIndexingThread extends StopableThread {
-   private volatile boolean stop = false;
-   protected final String id;
-   protected final List<String> deletes = new ArrayList<String>();
-   protected Set<String> addFails = new HashSet<String>();
-   protected Set<String> deleteFails = new HashSet<String>();
-   protected boolean doDeletes;
-   private int numCycles;
-
-   public StopableIndexingThread(String id, boolean doDeletes) {
-     this(id, doDeletes, -1);
-   }
-
-   public StopableIndexingThread(String id, boolean doDeletes, int numCycles) {
-     super("StopableIndexingThread");
-     this.id = id;
-     this.doDeletes = doDeletes;
-     this.numCycles = numCycles;
-     setDaemon(true);
-   }
-
-   @Override
-   public void run() {
-     int i = 0;
-     int numDone = 0;
-     int numDeletes = 0;
-     int numAdds = 0;
-     while (true && !stop) {
-       if (numCycles != -1) {
-         if (numDone > numCycles) {
-           break;
-         }
-       }
-       ++numDone;
-       String id = this.id + "-" + i;
-       ++i;
-       boolean addFailed = false;
-       if (doDeletes && random().nextBoolean() && deletes.size() > 0) {
-         String delete = deletes.remove(0);
-         try {
-           numDeletes++;
-           UpdateRequest req = new UpdateRequest();
-           req.deleteById(delete);
-           req.setParam("CONTROL", "TRUE");
-           req.process(controlClient);
-           cloudClient.deleteById(delete);
-         } catch (Exception e) {
-           System.err.println("REQUEST FAILED:");
-           e.printStackTrace();
-           if (e instanceof SolrServerException) {
-             System.err.println("ROOT CAUSE:");
-             ((SolrServerException) e).getRootCause().printStackTrace();
-           }
-           deleteFails.add(id);
-         }
-       }
-       try {
-         numAdds++;
-         indexr("id", id, i1, 50, t1,
-             "to come to the aid of their country.");
-       } catch (Exception e) {
-         addFailed = true;
-         System.err.println("REQUEST FAILED:");
-         e.printStackTrace();
-         if (e instanceof SolrServerException) {
-           System.err.println("ROOT CAUSE:");
-           ((SolrServerException) e).getRootCause().printStackTrace();
-         }
-         addFails.add(id);
-       }
-       if (!addFailed && doDeletes && random().nextBoolean()) {
-         deletes.add(id);
-       }
-       try {
-         Thread.currentThread().sleep(random().nextInt(100));
-       } catch (InterruptedException e) {
-         Thread.currentThread().interrupt();
-       }
-     }
-     System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails"
-         + " deletes:" + numDeletes);
-   }
-
-   @Override
-   public void safeStop() {
-     stop = true;
-   }
-
-   public Set<String> getAddFails() {
-     return addFails;
-   }
-
-   public Set<String> getDeleteFails() {
-     return deleteFails;
-   }
-
-   public int getFailCount() {
-     return addFails.size() + deleteFails.size();
-   }
- };
  class StopableSearchThread extends StopableThread {
    private volatile boolean stop = false;
    protected final AtomicInteger queryFails = new AtomicInteger();


@ -0,0 +1,185 @@
package org.apache.solr.cloud;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class StopableIndexingThread extends AbstractFullDistribZkTestBase.StopableThread {
private static String t1 = "a_t";
private static String i1 = "a_si";
private volatile boolean stop = false;
protected final String id;
protected final List<String> deletes = new ArrayList<String>();
protected Set<String> addFails = new HashSet<String>();
protected Set<String> deleteFails = new HashSet<String>();
protected boolean doDeletes;
private int numCycles;
private SolrServer controlClient;
private SolrServer cloudClient;
private int numDeletes;
private int numAdds;
public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes) {
this(controlClient, cloudClient, id, doDeletes, -1);
}
public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes, int numCycles) {
super("StopableIndexingThread");
this.controlClient = controlClient;
this.cloudClient = cloudClient;
this.id = id;
this.doDeletes = doDeletes;
this.numCycles = numCycles;
setDaemon(true);
}
@Override
public void run() {
int i = 0;
int numDone = 0;
numDeletes = 0;
numAdds = 0;
while (true && !stop) {
if (numCycles != -1) {
if (numDone > numCycles) {
break;
}
}
++numDone;
String id = this.id + "-" + i;
++i;
boolean addFailed = false;
if (doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean() && deletes.size() > 0) {
String delete = deletes.remove(0);
try {
numDeletes++;
if (controlClient != null) {
UpdateRequest req = new UpdateRequest();
req.deleteById(delete);
req.setParam("CONTROL", "TRUE");
req.process(controlClient);
}
cloudClient.deleteById(delete);
} catch (Exception e) {
System.err.println("REQUEST FAILED:");
e.printStackTrace();
if (e instanceof SolrServerException) {
System.err.println("ROOT CAUSE:");
((SolrServerException) e).getRootCause().printStackTrace();
}
deleteFails.add(id);
}
}
try {
numAdds++;
indexr("id", id, i1, 50, t1,
"to come to the aid of their country.");
} catch (Exception e) {
addFailed = true;
System.err.println("REQUEST FAILED:");
e.printStackTrace();
if (e instanceof SolrServerException) {
System.err.println("ROOT CAUSE:");
((SolrServerException) e).getRootCause().printStackTrace();
}
addFails.add(id);
}
if (!addFailed && doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean()) {
deletes.add(id);
}
try {
        Thread.sleep(AbstractFullDistribZkTestBase.random().nextInt(100));
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails"
+ " deletes:" + numDeletes);
}
@Override
public void safeStop() {
stop = true;
}
public Set<String> getAddFails() {
return addFails;
}
public Set<String> getDeleteFails() {
return deleteFails;
}
public int getFailCount() {
return addFails.size() + deleteFails.size();
}
protected void addFields(SolrInputDocument doc, Object... fields) {
for (int i = 0; i < fields.length; i += 2) {
doc.addField((String) (fields[i]), fields[i + 1]);
}
}
protected void indexr(Object... fields) throws Exception {
SolrInputDocument doc = new SolrInputDocument();
addFields(doc, fields);
addFields(doc, "rnd_b", true);
indexDoc(doc);
}
protected void indexDoc(SolrInputDocument doc) throws IOException,
SolrServerException {
if (controlClient != null) {
UpdateRequest req = new UpdateRequest();
req.add(doc);
req.setParam("CONTROL", "TRUE");
req.process(controlClient);
}
UpdateRequest ureq = new UpdateRequest();
ureq.add(doc);
ureq.process(cloudClient);
}
public int getNumDeletes() {
return numDeletes;
}
public int getNumAdds() {
return numAdds;
}
}
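A minimal usage sketch matching the call sites above, inside a test method that declares throws Exception (passing null for the control client skips the CONTROL-tagged mirror updates; the ZooKeeper address and collection name are placeholders):

    CloudSolrServer server = new CloudSolrServer("127.0.0.1:9983");
    server.setDefaultCollection("acollection0");
    StopableIndexingThread indexer = new StopableIndexingThread(null, server, "1", true, 100);
    indexer.start();
    indexer.join();                       // numCycles=100, so the thread stops on its own
    int kept = indexer.getNumAdds() - indexer.getNumDeletes(); // docs expected to remain
    server.shutdown();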