LUCENE-5487: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1575397 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-03-07 20:50:45 +00:00
commit d784980654
91 changed files with 2511 additions and 978 deletions

View File

@ -16,6 +16,7 @@
<orderEntry type="library" scope="TEST" name="HSQLDB" level="project" />
<orderEntry type="library" scope="TEST" name="Derby" level="project" />
<orderEntry type="library" scope="TEST" name="Solr DIH test library" level="project" />
<orderEntry type="library" scope="TEST" name="Solr example library" level="project" />
<orderEntry type="library" name="Solr core library" level="project" />
<orderEntry type="library" name="Solrj library" level="project" />
<orderEntry type="library" name="Solr DIH library" level="project" />

View File

@ -18,6 +18,7 @@
<orderEntry type="library" name="Solr morphlines core library" level="project" />
<orderEntry type="library" name="Solr morphlines cell library" level="project" />
<orderEntry type="library" scope="TEST" name="Solr morphlines core test library" level="project" />
<orderEntry type="library" scope="TEST" name="Solr example library" level="project" />
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
<orderEntry type="module" module-name="solr-core" />

View File

@ -212,7 +212,7 @@ def checkClassSummaries(fullPath):
if inThing:
if lineLower.find('</tr>') != -1:
if not hasDesc:
missing.append((lastCaption, lastItem))
missing.append((lastCaption, unEscapeURL(lastItem)))
inThing = False
continue
else:
@ -298,6 +298,11 @@ def checkSummary(fullPath):
f.close()
return anyMissing
def unEscapeURL(s):
  """Decode percent-escapes in a URL fragment so it can be shown in a report.

  The previous implementation only translated '%20' to a space; any other
  escape (for example '%3C' or '%5B') was left encoded in the output. This
  version decodes every %XX sequence.

  Args:
    s: the (possibly percent-encoded) string.

  Returns:
    The string with all percent-escapes decoded. Text that contains no
    escapes is returned unchanged.
  """
  # Imported locally so this helper does not depend on the file's import block.
  from urllib.parse import unquote
  return unquote(s)
def unescapeHTML(s):
s = s.replace('&lt;', '<')
s = s.replace('&gt;', '>')

View File

@ -731,7 +731,7 @@ def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs
os.chdir('solr')
print(" run tests w/ Java 7 and testArgs='%s'..." % testArgs)
run('%s; ant clean test %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
run('%s; ant clean test -Dtests.slow=false %s' % (javaExe('1.7'), testArgs), '%s/test.log' % unpackPath)
# test javadocs
print(' generate javadocs w/ Java 7...')

View File

@ -68,6 +68,13 @@ Optimizations
======================= Lucene 4.8.0 =======================
Changes in Runtime Behavior
* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException
if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous
behavior of silently ignoring these terms, use LengthFilter in your Analyzer.
(hossman, Mike McCandless, Varun Thacker)
New Features
* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
@ -89,6 +96,13 @@ New Features
* LUCENE-5485: Add circumfix support to HunspellStemFilter. (Robert Muir)
* LUCENE-5224: Add iconv, oconv, and ignore support to HunspellStemFilter.
(Robert Muir)
* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector
support arbitrary Sort specifications.
(Robert Muir, Mike McCandless, Adrien Grand)
API Changes
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
@ -96,6 +110,12 @@ API Changes
* LUCENE-5468: Move offline Sort (from suggest module) to OfflineSort. (Robert Muir)
* LUCENE-5493: SortingMergePolicy and EarlyTerminatingSortingCollector take
Sort instead of Sorter. BlockJoinSorter is removed, replaced with
BlockJoinComparatorSource, which can take a Sort for ordering of parents
and a separate Sort for ordering of children within a block.
(Robert Muir, Mike McCandless, Adrien Grand)
Optimizations
* LUCENE-5468: HunspellStemFilter uses 10 to 100x less RAM. It also loads

View File

@ -21,14 +21,17 @@ import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.CharSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import java.io.BufferedInputStream;
@ -67,6 +70,9 @@ public class Dictionary {
private static final String FLAG_KEY = "FLAG";
private static final String COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
private static final String CIRCUMFIX_KEY = "CIRCUMFIX";
private static final String IGNORE_KEY = "IGNORE";
private static final String ICONV_KEY = "ICONV";
private static final String OCONV_KEY = "OCONV";
private static final String NUM_FLAG_TYPE = "num";
private static final String UTF8_FLAG_TYPE = "UTF-8";
@ -110,6 +116,16 @@ public class Dictionary {
int circumfix = -1; // circumfix flag, or -1 if one is not defined
// ignored characters (dictionary, affix, inputs)
private char[] ignore;
// FSTs used for ICONV/OCONV, output ord pointing to replacement text
FST<CharsRef> iconv;
FST<CharsRef> oconv;
boolean needsInputCleaning;
boolean needsOutputCleaning;
/**
* Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
* and dictionary files.
@ -136,9 +152,13 @@ public class Dictionary {
*/
public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
this.ignoreCase = ignoreCase;
// hungarian has thousands of AF before the SET, so a 32k buffer is needed
BufferedInputStream buffered = new BufferedInputStream(affix, 32768);
buffered.mark(32768);
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
// TODO: we really need to probably buffer this on disk since so many newer dictionaries
// (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
// their encoding... but for now this large buffer is a workaround
BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
buffered.mark(65536);
String encoding = getDictionaryEncoding(buffered);
buffered.reset();
CharsetDecoder decoder = getJavaEncoding(encoding);
@ -249,6 +269,29 @@ public class Dictionary {
throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber());
}
circumfix = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(IGNORE_KEY)) {
String parts[] = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal IGNORE declaration", reader.getLineNumber());
}
ignore = parts[1].toCharArray();
Arrays.sort(ignore);
needsInputCleaning = true;
} else if (line.startsWith(ICONV_KEY) || line.startsWith(OCONV_KEY)) {
String parts[] = line.split("\\s+");
String type = parts[0];
if (parts.length != 2) {
throw new ParseException("Illegal " + type + " declaration", reader.getLineNumber());
}
int num = Integer.parseInt(parts[1]);
FST<CharsRef> res = parseConversions(reader, num);
if (type.equals("ICONV")) {
iconv = res;
needsInputCleaning |= iconv != null;
} else {
oconv = res;
needsOutputCleaning |= oconv != null;
}
}
}
@ -291,6 +334,7 @@ public class Dictionary {
Map<String,Integer> seenPatterns) throws IOException, ParseException {
BytesRef scratch = new BytesRef();
StringBuilder sb = new StringBuilder();
String args[] = header.split("\\s+");
boolean crossProduct = args[2].equals("Y");
@ -300,9 +344,6 @@ public class Dictionary {
ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
for (int i = 0; i < numLines; i++) {
if (currentAffix > Short.MAX_VALUE) {
throw new UnsupportedOperationException("Too many affixes, please report this to dev@lucene.apache.org");
}
assert affixWriter.getPosition() == currentAffix << 3;
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");
@ -345,6 +386,9 @@ public class Dictionary {
Integer patternIndex = seenPatterns.get(regex);
if (patternIndex == null) {
patternIndex = patterns.size();
if (patternIndex > Short.MAX_VALUE) {
throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org");
}
seenPatterns.put(regex, patternIndex);
Pattern pattern = Pattern.compile(regex);
patterns.add(pattern);
@ -355,6 +399,8 @@ public class Dictionary {
if (stripOrd < 0) {
// already exists in our hash
stripOrd = (-stripOrd)-1;
} else if (stripOrd > Character.MAX_VALUE) {
throw new UnsupportedOperationException("Too many unique strips, please report this to dev@lucene.apache.org");
}
if (appendFlags == null) {
@ -368,7 +414,7 @@ public class Dictionary {
appendFlagsOrd = (-appendFlagsOrd)-1;
} else if (appendFlagsOrd > Short.MAX_VALUE) {
// this limit is probably flexible, but its a good sanity check too
throw new UnsupportedOperationException("Too many unique flags, please report this to dev@lucene.apache.org");
throw new UnsupportedOperationException("Too many unique append flags, please report this to dev@lucene.apache.org");
}
affixWriter.writeShort((short)flag);
@ -378,6 +424,11 @@ public class Dictionary {
affixWriter.writeShort((short)patternOrd);
affixWriter.writeShort((short)appendFlagsOrd);
if (needsInputCleaning) {
CharSequence cleaned = cleanInput(affixArg, sb);
affixArg = cleaned.toString();
}
List<Character> list = affixes.get(affixArg);
if (list == null) {
list = new ArrayList<Character>();
@ -389,6 +440,31 @@ public class Dictionary {
}
}
/**
 * Reads {@code num} conversion lines (the body of an ICONV or OCONV table) from the affix
 * file and compiles them into an FST mapping each source pattern to its replacement text.
 *
 * <p>Every line must split on whitespace into exactly three fields; the second field is the
 * pattern and the third is its replacement.
 *
 * @param reader affix file reader; the table's header line has already been consumed
 * @param num number of mapping lines announced by the header
 * @return the compiled FST; callers in this class check the result for {@code null} before use
 * @throws IOException if reading from {@code reader} fails
 * @throws ParseException if the file ends before {@code num} lines were read, or a line does
 *         not have exactly three fields
 * @throws IllegalStateException if the same pattern is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // Sorted map: the entries are fed to the FST builder in key order below.
  // NOTE(review): presumably the builder requires sorted input - confirm against the FST Builder docs.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // A truncated table used to surface as a NullPointerException on line.split().
      throw new ParseException("unexpected end of file: expected " + num
          + " conversion mappings but found " + i, reader.getLineNumber());
    }
    String parts[] = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRef scratchInts = new IntsRef();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts, new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
/**
* Parses the encoding specified in the affix file readable through the provided InputStream
*
@ -485,6 +561,8 @@ public class Dictionary {
BytesRef flagsScratch = new BytesRef();
IntsRef scratchInts = new IntsRef();
StringBuilder sb = new StringBuilder();
File unsorted = File.createTempFile("unsorted", "dat", tempDir);
try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
for (InputStream dictionary : dictionaries) {
@ -492,16 +570,19 @@ public class Dictionary {
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
while ((line = lines.readLine()) != null) {
if (ignoreCase) {
if (needsInputCleaning) {
int flagSep = line.lastIndexOf('/');
if (flagSep == -1) {
writer.write(line.toLowerCase(Locale.ROOT).getBytes(IOUtils.CHARSET_UTF_8));
CharSequence cleansed = cleanInput(line, sb);
writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
} else {
StringBuilder sb = new StringBuilder();
sb.append(line.substring(0, flagSep).toLowerCase(Locale.ROOT));
if (flagSep < line.length()) {
sb.append(line.substring(flagSep, line.length()));
String text = line.substring(0, flagSep);
CharSequence cleansed = cleanInput(text, sb);
if (cleansed != sb) {
sb.setLength(0);
sb.append(cleansed);
}
sb.append(line.substring(flagSep));
writer.write(sb.toString().getBytes(IOUtils.CHARSET_UTF_8));
}
} else {
@ -761,4 +842,76 @@ public class Dictionary {
static boolean hasFlag(char flags[], char flag) {
return Arrays.binarySearch(flags, flag) >= 0;
}
/**
 * Normalizes {@code input} into {@code reuse}: drops characters listed in {@code ignore},
 * applies the {@code iconv} mappings when present, and lowercases when {@code ignoreCase}
 * is set.
 *
 * @param input text to normalize
 * @param reuse buffer that is cleared and then filled with the result
 * @return {@code reuse}, holding the normalized text
 */
CharSequence cleanInput(CharSequence input, StringBuilder reuse) {
  reuse.setLength(0);

  final boolean hasConversions = iconv != null;
  // Without input conversions, lowercasing can happen while copying; otherwise it has to
  // wait until the mappings have been applied.
  final boolean lowercaseWhileCopying = ignoreCase && !hasConversions;

  for (int pos = 0; pos < input.length(); pos++) {
    final char c = input.charAt(pos);
    final boolean ignored = ignore != null && Arrays.binarySearch(ignore, c) >= 0;
    if (!ignored) {
      reuse.append(lowercaseWhileCopying ? Character.toLowerCase(c) : c);
    }
  }

  if (!hasConversions) {
    return reuse;
  }

  try {
    applyMappings(iconv, reuse);
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }

  if (ignoreCase) {
    for (int pos = 0; pos < reuse.length(); pos++) {
      reuse.setCharAt(pos, Character.toLowerCase(reuse.charAt(pos)));
    }
  }
  return reuse;
}
// TODO: this could be more efficient!
/**
 * Rewrites {@code sb} in place using the mappings stored in {@code fst}.
 *
 * <p>For each start position the FST is walked from its first arc along the following
 * characters. The last position at which a final arc was reached (the longest match) and
 * its accumulated output are remembered; if there was one, the matched span is deleted and
 * the output inserted in its place.
 *
 * @param fst mappings from input character sequences to replacement text
 * @param sb buffer to modify in place
 * @throws IOException declared for the FST lookups made here
 */
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
final FST.BytesReader bytesReader = fst.getBytesReader();
final FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>());
final CharsRef NO_OUTPUT = fst.outputs.getNoOutput();
// temporary stuff
final FST.Arc<CharsRef> arc = new FST.Arc<>();
int longestMatch;
CharsRef longestOutput;
for (int i = 0; i < sb.length(); i++) {
// restart the walk from the FST's first arc for every start position
arc.copyFrom(firstArc);
CharsRef output = NO_OUTPUT;
longestMatch = -1;
longestOutput = null;
for (int j = i; j < sb.length(); j++) {
char ch = sb.charAt(j);
// arc is both the source and the destination here, so it advances in place
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
break;
} else {
output = fst.outputs.add(output, arc.output);
}
if (arc.isFinal()) {
// keep going after a match: a later final arc overwrites this one, so the longest match wins
longestOutput = fst.outputs.add(output, arc.nextFinalOutput);
longestMatch = j;
}
}
if (longestMatch >= 0) {
sb.delete(i, longestMatch+1);
sb.insert(i, longestOutput);
// together with the loop's i++ this resumes scanning right after the inserted text
i += (longestOutput.length - 1);
}
}
}
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -24,8 +25,8 @@ import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
@ -40,7 +41,10 @@ final class Stemmer {
private final BytesRef scratch = new BytesRef();
private final StringBuilder segment = new StringBuilder();
private final ByteArrayDataInput affixReader;
private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT);
// used for normalization
private final StringBuilder scratchSegment = new StringBuilder();
private char scratchBuffer[] = new char[32];
/**
* Constructs a new Stemmer which will use the provided Dictionary to create its stems.
@ -69,16 +73,24 @@ final class Stemmer {
* @return List of stems for the word
*/
public List<CharsRef> stem(char word[], int length) {
if (dictionary.ignoreCase) {
charUtils.toLowerCase(word, 0, length);
if (dictionary.needsInputCleaning) {
scratchSegment.setLength(0);
scratchSegment.append(word, 0, length);
CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
length = segment.length();
segment.getChars(0, length, scratchBuffer, 0);
word = scratchBuffer;
}
List<CharsRef> stems = new ArrayList<CharsRef>();
IntsRef forms = dictionary.lookupWord(word, 0, length);
if (forms != null) {
// TODO: some forms should not be added, e.g. ONLYINCOMPOUND
// just because it exists, does not make it valid...
for (int i = 0; i < forms.length; i++) {
stems.add(new CharsRef(word, 0, length));
stems.add(newStem(word, length));
}
}
stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false));
@ -107,6 +119,23 @@ final class Stemmer {
return deduped;
}
/**
 * Builds the stem returned to callers from the first {@code length} chars of {@code buffer}.
 * When the dictionary needs output cleaning, the {@code oconv} mappings are applied to a
 * copy first and the converted text is returned in a freshly allocated array.
 *
 * @param buffer characters holding the stem
 * @param length number of valid characters in {@code buffer}
 * @return the stem, after output conversion if the dictionary requires it
 */
private CharsRef newStem(char buffer[], int length) {
  if (!dictionary.needsOutputCleaning) {
    return new CharsRef(buffer, 0, length);
  }

  scratchSegment.setLength(0);
  scratchSegment.append(buffer, 0, length);
  try {
    Dictionary.applyMappings(dictionary.oconv, scratchSegment);
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }

  final int convertedLength = scratchSegment.length();
  final char converted[] = new char[convertedLength];
  scratchSegment.getChars(0, convertedLength, converted, 0);
  return new CharsRef(converted, 0, convertedLength);
}
// ================================================= Helper Methods ================================================
/**
@ -292,7 +321,7 @@ final class Stemmer {
continue;
}
}
stems.add(new CharsRef(strippedWord, 0, length));
stems.add(newStem(strippedWord, length));
}
}
}

View File

@ -0,0 +1,219 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.InputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;
import org.junit.Ignore;
/**
* These thunderbird dictionaries can be retrieved via:
* https://addons.mozilla.org/en-US/thunderbird/language-tools/
* You must click and download every file: sorry!
*/
@Ignore("enable manually")
public class TestAllDictionaries2 extends LuceneTestCase {
// set this to the location of where you downloaded all the files
static final File DICTIONARY_HOME =
new File("/data/thunderbirdDicts");
final String tests[] = {
/* zip file */ /* dictionary */ /* affix */
"addon-0.4.5-an+fx+tb+fn+sm.xpi", "dictionaries/ru.dic", "dictionaries/ru.aff",
"addon-0.5.5-fx+tb.xpi", "dictionaries/ko-KR.dic", "dictionaries/ko-KR.aff",
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
"belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
"bengali_bangladesh_dictionary-0.08-sm+tb+fx.xpi", "dictionaries/bn-BD.dic", "dictionaries/bn-BD.aff",
"brazilian_portuguese_dictionary_former_spelling-28.20140203-tb+sm+fx.xpi", "dictionaries/pt-BR-antigo.dic", "dictionaries/pt-BR-antigo.aff",
"brazilian_portuguese_dictionary_new_spelling-28.20140203-fx+sm+tb.xpi", "dictionaries/pt-BR.dic", "dictionaries/pt-BR.aff",
"british_english_dictionary_updated-1.19.5-sm+fx+tb.xpi", "dictionaries/en-GB.dic", "dictionaries/en-GB.aff",
"bulgarian_dictionary-4.3-fx+tb+sm.xpi", "dictionaries/bg.dic", "dictionaries/bg.aff",
"canadian_english_dictionary-2.0.8-fx+sm+tb.xpi", "dictionaries/en-CA.dic", "dictionaries/en-CA.aff",
"ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff",
"chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff",
"corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff",
"corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
"corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff",
"croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff",
"croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff",
"dansk_ordbog_til_stavekontrollen-2.2.1-sm+tb+fx.xpi", "dictionaries/da.dic", "dictionaries/da.aff",
"deutsches_worterbuch_de_de_alte_rechtschreibung-2.1.8-sm.xpi", "dictionaries/de-DE-1901.dic", "dictionaries/de-DE-1901.aff",
"diccionario_de_espanolespana-1.7-sm+tb+fn+fx.xpi", "dictionaries/es-ES.dic", "dictionaries/es-ES.aff",
"diccionario_en_espanol_para_venezuela-1.1.17-sm+an+tb+fn+fx.xpi", "dictionaries/es_VE.dic", "dictionaries/es_VE.aff",
"diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
"diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
"diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
"difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
"dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
"eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
"english_australian_dictionary-2.1.2-tb+fx+sm.xpi", "dictionaries/en-AU.dic", "dictionaries/en-AU.aff",
"esperanta_vortaro-1.0.2-fx+tb+sm.xpi", "dictionaries/eo-EO.dic", "dictionaries/eo-EO.aff",
"european_portuguese_spellchecker-14.1.1.1-tb+fx.xpi", "dictionaries/pt-PT.dic", "dictionaries/pt-PT.aff",
"faroese_spell_checker_faroe_islands-2.0-tb+sm+fx+fn.xpi", "dictionaries/fo_FO.dic", "dictionaries/fo_FO.aff",
"frysk_wurdboek-2.1.1-fn+sm+fx+an+tb.xpi", "dictionaries/fy.dic", "dictionaries/fy.aff",
"geiriadur_cymraeg-1.08-tb+sm+fx.xpi", "dictionaries/cy_GB.dic", "dictionaries/cy_GB.aff",
"general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi", "dictionaries/ca.dic", "dictionaries/ca.aff",
"german_dictionary-2.0.3-fn+fx+sm+tb.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
"german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
"german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
"german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff",
"german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff",
"german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff",
"greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi", "dictionaries/el-GR.dic", "dictionaries/el-GR.aff",
"gujarati_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/gu_IN.dic", "dictionaries/gu_IN.aff",
"haitian_creole_spell_checker-0.08-tb+sm+fx.xpi", "dictionaries/ht-HT.dic", "dictionaries/ht-HT.aff",
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
"kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
"kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
"lao_spellchecking_dictionary-0-fx+tb+sm+fn+an.xpi", "dictionaries/lo_LA.dic", "dictionaries/lo_LA.aff",
"latviesu_valodas_pareizrakstibas_parbaudes_vardnica-1.0.0-fn+fx+tb+sm.xpi", "dictionaries/lv_LV.dic", "dictionaries/lv_LV.aff",
"lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
"litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
"litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
"marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
"ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
"nepali_dictionary-1.2-fx+tb.xpi", "dictionaries/ne_NP.dic", "dictionaries/ne_NP.aff",
"norsk_bokmal_ordliste-2.0.10.2-fx+tb+sm.xpi", "dictionaries/nb.dic", "dictionaries/nb.aff",
"norsk_nynorsk_ordliste-2.1.0-sm+fx+tb.xpi", "dictionaries/nn.dic", "dictionaries/nn.aff",
"northern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nso-ZA.dic", "dictionaries/nso-ZA.aff",
"oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
"polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
"punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
"scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Latn.dic", "dictionaries/sr-RS-Latn.aff",
"slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK.dic", "dictionaries/sk-SK.aff",
"slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK-ascii.dic", "dictionaries/sk-SK-ascii.aff",
"slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi", "dictionaries/sl.dic", "dictionaries/sl.aff",
"songhay_spell_checker-0.03-fx+tb+sm.xpi", "dictionaries/Songhay - Mali.dic", "dictionaries/Songhay - Mali.aff",
"southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/st-ZA.dic", "dictionaries/st-ZA.aff",
"sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff",
"sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi", "dictionaries/dsb.dic", "dictionaries/dsb.aff",
"srpska_latinica-0.1-fx+tb+sm.xpi", "dictionaries/Srpski_latinica.dic", "dictionaries/Srpski_latinica.aff",
"svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv.dic", "dictionaries/sv.aff",
"svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv_FI.dic", "dictionaries/sv_FI.aff",
"swati_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ss-ZA.dic", "dictionaries/ss-ZA.aff",
"tamil_spell_checker_for_firefox-0.4-tb+fx.xpi", "dictionaries/ta-TA.dic", "dictionaries/ta-TA.aff",
"telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
"upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
"valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
"venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
"zulu_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/zu-ZA.dic", "dictionaries/zu-ZA.aff"
};
/**
 * Loads every dictionary listed in {@code tests} (zip file, .dic entry, .aff entry triples)
 * and prints one line per dictionary with the estimated RAM usage of the whole
 * {@link Dictionary} and of its main components.
 */
public void test() throws Exception {
  for (int base = 0; base < tests.length; base += 3) {
    File archive = new File(DICTIONARY_HOME, tests[base]);
    assert archive.exists();

    try (ZipFile zip = new ZipFile(archive, IOUtils.CHARSET_UTF_8)) {
      ZipEntry dicEntry = zip.getEntry(tests[base+1]);
      assert dicEntry != null;
      ZipEntry affEntry = zip.getEntry(tests[base+2]);
      assert affEntry != null;

      try (InputStream dictionary = zip.getInputStream(dicEntry);
           InputStream affix = zip.getInputStream(affEntry)) {
        Dictionary dic = new Dictionary(affix, dictionary);

        StringBuilder report = new StringBuilder();
        report.append(tests[base]).append("\t").append(RamUsageEstimator.humanSizeOf(dic)).append("\t(");
        report.append("words=").append(RamUsageEstimator.humanSizeOf(dic.words)).append(", ");
        report.append("flags=").append(RamUsageEstimator.humanSizeOf(dic.flagLookup)).append(", ");
        report.append("strips=").append(RamUsageEstimator.humanSizeOf(dic.stripLookup)).append(", ");
        report.append("conditions=").append(RamUsageEstimator.humanSizeOf(dic.patterns)).append(", ");
        report.append("affixData=").append(RamUsageEstimator.humanSizeOf(dic.affixData)).append(", ");
        report.append("prefixes=").append(RamUsageEstimator.humanSizeOf(dic.prefixes)).append(", ");
        report.append("suffixes=").append(RamUsageEstimator.humanSizeOf(dic.suffixes)).append(")");
        System.out.println(report.toString());
      }
    }
  }
}
/**
 * Loads only the dictionaries whose archive name equals {@code toTest}; useful
 * for reproducing a problem with a single dictionary.
 * <p>
 * {@code tests} is laid out in triples (archive name, .dic path, .aff path),
 * so the scan advances three slots at a time. That way only archive names are
 * compared against {@code toTest}, and {@code i + 1} / {@code i + 2} always
 * refer to the paths belonging to the matched archive.
 */
public void testOneDictionary() throws Exception {
  String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
  for (int i = 0; i < tests.length; i += 3) {
    if (!tests[i].equals(toTest)) {
      continue;
    }
    File f = new File(DICTIONARY_HOME, tests[i]);
    assert f.exists();

    try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
      ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
      assert dicEntry != null;
      ZipEntry affEntry = zip.getEntry(tests[i + 2]);
      assert affEntry != null;

      try (InputStream dictionary = zip.getInputStream(dicEntry);
           InputStream affix = zip.getInputStream(affEntry)) {
        // Constructing the dictionary is the whole test: it must not throw.
        new Dictionary(affix, dictionary);
      }
    }
  }
}
}

View File

@ -0,0 +1,36 @@
package org.apache.lucene.analysis.hunspell;
import org.junit.BeforeClass;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Stemming test driven by the {@code conv.aff} / {@code conv.dic} fixtures.
 */
public class TestConv extends StemmerTestBase {

  /** Initializes the shared test base with the conv fixtures. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    init("conv.aff", "conv.dic");
  }

  /** Each listed spelling is asserted to stem to "drInk". */
  public void testConversion() {
    final String[] spellings = {"drink", "drInk", "drInkAble", "drInkABle", "drinkABle"};
    for (String spelling : spellings) {
      assertStemsTo(spelling, "drInk");
    }
  }
}

View File

@ -22,10 +22,15 @@ import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.CharSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
public class TestDictionary extends LuceneTestCase {
@ -123,4 +128,54 @@ public class TestDictionary extends LuceneTestCase {
assertTrue(affixStream.isClosed());
assertTrue(dictStream.isClosed());
}
/**
 * Builds a small replacement FST (a -> b, ab -> c, c -> de, def -> gh) and
 * checks the result of {@code Dictionary.applyMappings} on several inputs.
 */
public void testReplacements() throws Exception {
  // {input, output} pairs, added to the builder in exactly this order.
  final String[][] rules = {
    {"a", "b"},
    {"ab", "c"},
    {"c", "de"},
    {"def", "gh"},
  };

  final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  final Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  final IntsRef scratchInts = new IntsRef();
  for (String[] rule : rules) {
    Util.toUTF16(rule[0], scratchInts);
    builder.add(scratchInts, new CharsRef(rule[1]));
  }
  final FST<CharsRef> fst = builder.finish();

  // {text before mapping, expected text after mapping}
  final String[][] cases = {
    {"atestanother", "btestbnother"},
    {"abtestanother", "ctestbnother"},
    {"atestabnother", "btestcnother"},
    {"abtestabnother", "ctestcnother"},
    {"abtestabcnother", "ctestcdenother"},
    {"defdefdefc", "ghghghde"},
  };
  for (String[] testCase : cases) {
    final StringBuilder text = new StringBuilder(testCase[0]);
    Dictionary.applyMappings(fst, text);
    assertEquals(testCase[1], text.toString());
  }
}
}

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -30,7 +31,6 @@ import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -94,4 +94,20 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
};
checkOneTerm(a, "", "");
}
/**
 * Builds a dictionary from simple.aff / simple.dic with {@code true} as the
 * third constructor argument (presumably the ignore-case switch, going by the
 * test name -- confirm against Dictionary) and checks that the mixed-case term
 * "NoChAnGy" comes out of the stem filter unchanged.
 */
public void testIgnoreCaseNoSideEffects() throws Exception {
  final Dictionary dictionary;
  try (InputStream aff = TestStemmer.class.getResourceAsStream("simple.aff");
       InputStream dic = TestStemmer.class.getResourceAsStream("simple.dic")) {
    dictionary = new Dictionary(aff, Collections.singletonList(dic), true);
  }

  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      // Whole input as a single token, then run through the Hunspell stemmer.
      final Tokenizer source = new KeywordTokenizer();
      final HunspellStemFilter stemmed = new HunspellStemFilter(source, dictionary);
      return new TokenStreamComponents(source, stemmed);
    }
  };

  checkOneTerm(analyzer, "NoChAnGy", "NoChAnGy");
}
}

View File

@ -0,0 +1,36 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
/**
 * Stemming test driven by the {@code ignore.aff} / {@code ignore.dic} fixtures.
 */
public class TestIgnore extends StemmerTestBase {

  /** Initializes the shared test base with the ignore fixtures. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    init("ignore.aff", "ignore.dic");
  }

  /** Checks stems for inputs with and without the characters ' and -. */
  public void testExamples() {
    final String[] drinkForms = {"drink", "drinkable", "dr'ink-able"};
    for (String form : drinkForms) {
      assertStemsTo(form, "drink");
    }
    assertStemsTo("drank-able", "drank");
    // No expected stems are passed for an input made up only of ' and -.
    assertStemsTo("'-'-'-");
  }
}

View File

@ -0,0 +1,16 @@
SET UTF-8
ICONV 4
ICONV A a
ICONV B b
ICONV C c
ICONV I i
OCONV 4
OCONV a A
OCONV b B
OCONV c C
OCONV i I
SFX X Y 1
SFX X 0 able . +ABLE

View File

@ -0,0 +1,2 @@
1
drink/X [VERB]

View File

@ -0,0 +1,6 @@
SET UTF-8
IGNORE '-
SFX X Y 1
SFX X 0 able . +ABLE

View File

@ -0,0 +1,3 @@
1
drink/X [VERB]
dr-ank/X [VERB]

View File

@ -209,11 +209,6 @@ final class DocFieldProcessor extends DocConsumer {
final DocFieldProcessorPerField perField = fields[i];
perField.consumer.processFields(perField.fields, perField.fieldCount);
}
if (docState.maxTermPrefix != null && docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", "WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
docState.maxTermPrefix = null;
}
}
private DocFieldProcessorPerField processField(FieldInfos.Builder fieldInfos,

View File

@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.IOUtils;
/**
* Holds state for inverting all occurrences of a single
@ -182,6 +181,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
// when we come back around to the field...
fieldState.position += posIncrAttribute.getPositionIncrement();
fieldState.offset += offsetAttribute.endOffset();
if (docState.maxTermPrefix != null) {
final String msg = "Document contains at least one immense term in field=\"" + fieldInfo.name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'";
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", "ERROR: " + msg);
}
docState.maxTermPrefix = null;
throw new IllegalArgumentException(msg);
}
/* if success was false above there is an exception coming through and we won't get here.*/
succeededInProcessingField = true;
} finally {

View File

@ -207,8 +207,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
/**
* Absolute hard maximum length for a term, in bytes once
* encoded as UTF8. If a term arrives from the analyzer
* longer than this length, it is skipped and a message is
* printed to infoStream, if set (see {@link
* longer than this length, an
* <code>IllegalArgumentException</code> is thrown
* and a message is printed to infoStream, if set (see {@link
* IndexWriterConfig#setInfoStream(InfoStream)}).
*/
public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;
@ -1159,7 +1160,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
* merge policy.
*
* <p>Note that each term in the document can be no longer
* than 16383 characters, otherwise an
* than {@link #MAX_TERM_LENGTH} in bytes, otherwise an
* IllegalArgumentException will be thrown.</p>
*
* <p>Note that it's possible to create an invalid Unicode

View File

@ -179,12 +179,11 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
try {
termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
} catch (MaxBytesLengthExceededException e) {
// Not enough room in current block
// Just skip this term, to remain as robust as
// possible during indexing. A TokenFilter
// can be inserted into the analyzer chain if
// other behavior is wanted (pruning the term
// to a prefix, throwing an exception, etc).
// Term is too large; record this here (can't throw an
// exc because DocInverterPerField will then abort the
// entire segment) and then throw an exc later in
// DocInverterPerField.java. LengthFilter can always be
// used to prune the term before indexing:
if (docState.maxTermPrefix == null) {
final int saved = termBytesRef.length;
try {

View File

@ -202,8 +202,8 @@ public class Sort {
return 0x45aaf665 + Arrays.hashCode(fields);
}
/** Whether the relevance score is needed to sort documents. */
boolean needsScores() {
/** Returns true if the relevance score is needed to sort documents. */
public boolean needsScores() {
for (SortField sortField : fields) {
if (sortField.needsScores()) {
return true;

View File

@ -0,0 +1,105 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import org.junit.Before;
import org.junit.After;
/**
 * Tests that a useful exception is thrown when attempting to index a term that is
 * too large.
 *
 * @see IndexWriter#MAX_TERM_LENGTH
 */
public class TestExceedMaxTermLength extends LuceneTestCase {

  /** Lower length bound passed to randomSimpleString for the oversized value. */
  private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1;
  /** Upper length bound passed to randomSimpleString for the oversized value. */
  private final static int maxTestTermLength = IndexWriter.MAX_TERM_LENGTH * 2;

  Directory dir = null;

  @Before
  public void createDir() {
    dir = newDirectory();
  }

  @After
  public void destroyDir() throws IOException {
    dir.close();
    dir = null;
  }

  /**
   * Adds a document holding one oversized field value (randomly surrounded by
   * short fields) and expects an {@link IllegalArgumentException} whose message
   * mentions "immense term", the maximum length and the offending field name.
   */
  public void test() throws Exception {
    // try-with-resources: the writer is always closed, and a failure in close()
    // can no longer hide an assertion failure raised inside the body.
    try (IndexWriter w = new IndexWriter(dir,
                                         newIndexWriterConfig(random(),
                                                              TEST_VERSION_CURRENT,
                                                              new MockAnalyzer(random())))) {
      final FieldType ft = new FieldType();
      ft.setIndexed(true);
      ft.setStored(random().nextBoolean());
      ft.freeze();

      final Document doc = new Document();
      if (random().nextBoolean()) {
        // totally ok short field value
        doc.add(newShortField(ft));
      }

      // problematic field
      final String name = TestUtil.randomSimpleString(random(), 1, 50);
      final String value = TestUtil.randomSimpleString(random(),
                                                       minTestTermLength,
                                                       maxTestTermLength);
      final Field f = new Field(name, value, ft);
      if (random().nextBoolean()) {
        // totally ok short field value
        doc.add(newShortField(ft));
      }
      doc.add(f);

      try {
        w.addDocument(doc);
        fail("Did not get an exception from adding a monster term");
      } catch (IllegalArgumentException e) {
        final String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH);
        final String msg = e.getMessage();
        assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg,
                   msg.contains("immense term"));
        assertTrue("IllegalArgumentException didn't mention max length ("+maxLengthMsg+"): " + msg,
                   msg.contains(maxLengthMsg));
        assertTrue("IllegalArgumentException didn't mention field name ("+name+"): " + msg,
                   msg.contains(name));
      }
    }
  }

  /** Creates a field with a random short name and a random short value (name is drawn first). */
  private Field newShortField(FieldType ft) {
    return new Field(TestUtil.randomSimpleString(random(), 1, 10),
                     TestUtil.randomSimpleString(random(), 1, 10),
                     ft);
  }
}

View File

@ -1660,32 +1660,32 @@ public class TestIndexWriter extends LuceneTestCase {
// This contents produces a too-long term:
String contents = "abc xyz x" + bigTerm + " another term";
doc.add(new TextField("content", contents, Field.Store.NO));
w.addDocument(doc);
try {
w.addDocument(doc);
fail("should have hit exception");
} catch (IllegalArgumentException iae) {
// expected
}
// Make sure we can add another normal document
doc = new Document();
doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
w.addDocument(doc);
// So we remove the deleted doc:
w.forceMerge(1);
IndexReader reader = w.getReader();
w.close();
// Make sure all terms < max size were indexed
assertEquals(2, reader.docFreq(new Term("content", "abc")));
assertEquals(1, reader.docFreq(new Term("content", "abc")));
assertEquals(1, reader.docFreq(new Term("content", "bbb")));
assertEquals(1, reader.docFreq(new Term("content", "term")));
assertEquals(1, reader.docFreq(new Term("content", "another")));
assertEquals(0, reader.docFreq(new Term("content", "term")));
// Make sure position is still incremented when
// massive term is skipped:
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another"));
assertEquals(0, tps.nextDoc());
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
// Make sure the doc that has the massive term is in
// Make sure the doc that has the massive term is NOT in
// the index:
assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
assertEquals("document with wicked long term is in the index!", 1, reader.numDocs());
reader.close();
dir.close();

View File

@ -0,0 +1,223 @@
package org.apache.lucene.index.sorter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher; // javadocs
import org.apache.lucene.search.Query; // javadocs
import org.apache.lucene.search.ScoreDoc; // javadocs
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.FixedBitSet;
/**
* Helper class to sort readers that contain blocks of documents.
* <p>
* Note that this class is intended to be used with {@link SortingMergePolicy},
* and for other purposes has some limitations:
* <ul>
* <li>Cannot yet be used with {@link IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter}
* <li>Filling sort field values is not yet supported.
* </ul>
* @lucene.experimental
*/
// TODO: can/should we clean this thing up (e.g. return a proper sort value)
// and move to the join/ module?
public class BlockJoinComparatorSource extends FieldComparatorSource {
// Filter whose per-segment DocIdSet marks the parent documents.
final Filter parentsFilter;
// Sort applied between blocks; evaluated on the parent documents.
final Sort parentSort;
// Sort applied between children that share the same parent.
final Sort childSort;
/**
* Create a new BlockJoinComparatorSource, sorting only blocks of documents
* with {@code parentSort} and not reordering children within a block.
*
* @param parentsFilter Filter identifying parent documents
* @param parentSort Sort for parent documents
*/
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort) {
this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC));
}
/**
* Create a new BlockJoinComparatorSource, specifying the sort order for both
* blocks of documents and children within a block.
*
* @param parentsFilter Filter identifying parent documents
* @param parentSort Sort for parent documents
* @param childSort Sort for child documents in the same block
*/
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort, Sort childSort) {
this.parentsFilter = parentsFilter;
this.parentSort = parentSort;
this.childSort = childSort;
}
/**
* Creates a comparator that orders documents block by block: documents in
* different blocks are ordered by {@code parentSort} applied to their parents,
* documents in the same block by {@code childSort}.
* <p>
* The returned comparator does not support {@code setTopValue},
* {@code compareTop} or {@code value}; each throws
* {@link UnsupportedOperationException}.
*/
@Override
public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
// we keep parallel slots: the parent ids and the child ids
final int parentSlots[] = new int[numHits];
final int childSlots[] = new int[numHits];
// One wrapped comparator per parent sort field. They are created with a single
// slot because the pairwise compare helper below only ever uses slot 0.
SortField parentFields[] = parentSort.getSort();
final int parentReverseMul[] = new int[parentFields.length];
final FieldComparator<?> parentComparators[] = new FieldComparator[parentFields.length];
for (int i = 0; i < parentFields.length; i++) {
parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1;
parentComparators[i] = parentFields[i].getComparator(1, i);
}
// Same setup for the child sort fields.
SortField childFields[] = childSort.getSort();
final int childReverseMul[] = new int[childFields.length];
final FieldComparator<?> childComparators[] = new FieldComparator[childFields.length];
for (int i = 0; i < childFields.length; i++) {
childReverseMul[i] = childFields[i].getReverse() ? -1 : 1;
childComparators[i] = childFields[i].getComparator(1, i);
}
// NOTE: we could return parent ID as value but really our sort "value" is more complex...
// So we throw UOE for now. At the moment you really should only use this at indexing time.
return new FieldComparator<Integer>() {
// Parent and child doc ids of the slot last passed to setBottom.
int bottomParent;
int bottomChild;
// Parent bits of the current segment; assigned in setNextReader.
FixedBitSet parentBits;
// Compares two slots using the (child, parent) doc ids recorded by copy().
@Override
public int compare(int slot1, int slot2) {
try {
return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
} catch (IOException e) {
// FieldComparator.compare(int, int) declares no IOException, so rethrow unchecked.
throw new RuntimeException(e);
}
}
@Override
public void setBottom(int slot) {
bottomParent = parentSlots[slot];
bottomChild = childSlots[slot];
}
@Override
public void setTopValue(Integer value) {
// we don't have enough information (the docid is needed)
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
}
@Override
public int compareBottom(int doc) throws IOException {
return compare(bottomChild, bottomParent, doc, parent(doc));
}
@Override
public int compareTop(int doc) throws IOException {
// we don't have enough information (the docid is needed)
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
}
// Records the doc id and the parent of its block for the given slot.
@Override
public void copy(int slot, int doc) throws IOException {
childSlots[slot] = doc;
parentSlots[slot] = parent(doc);
}
// Resolves the parent bits for the new segment and forwards the segment change
// to every wrapped comparator. Throws IllegalStateException when the filter
// returns null or anything other than a FixedBitSet.
@Override
public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
final DocIdSet parents = parentsFilter.getDocIdSet(context, null);
if (parents == null) {
throw new IllegalStateException("AtomicReader " + context.reader() + " contains no parents!");
}
if (!(parents instanceof FixedBitSet)) {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
parentBits = (FixedBitSet) parents;
// setNextReader may hand back a different comparator instance, so store the result.
for (int i = 0; i < parentComparators.length; i++) {
parentComparators[i] = parentComparators[i].setNextReader(context);
}
for (int i = 0; i < childComparators.length; i++) {
childComparators[i] = childComparators[i].setNextReader(context);
}
return this;
}
@Override
public Integer value(int slot) {
// really our sort "value" is more complex...
throw new UnsupportedOperationException("filling sort field values is not yet supported");
}
// Passes the scorer on to all wrapped parent and child comparators.
@Override
public void setScorer(Scorer scorer) {
super.setScorer(scorer);
for (FieldComparator<?> comp : parentComparators) {
comp.setScorer(scorer);
}
for (FieldComparator<?> comp : childComparators) {
comp.setScorer(scorer);
}
}
// Parent of doc: the next set bit in parentBits starting at doc
// (doc itself when doc is a parent).
int parent(int doc) {
return parentBits.nextSetBit(doc);
}
// Orders two documents given their parents: same block -> child sort (or doc id
// order when one of them is the parent), different blocks -> parent sort with
// the parent doc id as tie-break.
int compare(int docID1, int parent1, int docID2, int parent2) throws IOException {
if (parent1 == parent2) { // both are in the same block
if (docID1 == parent1 || docID2 == parent2) {
// keep parents at the end of blocks
return docID1 - docID2;
} else {
return compare(docID1, docID2, childComparators, childReverseMul);
}
} else {
int cmp = compare(parent1, parent2, parentComparators, parentReverseMul);
if (cmp == 0) {
return parent1 - parent2;
} else {
return cmp;
}
}
}
// Compares two doc ids with the given comparators, in field order, returning the
// first non-zero result. docID1 is loaded into slot 0 and made the bottom, then
// docID2 is compared against it; the statement order here must be kept.
int compare(int docID1, int docID2, FieldComparator<?> comparators[], int reverseMul[]) throws IOException {
for (int i = 0; i < comparators.length; i++) {
// TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
// the segments are always the same here...
comparators[i].copy(0, docID1);
comparators[i].setBottom(0);
int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
if (comp != 0) {
return comp;
}
}
return 0; // no need to docid tiebreak
}
};
}
// Textual form built from both sorts.
@Override
public String toString() {
return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")";
}
}

View File

@ -1,88 +0,0 @@
package org.apache.lucene.index.sorter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.FixedBitSet;
/**
 * Helper class to sort readers that contain blocks of documents: blocks are
 * ordered by a comparator applied to their parent documents, and documents
 * inside one block by a separate child comparator.
 */
public abstract class BlockJoinSorter extends Sorter {

  /** Filter whose DocIdSet marks the parent documents. */
  protected final Filter parentsFilter;

  /** Sole constructor. */
  public BlockJoinSorter(Filter parentsFilter) {
    this.parentsFilter = parentsFilter;
  }

  /** Return a {@link Sorter.DocComparator} instance that will be called on
   *  parent doc IDs. */
  protected abstract DocComparator getParentComparator(AtomicReader reader);

  /** Return a {@link Sorter.DocComparator} instance that will be called on
   *  children of the same parent. By default, children of the same parent are
   *  not reordered. */
  protected DocComparator getChildComparator(AtomicReader reader) {
    return INDEX_ORDER_COMPARATOR;
  }

  /**
   * Sorts the reader's documents block by block.
   *
   * @throws IllegalStateException if the parents filter returns null or
   *         something other than a FixedBitSet for this reader
   */
  @Override
  public final DocMap sort(AtomicReader reader) throws IOException {
    final DocIdSet parents = parentsFilter.getDocIdSet(reader.getContext(), null);
    if (parents == null) {
      throw new IllegalStateException("AtomicReader " + reader + " contains no parents!");
    }
    if (!(parents instanceof FixedBitSet)) {
      throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
    }
    final FixedBitSet parentBits = (FixedBitSet) parents;

    final DocComparator byParent = getParentComparator(reader);
    final DocComparator byChild = getChildComparator(reader);

    final DocComparator blockOrder = new DocComparator() {
      @Override
      public int compare(int docID1, int docID2) {
        final int p1 = parentBits.nextSetBit(docID1);
        final int p2 = parentBits.nextSetBit(docID2);

        if (p1 != p2) {
          // different blocks: parent order decides, parent doc id breaks ties
          final int cmp = byParent.compare(p1, p2);
          return cmp != 0 ? cmp : p1 - p2;
        }

        // same block
        final boolean involvesParent = docID1 == p1 || docID2 == p2;
        if (involvesParent) {
          // keep parents at the end of blocks
          return docID1 - docID2;
        }
        return byChild.compare(docID1, docID2);
      }
    };

    return sort(reader.maxDoc(), blockOrder);
  }
}

View File

@ -24,50 +24,53 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TotalHitCountCollector;
/**
* A {@link Collector} that early terminates collection of documents on a
* per-segment basis, if the segment was sorted according to the given
* {@link Sorter}.
* {@link Sort}.
*
* <p>
* <b>NOTE:</b> the {@link Collector} detects sorted segments according to
* <b>NOTE:</b> the {@code Collector} detects sorted segments according to
* {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
* it collects up to a specified num docs from each segment, and therefore is
* mostly suitable for use in conjunction with collectors such as
* it collects up to a specified {@code numDocsToCollect} from each segment,
* and therefore is mostly suitable for use in conjunction with collectors such as
* {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
* <p>
* <b>NOTE</b>: If you wrap a {@link TopDocsCollector} that sorts in the same
* order as the index order, the returned {@link TopDocsCollector#topDocs()}
* <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
* order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
* will be correct. However the total of {@link TopDocsCollector#getTotalHits()
* hit count} will be underestimated since not all matching documents will have
* been collected.
* <p>
* <b>NOTE</b>: This {@link Collector} uses {@link Sorter#getID()} to detect
* whether a segment was sorted with the same {@link Sorter} as the one given in
* {@link #EarlyTerminatingSortingCollector(Collector, Sorter, int)}. This has
* <b>NOTE</b>: This {@code Collector} uses {@link Sort#toString()} to detect
* whether a segment was sorted with the same {@code Sort}. This has
* two implications:
* <ul>
* <li>if {@link Sorter#getID()} is not implemented correctly and returns
* different identifiers for equivalent {@link Sorter}s, this collector will not
* <li>if a custom comparator is not implemented correctly and returns
* different identifiers for equivalent instances, this collector will not
* detect sorted segments,</li>
* <li>if you suddenly change the {@link IndexWriter}'s
* {@link SortingMergePolicy} to sort according to another criterion and if both
* the old and the new {@link Sorter}s have the same identifier, this
* {@link Collector} will incorrectly detect sorted segments.</li>
* {@code SortingMergePolicy} to sort according to another criterion and if both
* the old and the new {@code Sort}s have the same identifier, this
* {@code Collector} will incorrectly detect sorted segments.</li>
* </ul>
*
* @lucene.experimental
*/
public class EarlyTerminatingSortingCollector extends Collector {
/** The wrapped Collector */
protected final Collector in;
protected final Sorter sorter;
/** Sort used to sort the search results */
protected final Sort sort;
/** Number of documents to collect in each segment */
protected final int numDocsToCollect;
/** Number of documents to collect in the current segment being processed */
protected int segmentTotalCollect;
/** True if the current segment being processed is sorted by {@link #sort} */
protected boolean segmentSorted;
private int numCollected;
@ -77,20 +80,19 @@ public class EarlyTerminatingSortingCollector extends Collector {
*
* @param in
* the collector to wrap
* @param sorter
* the same sorter as the one which is used by {@link IndexWriter}'s
* {@link SortingMergePolicy}
* @param sort
* the sort you are sorting the search results on
* @param numDocsToCollect
* the number of documents to collect on each segment. When wrapping
* a {@link TopDocsCollector}, this number should be the number of
* hits.
*/
public EarlyTerminatingSortingCollector(Collector in, Sorter sorter, int numDocsToCollect) {
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
if (numDocsToCollect <= 0) {
  // Report the rejected argument itself. The message previously printed the
  // segmentTotalCollect field, which has not been assigned yet in the constructor.
  throw new IllegalStateException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
}
this.in = in;
this.sorter = sorter;
this.sort = sort;
this.numDocsToCollect = numDocsToCollect;
}
@ -110,7 +112,7 @@ public class EarlyTerminatingSortingCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
in.setNextReader(context);
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sorter);
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sort);
segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE;
numCollected = 0;
}

View File

@ -1,81 +0,0 @@
package org.apache.lucene.index.sorter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
/**
* A {@link Sorter} which sorts documents according to their
* {@link NumericDocValues}. One can specify ascending or descending sort order.
*
* @lucene.experimental
*/
public class NumericDocValuesSorter extends Sorter {

  /** Name of the numeric doc-values field the documents are ordered by. */
  private final String fieldName;

  /** {@code true} for ascending order, {@code false} for descending order. */
  private final boolean ascending;

  /** Creates a sorter over {@code fieldName} that orders values ascending. */
  public NumericDocValuesSorter(final String fieldName) {
    this(fieldName, true);
  }

  /**
   * Creates a sorter over {@code fieldName}.
   *
   * @param fieldName the numeric doc-values field to read
   * @param ascending {@code true} to sort ascending, {@code false} to sort descending
   */
  public NumericDocValuesSorter(final String fieldName, boolean ascending) {
    this.fieldName = fieldName;
    this.ascending = ascending;
  }

  @Override
  public Sorter.DocMap sort(final AtomicReader reader) throws IOException {
    final NumericDocValues values = reader.getNumericDocValues(fieldName);
    final boolean asc = ascending;
    // A single comparator serves both directions: equal values tie, otherwise
    // the direction flag decides which side is ordered first.
    final DocComparator byValue = new DocComparator() {
      @Override
      public int compare(int docID1, int docID2) {
        final long left = values.get(docID1);
        final long right = values.get(docID2);
        if (left == right) {
          return 0;
        }
        final boolean leftFirst = asc ? left < right : left > right;
        return leftFirst ? -1 : 1;
      }
    };
    return sort(reader.maxDoc(), byValue);
  }

  @Override
  public String getID() {
    final String direction = ascending ? "ascending" : "descending";
    return "DocValues(" + fieldName + "," + direction + ")";
  }
}

View File

@ -22,47 +22,47 @@ import java.util.Comparator;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
/**
* Sorts documents of a given index by returning a permutation on the document
* IDs.
* <p><b>NOTE</b>: A {@link Sorter} implementation can be easily written from
* a {@link DocComparator document comparator} by using the
* {@link #sort(int, DocComparator)} helper method. This is especially useful
* when documents are directly comparable by their field values.
* @lucene.experimental
*/
public abstract class Sorter {
final class Sorter {
final Sort sort;
/** A comparator that keeps documents in index order. */
public static final DocComparator INDEX_ORDER_COMPARATOR = new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
return docID1 - docID2;
/** Creates a new Sorter to sort the index with {@code sort} */
Sorter(Sort sort) {
if (sort.needsScores()) {
throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
}
};
this.sort = sort;
}
/**
* A permutation of doc IDs. For every document ID between <tt>0</tt> and
* {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
* return <code>docID</code>.
*/
public static abstract class DocMap {
static abstract class DocMap {
/** Given a doc ID from the original index, return its ordinal in the
* sorted index. */
public abstract int oldToNew(int docID);
abstract int oldToNew(int docID);
/** Given the ordinal of a doc ID, return its doc ID in the original index. */
public abstract int newToOld(int docID);
abstract int newToOld(int docID);
/** Return the number of documents in this map. This must be equal to the
* {@link AtomicReader#maxDoc() number of documents} of the
* {@link AtomicReader} which is sorted. */
public abstract int size();
abstract int size();
}
/** Check consistency of a {@link DocMap}, useful for assertions. */
@ -81,7 +81,7 @@ public abstract class Sorter {
}
/** A comparator of doc IDs. */
public static abstract class DocComparator {
static abstract class DocComparator {
/** Compare docID1 against docID2. The contract for the return value is the
* same as {@link Comparator#compare(Object, Object)}. */
@ -89,45 +89,13 @@ public abstract class Sorter {
}
/**
* Sorts documents in reverse order. <b>NOTE</b>: This {@link Sorter} is not
* idempotent. Sorting an {@link AtomicReader} once or twice will return two
* different {@link AtomicReader} views. This {@link Sorter} should not be
* used with {@link SortingMergePolicy}.
*/
public static final Sorter REVERSE_DOCS = new Sorter() {

  @Override
  public DocMap sort(final AtomicReader reader) throws IOException {
    final int docCount = reader.maxDoc();
    final int lastDoc = docCount - 1;
    // Reversal is its own inverse, so both mapping directions use the same arithmetic.
    return new DocMap() {
      @Override
      public int oldToNew(int docID) {
        return lastDoc - docID;
      }

      @Override
      public int newToOld(int docID) {
        return lastDoc - docID;
      }

      @Override
      public int size() {
        return docCount;
      }
    };
  }

  @Override
  public String getID() {
    return "ReverseDocs";
  }
};
private static final class DocValueSorter extends TimSorter {
private final int[] docs;
private final Sorter.DocComparator comparator;
private final int[] tmp;
public DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
super(docs.length / 64);
this.docs = docs;
this.comparator = comparator;
@ -168,7 +136,7 @@ public abstract class Sorter {
}
/** Computes the old-to-new permutation over the given comparator. */
protected static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
// check if the index is sorted
boolean sorted = true;
for (int i = 1; i < maxDoc; ++i) {
@ -242,20 +210,75 @@ public abstract class Sorter {
* <b>NOTE:</b> deleted documents are expected to appear in the mapping as
* well, they will however be marked as deleted in the sorted view.
*/
public abstract DocMap sort(AtomicReader reader) throws IOException;
DocMap sort(AtomicReader reader) throws IOException {
// Builds one FieldComparator per SortField of this.sort, binds each to the
// given reader, and delegates to sort(int, DocComparator) with a comparator
// that compares two documents field by field.
SortField fields[] = sort.getSort();
// reverseMul[i] is -1 when field i is reversed and 1 otherwise; it flips the
// sign of that field's comparison result below.
final int reverseMul[] = new int[fields.length];
final FieldComparator<?> comparators[] = new FieldComparator[fields.length];
for (int i = 0; i < fields.length; i++) {
reverseMul[i] = fields[i].getReverse() ? -1 : 1;
// NOTE(review): the first argument (1) is presumably the number of slots;
// only slot 0 is ever used below -- confirm against SortField#getComparator.
comparators[i] = fields[i].getComparator(1, i);
comparators[i].setNextReader(reader.getContext());
// FAKESCORER throws from every method, so a comparator that actually asks
// for a score fails instead of silently reading a bogus value.
comparators[i].setScorer(FAKESCORER);
}
final DocComparator comparator = new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
try {
for (int i = 0; i < comparators.length; i++) {
// TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
// the segments are always the same here...
// Load docID1 into slot 0, mark that slot as "bottom", then compare it
// against docID2; the three calls must stay in this order.
comparators[i].copy(0, docID1);
comparators[i].setBottom(0);
int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
// The first field that distinguishes the two documents decides the order.
if (comp != 0) {
return comp;
}
}
return Integer.compare(docID1, docID2); // docid order tiebreak
} catch (IOException e) {
// compare(int, int) declares no checked exception, so the IOException is
// rethrown unchecked with the original as its cause.
throw new RuntimeException(e);
}
}
};
return sort(reader.maxDoc(), comparator);
}
/**
* Returns the identifier of this {@link Sorter}.
* <p>This identifier is similar to {@link Object#hashCode()} and should be
* chosen so that two instances of this class that sort documents likewise
* will have the same identifier. On the contrary, this identifier should be
* different on different {@link Sorter sorters}.
* different on different {@link Sort sorts}.
*/
public abstract String getID();
public String getID() {
return sort.toString();
}
@Override
public String toString() {
return getID();
}
/**
 * Placeholder {@link Scorer} constructed with a {@code null} argument. Every
 * method throws {@link UnsupportedOperationException}, so any attempt to read
 * a score, frequency or position through it fails immediately.
 */
static final Scorer FAKESCORER = new Scorer(null) {
@Override
public float score() throws IOException { throw new UnsupportedOperationException(); }
@Override
public int freq() throws IOException { throw new UnsupportedOperationException(); }
@Override
public int docID() { throw new UnsupportedOperationException(); }
@Override
public int nextDoc() throws IOException { throw new UnsupportedOperationException(); }
@Override
public int advance(int target) throws IOException { throw new UnsupportedOperationException(); }
@Override
public long cost() { throw new UnsupportedOperationException(); }
};
}

View File

@ -35,6 +35,7 @@ import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMFile;
@ -48,13 +49,13 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* An {@link AtomicReader} which supports sorting documents by a given
* {@link Sorter}. You can use this class to sort an index as follows:
* {@link Sort}. You can use this class to sort an index as follows:
*
* <pre class="prettyprint">
* IndexWriter writer; // writer to which the sorted index will be added
* DirectoryReader reader; // reader on the input index
* Sorter sorter; // determines how the documents are sorted
* AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
* Sort sort; // determines how the documents are sorted
* AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
* writer.addIndexes(sortingReader);
* writer.close();
* reader.close();
@ -480,7 +481,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
/**
* A {@link Sorter} which sorts two parallel arrays of doc IDs and
* A {@link TimSorter} which sorts two parallel arrays of doc IDs and
* offsets in one go. Every time a doc ID is 'swapped', its corresponding offset
* is swapped too.
*/
@ -708,14 +709,14 @@ public class SortingAtomicReader extends FilterAtomicReader {
}
/** Return a sorted view of <code>reader</code> according to the order
* defined by <code>sorter</code>. If the reader is already sorted, this
* defined by <code>sort</code>. If the reader is already sorted, this
* method might return the reader as-is. */
public static AtomicReader wrap(AtomicReader reader, Sorter sorter) throws IOException {
return wrap(reader, sorter.sort(reader));
public static AtomicReader wrap(AtomicReader reader, Sort sort) throws IOException {
return wrap(reader, new Sorter(sort).sort(reader));
}
/** Expert: same as {@link #wrap(AtomicReader, Sorter)} but operates directly on a {@link Sorter.DocMap}. */
public static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
/** Expert: same as {@link #wrap(AtomicReader, Sort)} but operates directly on a {@link Sorter.DocMap}. */
static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap) {
if (docMap == null) {
// the reader is already sorted
return reader;

View File

@ -22,6 +22,7 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -33,22 +34,23 @@ import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
/** A {@link MergePolicy} that reorders documents according to a {@link Sorter}
/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
* before merging them. As a consequence, all segments resulting from a merge
* will be sorted while segments resulting from a flush will be in the order
* in which documents have been added.
* <p><b>NOTE</b>: Never use this {@link MergePolicy} if you rely on
* {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)}
* <p><b>NOTE</b>: Never use this policy if you rely on
* {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
* to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
* <p><b>NOTE</b>: This {@link MergePolicy} should only be used with idempotent
* {@link Sorter}s so that the order of segments is predictable. For example,
* using {@link SortingMergePolicy} with {@link Sorter#REVERSE_DOCS} (which is
* not idempotent) will make the order of documents in a segment depend on the
* number of times the segment has been merged.
* <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
* so that the order of segments is predictable. For example, using
* {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
* the order of documents in a segment depend on the number of times the segment
* has been merged.
* @lucene.experimental */
public final class SortingMergePolicy extends MergePolicy {
@ -147,12 +149,12 @@ public final class SortingMergePolicy extends MergePolicy {
}
/** Returns true if the given reader is sorted by the given sorter. */
public static boolean isSorted(AtomicReader reader, Sorter sorter) {
/** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
public static boolean isSorted(AtomicReader reader, Sort sort) {
if (reader instanceof SegmentReader) {
final SegmentReader segReader = (SegmentReader) reader;
final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
if (diagnostics != null && sorter.getID().equals(diagnostics.get(SORTER_ID_PROP))) {
if (diagnostics != null && sort.toString().equals(diagnostics.get(SORTER_ID_PROP))) {
return true;
}
}
@ -172,11 +174,13 @@ public final class SortingMergePolicy extends MergePolicy {
final MergePolicy in;
final Sorter sorter;
final Sort sort;
/** Create a new {@link MergePolicy} that sorts documents with <code>sorter</code>. */
public SortingMergePolicy(MergePolicy in, Sorter sorter) {
/** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
public SortingMergePolicy(MergePolicy in, Sort sort) {
this.in = in;
this.sorter = sorter;
this.sorter = new Sorter(sort);
this.sort = sort;
}
@Override
@ -200,7 +204,7 @@ public final class SortingMergePolicy extends MergePolicy {
@Override
public MergePolicy clone() {
return new SortingMergePolicy(in.clone(), sorter);
return new SortingMergePolicy(in.clone(), sort);
}
@Override

View File

@ -17,19 +17,16 @@
-->
<html>
<body>
<p>Provides index sorting capabilities. The application can use one of the
pre-existing Sorter implementations, e.g. to sort by a
{@link org.apache.lucene.index.sorter.NumericDocValuesSorter}
or {@link org.apache.lucene.index.sorter.Sorter#REVERSE_DOCS reverse} the order
of the documents. Additionally, the application can implement a custom
{@link org.apache.lucene.index.sorter.Sorter} which returns a permutation on
a source {@link org.apache.lucene.index.AtomicReader}'s document IDs, to sort
the input documents by additional criteria.
<p>Provides index sorting capabilities. The application can use any
Sort specification, e.g. to sort by fields using DocValues or FieldCache, or to
reverse the order of the documents (by using SortField.Type.DOC in reverse).
Multi-level sorts can be specified the same way you would when searching, by
building Sort from multiple SortFields.
<p>{@link org.apache.lucene.index.sorter.SortingMergePolicy} can be used to
make Lucene sort segments before merging them. This will ensure that every
segment resulting from a merge will be sorted according to the provided
{@link org.apache.lucene.index.sorter.Sorter}. This however makes merging and
{@link org.apache.lucene.search.Sort}. This however makes merging and
thus indexing slower.
<p>Sorted segments allow for early query termination when the sort order

View File

@ -24,6 +24,8 @@ import java.util.List;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TestUtil;
@ -31,9 +33,9 @@ import org.junit.BeforeClass;
public class IndexSortingTest extends SorterTestBase {
private static final Sorter[] SORTERS = new Sorter[] {
new NumericDocValuesSorter(NUMERIC_DV_FIELD, true),
Sorter.REVERSE_DOCS,
private static final Sort[] SORT = new Sort[] {
new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG)),
new Sort(new SortField(null, SortField.Type.DOC, true))
};
@BeforeClass
@ -47,13 +49,14 @@ public class IndexSortingTest extends SorterTestBase {
values.add(Integer.valueOf(reader.document(i).get(ID_FIELD)));
}
}
Sorter sorter = SORTERS[random().nextInt(SORTERS.length)];
if (sorter == Sorter.REVERSE_DOCS) {
int idx = random().nextInt(SORT.length);
Sort sorter = SORT[idx];
if (idx == 1) { // reverse doc sort
Collections.reverse(values);
} else {
Collections.sort(values);
if (sorter instanceof NumericDocValuesSorter && random().nextBoolean()) {
sorter = new NumericDocValuesSorter(NUMERIC_DV_FIELD, false); // descending
if (random().nextBoolean()) {
sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.LONG, true)); // descending
Collections.reverse(values);
}
}

View File

@ -17,56 +17,37 @@ package org.apache.lucene.index.sorter;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass;
public class SortingAtomicReaderTest extends SorterTestBase {
@BeforeClass
public static void beforeClassSortingAtomicReaderTest() throws Exception {
// build the mapping from the reader, since we deleted documents, some of
// them might have disappeared from the index (e.g. if an entire segment is
// dropped b/c all its docs are deleted)
final int[] values = new int[reader.maxDoc()];
for (int i = 0; i < reader.maxDoc(); i++) {
values[i] = Integer.valueOf(reader.document(i).get(ID_FIELD));
}
final Sorter.DocComparator comparator = new Sorter.DocComparator() {
@Override
public int compare(int docID1, int docID2) {
final int v1 = values[docID1];
final int v2 = values[docID2];
return v1 < v2 ? -1 : v1 == v2 ? 0 : 1;
}
};
final Sorter.DocMap docMap = Sorter.sort(reader.maxDoc(), comparator);
// sort the index by id (as integer, in NUMERIC_DV_FIELD)
Sort sort = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT));
final Sorter.DocMap docMap = new Sorter(sort).sort(reader);
// Sorter.compute also sorts the values
NumericDocValues dv = reader.getNumericDocValues(NUMERIC_DV_FIELD);
sortedValues = new Integer[reader.maxDoc()];
for (int i = 0; i < reader.maxDoc(); ++i) {
sortedValues[docMap.oldToNew(i)] = values[i];
sortedValues[docMap.oldToNew(i)] = (int)dv.get(i);
}
if (VERBOSE) {
System.out.println("docMap: " + docMap);
System.out.println("sortedValues: " + Arrays.toString(sortedValues));
}
reader = SortingAtomicReader.wrap(reader, new Sorter() {
@Override
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
return docMap;
}
@Override
public String getID() {
return ID_FIELD;
}
});
// sort the index by id (as integer, in NUMERIC_DV_FIELD)
reader = SortingAtomicReader.wrap(reader, sort);
if (VERBOSE) {
System.out.print("mapped-deleted-docs: ");
@ -82,4 +63,13 @@ public class SortingAtomicReaderTest extends SorterTestBase {
TestUtil.checkReader(reader);
}
/**
 * Wrapping a reader with {@code Sort.RELEVANCE} must be rejected with an
 * {@link IllegalArgumentException} carrying the exact message asserted below.
 */
public void testBadSort() throws Exception {
try {
SortingAtomicReader.wrap(reader, Sort.RELEVANCE);
// Reaching this line means no exception was thrown, which is a failure.
fail("Didn't get expected exception");
} catch (IllegalArgumentException e) {
assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
}
}
}

View File

@ -37,6 +37,8 @@ import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.FixedBitSet;
@ -89,47 +91,14 @@ public class TestBlockJoinSorter extends LuceneTestCase {
final AtomicReader reader = getOnlySegmentReader(indexReader);
final Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
final FixedBitSet parentBits = (FixedBitSet) parentsFilter.getDocIdSet(reader.getContext(), null);
final NumericDocValues parentValues = reader.getNumericDocValues("parent_val");
final Sorter.DocComparator parentComparator = new Sorter.DocComparator() {
@Override
public int compare(int docID1, int docID2) {
assertTrue(parentBits.get(docID1));
assertTrue(parentBits.get(docID2));
return Long.compare(parentValues.get(docID1), parentValues.get(docID2));
}
};
final NumericDocValues childValues = reader.getNumericDocValues("child_val");
final Sorter.DocComparator childComparator = new Sorter.DocComparator() {
@Override
public int compare(int docID1, int docID2) {
assertFalse(parentBits.get(docID1));
assertFalse(parentBits.get(docID2));
return Long.compare(childValues.get(docID1), childValues.get(docID2));
}
};
final Sorter sorter = new BlockJoinSorter(parentsFilter) {
final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
@Override
public String getID() {
return "Dummy";
}
@Override
protected DocComparator getParentComparator(AtomicReader r) {
assertEquals(reader, r);
return parentComparator;
}
@Override
protected DocComparator getChildComparator(AtomicReader r) {
assertEquals(reader, r);
return childComparator;
}
};
final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
final Sorter sorter = new Sorter(sort);
final Sorter.DocMap docMap = sorter.sort(reader);
assertEquals(reader.maxDoc(), docMap.size());

View File

@ -51,14 +51,14 @@ public class TestEarlyTermination extends LuceneTestCase {
private int numDocs;
private List<String> terms;
private Directory dir;
private Sorter sorter;
private Sort sort;
private RandomIndexWriter iw;
private IndexReader reader;
@Override
public void setUp() throws Exception {
super.setUp();
sorter = new NumericDocValuesSorter("ndv1");
sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
}
private Document randomDocument() {
@ -80,7 +80,7 @@ public class TestEarlyTermination extends LuceneTestCase {
terms = new ArrayList<String>(randomTerms);
final long seed = random().nextLong();
final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sorter));
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
for (int i = 0; i < numDocs; ++i) {
final Document doc = randomDocument();
@ -120,7 +120,7 @@ public class TestEarlyTermination extends LuceneTestCase {
for (int i = 0; i < iters; ++i) {
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
searcher.search(query, collector1);
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sorter, numHits));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
}
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
@ -144,7 +144,8 @@ public class TestEarlyTermination extends LuceneTestCase {
for (int i = 0; i < iters; ++i) {
final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
searcher.search(query, collector1);
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, new NumericDocValuesSorter("ndv2"), numHits) {
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
super.setNextReader(context);

View File

@ -40,6 +40,8 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -50,14 +52,14 @@ public class TestSortingMergePolicy extends LuceneTestCase {
private List<String> terms;
private Directory dir1, dir2;
private Sorter sorter;
private Sort sort;
private IndexReader reader;
private IndexReader sortedReader;
@Override
public void setUp() throws Exception {
super.setUp();
sorter = new NumericDocValuesSorter("ndv");
sort = new Sort(new SortField("ndv", SortField.Type.LONG));
createRandomIndexes();
}
@ -68,7 +70,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
return doc;
}
static MergePolicy newSortingMergePolicy(Sorter sorter) {
static MergePolicy newSortingMergePolicy(Sort sort) {
// create a MP with a low merge factor so that many merges happen
MergePolicy mp;
if (random().nextBoolean()) {
@ -83,7 +85,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
mp = lmp;
}
// wrap it with a sorting mp
return new SortingMergePolicy(mp, sorter);
return new SortingMergePolicy(mp, sort);
}
private void createRandomIndexes() throws IOException {
@ -99,7 +101,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
final long seed = random().nextLong();
final IndexWriterConfig iwc1 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
final IndexWriterConfig iwc2 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
iwc2.setMergePolicy(newSortingMergePolicy(sorter));
iwc2.setMergePolicy(newSortingMergePolicy(sort));
final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
for (int i = 0; i < numDocs; ++i) {
@ -162,7 +164,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
}
public void testSortingMP() throws IOException {
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
final AtomicReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);
assertSorted(sortedReader1);
@ -171,4 +173,13 @@ public class TestSortingMergePolicy extends LuceneTestCase {
assertReaderEquals("", sortedReader1, sortedReader2);
}
/**
 * Constructing a {@code SortingMergePolicy} with {@code Sort.RELEVANCE} must be
 * rejected with an {@link IllegalArgumentException} carrying the exact message
 * asserted below.
 */
public void testBadSort() throws Exception {
try {
new SortingMergePolicy(newMergePolicy(), Sort.RELEVANCE);
// Reaching this line means no exception was thrown, which is a failure.
fail("Didn't get expected exception");
} catch (IllegalArgumentException e) {
assertEquals("Cannot sort an index with a Sort that refers to the relevance score", e.getMessage());
}
}
}

View File

@ -46,17 +46,12 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
import org.apache.lucene.index.sorter.Sorter;
import org.apache.lucene.index.sorter.SortingAtomicReader;
import org.apache.lucene.index.sorter.SortingMergePolicy;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -117,9 +112,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
/** Analyzer used at index time */
protected final Analyzer indexAnalyzer;
final Version matchVersion;
private final File indexPath;
private final Directory dir;
final int minPrefixChars;
private Directory dir;
/** Used for ongoing NRT additions/updates. */
private IndexWriter writer;
@ -131,16 +125,19 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
* PrefixQuery is used (4). */
public static final int DEFAULT_MIN_PREFIX_CHARS = 4;
private Sorter sorter;
/** How we sort the postings and search results. */
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
/** Create a new instance, loading from a previously built
* directory, if it exists. */
public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
this(matchVersion, indexPath, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
* directory, if it exists. Note that {@link #close}
* will also close the provided directory. */
public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
this(matchVersion, dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS);
}
/** Create a new instance, loading from a previously built
* directory, if it exists.
* directory, if it exists. Note that {@link #close}
* will also close the provided directory.
*
* @param minPrefixChars Minimum number of leading characters
* before PrefixQuery is used (default 4).
@ -148,7 +145,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
* ngrams (increasing index size but making lookups
* faster).
*/
public AnalyzingInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
public AnalyzingInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars) throws IOException {
if (minPrefixChars < 0) {
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
@ -157,33 +154,29 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
this.queryAnalyzer = queryAnalyzer;
this.indexAnalyzer = indexAnalyzer;
this.matchVersion = matchVersion;
this.indexPath = indexPath;
this.dir = dir;
this.minPrefixChars = minPrefixChars;
dir = getDirectory(indexPath);
if (DirectoryReader.indexExists(dir)) {
// Already built; open it:
initSorter();
writer = new IndexWriter(dir,
getIndexWriterConfig(matchVersion, getGramAnalyzer(), sorter, IndexWriterConfig.OpenMode.APPEND));
getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
searcherMgr = new SearcherManager(writer, true, null);
}
}
/** Override this to customize index settings, e.g. which
* codec to use. Sorter is null if this config is for
* the first pass writer. */
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, Sorter sorter, IndexWriterConfig.OpenMode openMode) {
* codec to use. */
protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
iwc.setCodec(new Lucene46Codec());
iwc.setOpenMode(openMode);
if (sorter != null) {
// This way all merged segments will be sorted at
// merge time, allow for per-segment early termination
// when those segments are searched:
iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), sorter));
}
// This way all merged segments will be sorted at
// merge time, allow for per-segment early termination
// when those segments are searched:
iwc.setMergePolicy(new SortingMergePolicy(iwc.getMergePolicy(), SORT));
return iwc;
}
@ -206,16 +199,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
writer = null;
}
Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
IndexWriter w = null;
AtomicReader r = null;
boolean success = false;
try {
// First pass: build a temporary normal Lucene index,
// just indexing the suggestions as they iterate:
w = new IndexWriter(dirTmp,
getIndexWriterConfig(matchVersion, getGramAnalyzer(), null, IndexWriterConfig.OpenMode.CREATE));
writer = new IndexWriter(dir,
getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
BytesRef text;
Document doc = new Document();
FieldType ft = getTextFieldType();
@ -253,37 +243,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
if (iter.hasPayloads()) {
payloadField.setBytesValue(iter.payload());
}
w.addDocument(doc);
writer.addDocument(doc);
}
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
// Second pass: sort the entire index:
r = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(w, false));
//long t1 = System.nanoTime();
// We can rollback the first pass, now that we have
// the reader open, because we will discard it anyway
// (no sense in fsync'ing it):
w.rollback();
initSorter();
r = SortingAtomicReader.wrap(r, sorter);
writer = new IndexWriter(dir,
getIndexWriterConfig(matchVersion, getGramAnalyzer(), sorter, IndexWriterConfig.OpenMode.CREATE));
writer.addIndexes(new IndexReader[] {r});
r.close();
//System.out.println("sort time: " + ((System.nanoTime()-t1)/1000000) + " msec");
searcherMgr = new SearcherManager(writer, true, null);
success = true;
} finally {
if (success) {
IOUtils.close(w, r, dirTmp);
IOUtils.close(r);
} else {
IOUtils.closeWhileHandlingException(w, writer, r, dirTmp);
IOUtils.closeWhileHandlingException(writer, r);
writer = null;
}
}
@ -359,39 +329,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
searcherMgr.maybeRefreshBlocking();
}
/**
 * Installs the {@code sorter} field: a {@link Sorter} that orders the
 * documents of a reader by their "weight" numeric doc value, highest
 * weight first.
 */
private void initSorter() {
  sorter = new Sorter() {

    @Override
    public String getID() {
      return "BySuggestWeight";
    }

    @Override
    public Sorter.DocMap sort(AtomicReader reader) throws IOException {
      final NumericDocValues weightValues = reader.getNumericDocValues("weight");
      return Sorter.sort(reader.maxDoc(), new Sorter.DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          final long left = weightValues.get(docID1);
          final long right = weightValues.get(docID2);
          // Descending order, so the larger weight sorts first.
          // Long.compare(right, left) would do, but it is java7 only:
          if (left == right) {
            return 0;
          }
          return left > right ? -1 : 1;
        }
      });
    }
  };
}
/**
* Subclass can override this method to change the field type of the text field
* e.g. to change the index options
@ -497,12 +434,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
//System.out.println("finalQuery=" + query);
// Sort by weight, descending:
TopFieldCollector c = TopFieldCollector.create(new Sort(new SortField("weight", SortField.Type.LONG, true)),
num, true, false, false, false);
TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);
// We sorted postings by weight during indexing, so we
// only retrieve the first num hits now:
Collector c2 = new EarlyTerminatingSortingCollector(c, sorter, num);
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
IndexSearcher searcher = searcherMgr.acquire();
List<LookupResult> results = null;
try {
@ -512,7 +448,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
TopFieldDocs hits = (TopFieldDocs) c.topDocs();
// Slower way if postings are not pre-sorted by weight:
// hits = searcher.search(query, null, num, new Sort(new SortField("weight", SortField.Type.LONG, true)));
// hits = searcher.search(query, null, num, SORT);
results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
} finally {
searcherMgr.release(searcher);
@ -676,11 +612,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
if (writer != null) {
writer.close();
writer = null;
}
if (dir != null) {
dir.close();
dir = null;
writer = null;
}
}

View File

@ -17,7 +17,6 @@ package org.apache.lucene.search.suggest.analyzing;
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
@ -38,6 +37,7 @@ import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
@ -92,8 +92,8 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
* Create a new instance, loading from a previously built
* directory, if it exists.
*/
public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer analyzer) throws IOException {
super(matchVersion, indexPath, analyzer);
public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer analyzer) throws IOException {
super(matchVersion, dir, analyzer);
this.blenderType = BlenderType.POSITION_LINEAR;
this.numFactor = DEFAULT_NUM_FACTOR;
}
@ -106,9 +106,9 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
* @param numFactor Factor to multiply the number of searched elements before ponderate
* @throws IOException If there are problems opening the underlying Lucene index.
*/
public BlendedInfixSuggester(Version matchVersion, File indexPath, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
public BlendedInfixSuggester(Version matchVersion, Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
int minPrefixChars, BlenderType blenderType, int numFactor) throws IOException {
super(matchVersion, indexPath, indexAnalyzer, queryAnalyzer, minPrefixChars);
super(matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars);
this.blenderType = blenderType;
this.numFactor = numFactor;
}

View File

@ -40,6 +40,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.*;
import org.junit.BeforeClass;
import org.junit.Ignore;
@ -161,7 +162,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
} catch (InstantiationException e) {
Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
if (cls == AnalyzingInfixSuggester.class) {
lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, TestUtil.getTempDir("LookupBenchmarkTest"), a);
lookup = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, FSDirectory.open(TestUtil.getTempDir("LookupBenchmarkTest")), a);
} else {
Constructor<? extends Lookup> ctor = cls.getConstructor(Analyzer.class);
lookup = ctor.newInstance(a);

View File

@ -21,7 +21,6 @@ import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
@ -39,7 +38,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
@ -55,15 +53,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
@ -106,22 +97,12 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
suggester.build(new InputArrayIterator(keys));
assertEquals(2, suggester.getCount());
suggester.close();
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, 3);
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key);
@ -159,15 +140,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
@Override
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
@ -239,17 +213,11 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
int minPrefixLength = random().nextInt(10);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
suggester.build(new InputArrayIterator(keys));
for(int i=0;i<2;i++) {
@ -306,12 +274,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
// Make sure things still work after close and reopen:
suggester.close();
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixLength) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixLength);
}
suggester.close();
}
@ -321,15 +284,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size());
@ -342,15 +298,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size());
@ -359,18 +308,13 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
// Try again, but overriding addPrefixMatch to highlight
// the entire hit:
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) {
@Override
protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
sb.append("<b>");
sb.append(surface);
sb.append("</b>");
}
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
suggester.build(new InputArrayIterator(keys));
results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
@ -384,15 +328,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
suggester.build(new InputArrayIterator(keys));
suggester.close();
suggester.close();
@ -418,14 +355,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, indexAnalyzer, queryAnalyzer, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), indexAnalyzer, queryAnalyzer, 3);
Input keys[] = new Input[] {
new Input("a bob for apples", 10, new BytesRef("foobaz")),
@ -439,14 +369,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
public void testEmptyAtStart() throws Exception {
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
suggester.build(new InputArrayIterator(new Input[0]));
suggester.add(new BytesRef("a penny saved is a penny earned"), 10, new BytesRef("foobaz"));
suggester.add(new BytesRef("lend me your ear"), 8, new BytesRef("foobar"));
@ -483,14 +407,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
public void testBothExactAndPrefix() throws Exception {
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
suggester.build(new InputArrayIterator(new Input[0]));
suggester.add(new BytesRef("the pen is pretty"), 10, new BytesRef("foobaz"));
suggester.refresh();
@ -563,12 +481,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
System.out.println(" minPrefixChars=" + minPrefixChars);
}
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
// Initial suggester built with nothing:
suggester.build(new InputArrayIterator(new Input[0]));
@ -648,12 +561,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
lookupThread.finish();
suggester.close();
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, minPrefixChars) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, minPrefixChars);
lookupThread = new LookupThread(suggester);
lookupThread.start();
@ -824,15 +732,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
new Input("lend me your ear", 8, new BytesRef("foobar")),
};
File tempDir = TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@Override
protected Directory getDirectory(File path) {
return newDirectory();
}
};
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);

View File

@ -23,7 +23,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -49,15 +48,10 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
File tempDir = TestUtil.getTempDir("BlendedInfixSuggesterTest");
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
suggester.build(new InputArrayIterator(keys));
// we query for star wars and check that the weight
@ -94,12 +88,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
// BlenderType.LINEAR is used by default (remove position*10%)
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a);
suggester.build(new InputArrayIterator(keys));
assertEquals(w, getInResults(suggester, "top", pl, 1));
@ -109,13 +98,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
suggester.close();
// BlenderType.RECIPROCAL is using 1/(1+p) * w where w is weight and p the position of the word
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
suggester.build(new InputArrayIterator(keys));
assertEquals(w, getInResults(suggester, "top", pl, 1));
@ -145,13 +129,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
// if factor is small, we don't get the expected element
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1);
suggester.build(new InputArrayIterator(keys));
@ -169,13 +148,8 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
suggester.close();
// if we increase the factor we have it
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2);
suggester.build(new InputArrayIterator(keys));
// we have it
@ -205,14 +179,9 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
// if factor is small, we don't get the expected element
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR) {
@Override
protected Directory getDirectory(File path) {
return newFSDirectory(path);
}
};
BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR);
suggester.build(new InputArrayIterator(keys));

View File

@ -440,8 +440,10 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
if (dir instanceof BaseDirectoryWrapper) {
((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
}
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setInfoStream(new FailOnNonBulkMergesInfoStream());
analyzer).setInfoStream(new FailOnNonBulkMergesInfoStream());
if (LuceneTestCase.TEST_NIGHTLY) {
// newIWConfig makes smallish max seg size, which

View File

@ -76,6 +76,15 @@ Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.5
Upgrading from Solr 4.7
----------------------
* In previous versions of Solr, Terms that exceeded Lucene's MAX_TERM_LENGTH were
silently ignored when indexing documents. Beginning with Solr 4.8,
an error will be generated when attempting to index a document with a term
that is too large. If you wish to continue to have large terms ignored,
use "solr.LengthFilterFactory" in all of your Analyzers. See LUCENE-5472 for
more details.
Detailed Change List
----------------------
@ -88,6 +97,13 @@ New Features
* SOLR-5183: JSON updates now support nested child documents using a
"_childDocument_" object key. (Varun Thacker, hossman)
* SOLR-5714: You can now use one pool of memory for the HDFS block cache
that all collections share. (Mark Miller, Gregory Chanan)
* SOLR-5720: Add ExpandComponent to expand results collapsed by the
CollapsingQParserPlugin. (Joel Bernstein)
Bug Fixes
----------------------
@ -113,6 +129,15 @@ Bug Fixes
* SOLR-5761: HttpSolrServer has a few fields that can be set via setters but
are not volatile. (Mark Miller, Gregory Chanan)
* SOLR-5811: The Overseer will retry work items until success, which is a serious
problem if you hit a bad work item. (Mark Miller)
* SOLR-5796: Increase how long we are willing to wait for a core to see the ZK
advertised leader in its local state. (Timothy Potter, Mark Miller)
* SOLR-5818: distrib search with custom comparator does not quite work correctly
(Ryan Ernst)
Optimizations
----------------------
* SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
@ -148,6 +173,15 @@ Other Changes
registration exists, wait a short time to see if it goes away.
(Mark Miller)
* LUCENE-5472: IndexWriter.addDocument will now throw an IllegalArgumentException
if a Term to be indexed exceeds IndexWriter.MAX_TERM_LENGTH. To recreate previous
behavior of silently ignoring these terms, use LengthFilter in your Analyzer.
(hossman, Mike McCandless, Varun Thacker)
* SOLR-5796: Make how long we are willing to wait for a core to see the ZK
advertised leader in its local state configurable.
(Timothy Potter via Mark Miller)
================== 4.7.0 ==================
Versions of Major Components

View File

@ -123,7 +123,16 @@ public class Overseer {
else if (LeaderStatus.YES == isLeader) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message.getStr(QUEUE_OPERATION);
clusterState = processMessage(clusterState, message, operation);
try {
clusterState = processMessage(clusterState, message, operation);
} catch (Exception e) {
// generally there is nothing we can do - in most cases, we have
// an issue that will fail again on retry or we cannot communicate with
// ZooKeeper in which case another Overseer should take over
// TODO: if ordering for the message is not important, we could
// track retries and put it back on the end of the queue
log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e);
}
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(clusterState), true);
@ -189,8 +198,16 @@ public class Overseer {
while (head != null) {
final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
final String operation = message.getStr(QUEUE_OPERATION);
clusterState = processMessage(clusterState, message, operation);
try {
clusterState = processMessage(clusterState, message, operation);
} catch (Exception e) {
// generally there is nothing we can do - in most cases, we have
// an issue that will fail again on retry or we cannot communicate with
// ZooKeeper in which case another Overseer should take over
// TODO: if ordering for the message is not important, we could
// track retries and put it back on the end of the queue
log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e);
}
workQueue.offer(head.getBytes());
stateUpdateQueue.poll();
@ -294,6 +311,7 @@ public class Overseer {
private ClusterState createReplica(ClusterState clusterState, ZkNodeProps message) {
log.info("createReplica() {} ", message);
String coll = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
Slice sl = clusterState.getSlice(coll, slice);
if(sl == null){
@ -334,6 +352,7 @@ public class Overseer {
private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
log.info("Update shard state invoked for collection: " + collection + " with message: " + message);
for (String key : message.keySet()) {
if (ZkStateReader.COLLECTION_PROP.equals(key)) continue;
@ -358,6 +377,7 @@ public class Overseer {
private ClusterState addRoutingRule(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String shard = message.getStr(ZkStateReader.SHARD_ID_PROP);
String routeKey = message.getStr("routeKey");
String range = message.getStr("range");
@ -397,8 +417,22 @@ public class Overseer {
return clusterState;
}
/**
 * Checks that the message names a collection, i.e. that its
 * {@code ZkStateReader.COLLECTION_PROP} property passes
 * {@code checkKeyExistence}.
 *
 * @param message the Overseer message to inspect
 * @return the result of {@code checkKeyExistence} for the collection property
 */
private boolean checkCollectionKeyExistence(ZkNodeProps message) {
  final String collectionKey = ZkStateReader.COLLECTION_PROP;
  return checkKeyExistence(message, collectionKey);
}
/**
 * Verifies that the given message has a usable value for {@code key}.
 * A missing value, or one that is empty after trimming, is logged as an
 * error and reported as absent.
 *
 * @param message the Overseer message to inspect
 * @param key the property name to look up in the message
 * @return {@code true} if the property is present and not empty after
 *         trimming, {@code false} otherwise
 */
private boolean checkKeyExistence(ZkNodeProps message, String key) {
  final String found = message.getStr(key);
  final boolean present = found != null && found.trim().length() != 0;
  if (!present) {
    log.error("Skipping invalid Overseer message because it has no " + key + " specified: " + message);
  }
  return present;
}
private ClusterState removeRoutingRule(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String shard = message.getStr(ZkStateReader.SHARD_ID_PROP);
String routeKeyStr = message.getStr("routeKey");
@ -424,6 +458,7 @@ public class Overseer {
private ClusterState createShard(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP);
Slice slice = clusterState.getSlice(collection, shardId);
if (slice == null) {
@ -470,6 +505,7 @@ public class Overseer {
private ClusterState updateStateNew(ClusterState clusterState, ZkNodeProps message) {
String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP);
if(collection==null || sliceName == null){
@ -488,32 +524,30 @@ public class Overseer {
/**
* Try to assign core to the cluster.
*/
private ClusterState updateState(ClusterState state, final ZkNodeProps message) {
private ClusterState updateState(ClusterState clusterState, final ZkNodeProps message) {
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
assert collection.length() > 0 : message;
if (!checkCollectionKeyExistence(message)) return clusterState;
Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null);
log.info("Update state numShards={} message={}", numShards, message);
List<String> shardNames = new ArrayList<String>();
//collection does not yet exist, create placeholders if num shards is specified
boolean collectionExists = state.hasCollection(collection);
boolean collectionExists = clusterState.hasCollection(collection);
if (!collectionExists && numShards!=null) {
getShardNames(numShards, shardNames);
state = createCollection(state, collection, shardNames, message);
clusterState = createCollection(clusterState, collection, shardNames, message);
}
String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP);
String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
if (coreNodeName == null) {
coreNodeName = getAssignedCoreNodeName(state, message);
coreNodeName = getAssignedCoreNodeName(clusterState, message);
if (coreNodeName != null) {
log.info("node=" + coreNodeName + " is already registered");
} else {
// if coreNodeName is null, auto assign one
coreNodeName = Assign.assignNode(collection, state);
coreNodeName = Assign.assignNode(collection, clusterState);
}
message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP,
coreNodeName);
@ -522,7 +556,7 @@ public class Overseer {
// use the provided non null shardId
if (sliceName == null) {
//get shardId from ClusterState
sliceName = getAssignedId(state, coreNodeName, message);
sliceName = getAssignedId(clusterState, coreNodeName, message);
if (sliceName != null) {
log.info("shard=" + sliceName + " is already registered");
}
@ -531,14 +565,14 @@ public class Overseer {
//request new shardId
if (collectionExists) {
// use existing numShards
numShards = state.getCollection(collection).getSlices().size();
numShards = clusterState.getCollection(collection).getSlices().size();
log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards);
}
sliceName = Assign.assignShard(collection, state, numShards);
sliceName = Assign.assignShard(collection, clusterState, numShards);
log.info("Assigning new node to shard shard=" + sliceName);
}
Slice slice = state.getSlice(collection, sliceName);
Slice slice = clusterState.getSlice(collection, sliceName);
Map<String,Object> replicaProps = new LinkedHashMap<String,Object>();
@ -584,9 +618,9 @@ public class Overseer {
Map<String,Replica> replicas;
if (slice != null) {
state = checkAndCompleteShardSplit(state, collection, coreNodeName, sliceName, replicaProps);
clusterState = checkAndCompleteShardSplit(clusterState, collection, coreNodeName, sliceName, replicaProps);
// get the current slice again because it may have been updated due to checkAndCompleteShardSplit method
slice = state.getSlice(collection, sliceName);
slice = clusterState.getSlice(collection, sliceName);
sliceProps = slice.getProperties();
replicas = slice.getReplicasCopy();
} else {
@ -600,7 +634,7 @@ public class Overseer {
replicas.put(replica.getName(), replica);
slice = new Slice(sliceName, replicas, sliceProps);
ClusterState newClusterState = updateSlice(state, collection, slice);
ClusterState newClusterState = updateSlice(clusterState, collection, slice);
return newClusterState;
}
@ -849,13 +883,9 @@ public class Overseer {
* Remove collection from cloudstate
*/
private ClusterState removeCollection(final ClusterState clusterState, ZkNodeProps message) {
final String collection = message.getStr("name");
if (!checkKeyExistence(message, "name")) return clusterState;
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
// newCollections.remove(collection);
// ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections);
return clusterState.copyWith(singletonMap(collection, (DocCollection)null));
}
@ -863,32 +893,28 @@ public class Overseer {
* Remove collection slice from cloudstate
*/
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
DocCollection coll = clusterState.getCollection(collection);
Map<String, Slice> newSlices = new LinkedHashMap<String, Slice>(coll.getSlicesMap());
newSlices.remove(sliceId);
DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter());
// newCollections.put(newCollection.getName(), newCollection);
return newState(clusterState, singletonMap(collection,newCollection));
// return new ClusterState(clusterState.getLiveNodes(), newCollections);
}
/*
* Remove core from cloudstate
*/
private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) {
String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
final String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
if (!checkCollectionKeyExistence(message)) return clusterState;
// final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
// DocCollection coll = newCollections.get(collection);

View File

@ -1627,7 +1627,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
if (!created)
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully createcollection: " + message.getStr("name"));
log.info("going to create cores replicas shardNames {} , repFactor : {}", shardNames, repFactor);
log.info("Creating SolrCores for new collection, shardNames {} , replicationFactor : {}", shardNames, repFactor);
Map<String ,ShardRequest> coresToCreate = new LinkedHashMap<String, ShardRequest>();
for (int i = 1; i <= shardNames.size(); i++) {
String sliceName = shardNames.get(i-1);
@ -1671,14 +1671,17 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
sreq.actualShards = sreq.shards;
sreq.params = params;
if(isLegacyCloud) shardHandler.submit(sreq, sreq.shards[0], sreq.params);
else coresToCreate.put(coreName, sreq);
if(isLegacyCloud) {
shardHandler.submit(sreq, sreq.shards[0], sreq.params);
} else {
coresToCreate.put(coreName, sreq);
}
}
}
if(!isLegacyCloud) {
//wait for all replica entries to be created
Map<String, Replica> replicas = lookupReplicas(collectionName, coresToCreate.keySet());
// wait for all replica entries to be created
Map<String, Replica> replicas = waitToSeeReplicasInState(collectionName, coresToCreate.keySet());
for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) {
ShardRequest sreq = e.getValue();
sreq.params.set(CoreAdminParams.CORE_NODE_NAME, replicas.get(e.getKey()).getName());
@ -1704,37 +1707,35 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
}
}
private Map<String, Replica> lookupReplicas(String collectionName, Collection<String> coreNames) throws InterruptedException {
private Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
Map<String, Replica> result = new HashMap<String, Replica>();
long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS);
for(;;) {
DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName);
for (String coreName : coreNames) {
if(result.containsKey(coreName)) continue;
long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
while (true) {
DocCollection coll = zkStateReader.getClusterState().getCollection(
collectionName);
for (String coreName : coreNames) {
if (result.containsKey(coreName)) continue;
for (Slice slice : coll.getSlices()) {
for (Replica replica : slice.getReplicas()) {
if(coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
result.put(coreName,replica);
if (coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
result.put(coreName, replica);
break;
}
}
}
}
if(result.size() == coreNames.size()) {
if (result.size() == coreNames.size()) {
return result;
}
if( System.nanoTime() > endTime) {
//time up . throw exception and go out
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to create replica entries in ZK");
if (System.nanoTime() > endTime) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas in cluster state.");
}
Thread.sleep(100);
}
}
private void addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException {
String collection = message.getStr(COLLECTION_PROP);
String node = message.getStr("node");
@ -1789,7 +1790,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
ZkStateReader.BASE_URL_PROP,zkStateReader.getBaseUrlForNodeName(node));
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(props));
params.set(CoreAdminParams.CORE_NODE_NAME, lookupReplicas(collection, Collections.singletonList(coreName)).get(coreName).getName());
params.set(CoreAdminParams.CORE_NODE_NAME, waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
}

View File

@ -31,6 +31,7 @@ import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@ -160,6 +161,7 @@ public final class ZkController {
protected volatile Overseer overseer;
private int leaderVoteWait;
private int leaderConflictResolveWait;
private boolean genericCoreNodeNames;
@ -168,8 +170,10 @@ public final class ZkController {
private volatile boolean isClosed;
public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort,
String localHostContext, int leaderVoteWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException,
TimeoutException, IOException {
String localHostContext, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect)
throws InterruptedException, TimeoutException, IOException
{
if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null.");
this.cc = cc;
this.genericCoreNodeNames = genericCoreNodeNames;
@ -188,6 +192,8 @@ public final class ZkController {
this.localHostContext);
this.leaderVoteWait = leaderVoteWait;
this.leaderConflictResolveWait = leaderConflictResolveWait;
this.clientTimeout = zkClientTimeout;
zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout,
zkClientConnectTimeout, new DefaultConnectionStrategy(),
@ -850,19 +856,28 @@ public final class ZkController {
shardId, timeoutms * 2); // since we found it in zk, we are willing to
// wait a while to find it in state
int tries = 0;
final long msInSec = 1000L;
int maxTries = (int)Math.floor(leaderConflictResolveWait/msInSec);
while (!leaderUrl.equals(clusterStateLeaderUrl)) {
if (tries == 60) {
if (tries > maxTries) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"There is conflicting information about the leader of shard: "
+ cloudDesc.getShardId() + " our state says:"
+ clusterStateLeaderUrl + " but zookeeper says:" + leaderUrl);
}
Thread.sleep(1000);
Thread.sleep(msInSec);
tries++;
clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId,
timeoutms);
leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms)
.getCoreUrl();
if (tries % 30 == 0) {
String warnMsg = String.format(Locale.ENGLISH, "Still seeing conflicting information about the leader "
+ "of shard %s for collection %s after %d seconds; our state says %s, but ZooKeeper says %s",
cloudDesc.getShardId(), collection, tries, clusterStateLeaderUrl, leaderUrl);
log.warn(warnMsg);
}
}
} catch (Exception e) {
@ -1013,7 +1028,8 @@ public final class ZkController {
core.close();
}
}
log.info("publishing core={} state={}", cd.getName(), state);
String collection = cd.getCloudDescriptor().getCollectionName();
log.info("publishing core={} state={} collection={}", cd.getName(), state, collection);
//System.out.println(Thread.currentThread().getStackTrace()[3]);
Integer numShards = cd.getCloudDescriptor().getNumShards();
if (numShards == null) { //XXX sys prop hack
@ -1021,8 +1037,7 @@ public final class ZkController {
numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
}
assert cd.getCloudDescriptor().getCollectionName() != null && cd.getCloudDescriptor()
.getCollectionName().length() > 0;
assert collection != null && collection.length() > 0;
String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
//assert cd.getCloudDescriptor().getShardId() != null;
@ -1033,12 +1048,9 @@ public final class ZkController {
ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles(),
ZkStateReader.NODE_NAME_PROP, getNodeName(),
ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId(),
ZkStateReader.COLLECTION_PROP, cd.getCloudDescriptor()
.getCollectionName(),
ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString()
: null,
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName
: null);
ZkStateReader.COLLECTION_PROP, collection,
ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() : null,
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName : null);
if (updateLastState) {
cd.getCloudDescriptor().lastPublished = state;
}
@ -1064,6 +1076,12 @@ public final class ZkController {
final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
final String collection = cd.getCloudDescriptor().getCollectionName();
assert collection != null;
if (collection == null || collection.trim().length() == 0) {
log.error("No collection was specified.");
return;
}
ElectionContext context = electionContexts.remove(new ContextKey(collection, coreNodeName));
if (context != null) {
@ -1362,7 +1380,6 @@ public final class ZkController {
CloudDescriptor cloudDesc = cd.getCloudDescriptor();
// make sure the node name is set on the descriptor
if (cloudDesc.getCoreNodeName() == null) {
cloudDesc.setCoreNodeName(coreNodeName);

View File

@ -138,6 +138,7 @@ public abstract class ConfigSolr {
private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000;
private static final int DEFAULT_LEADER_VOTE_WAIT = 180000; // 3 minutes
private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000;
private static final int DEFAULT_CORE_LOAD_THREADS = 3;
protected static final String DEFAULT_CORE_ADMIN_PATH = "/admin/cores";
@ -158,6 +159,10 @@ public abstract class ConfigSolr {
return getInt(CfgProp.SOLR_LEADERVOTEWAIT, DEFAULT_LEADER_VOTE_WAIT);
}
/**
 * Looks up the {@code SOLR_LEADERCONFLICTRESOLVEWAIT} setting, handing
 * {@code DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT} to {@code getInt} as the default.
 *
 * @return the value reported by {@code getInt}
 *         (presumably milliseconds, like the other wait settings here; verify against the consumer)
 */
public int getLeaderConflictResolveWait() {
  final int fallback = DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT;
  return getInt(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, fallback);
}
public boolean getGenericCoreNodeNames() {
return getBool(CfgProp.SOLR_GENERICCORENODENAMES, false);
}
@ -255,6 +260,7 @@ public abstract class ConfigSolr {
SOLR_GENERICCORENODENAMES,
SOLR_ZKCLIENTTIMEOUT,
SOLR_ZKHOST,
SOLR_LEADERCONFLICTRESOLVEWAIT,
//TODO: Remove all of these elements for 5.0
SOLR_PERSISTENT,

View File

@ -67,6 +67,7 @@ public class ConfigSolrXml extends ConfigSolr {
failIfFound("solr/cores/@hostContext");
failIfFound("solr/cores/@hostPort");
failIfFound("solr/cores/@leaderVoteWait");
failIfFound("solr/cores/@leaderConflictResolveWait");
failIfFound("solr/cores/@genericCoreNodeNames");
failIfFound("solr/cores/@managementPath");
failIfFound("solr/cores/@shareSchema");
@ -113,6 +114,7 @@ public class ConfigSolrXml extends ConfigSolr {
propMap.put(CfgProp.SOLR_HOSTCONTEXT, doSub("solr/solrcloud/str[@name='hostContext']"));
propMap.put(CfgProp.SOLR_HOSTPORT, doSub("solr/solrcloud/int[@name='hostPort']"));
propMap.put(CfgProp.SOLR_LEADERVOTEWAIT, doSub("solr/solrcloud/int[@name='leaderVoteWait']"));
propMap.put(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, doSub("solr/solrcloud/int[@name='leaderConflictResolveWait']"));
propMap.put(CfgProp.SOLR_GENERICCORENODENAMES, doSub("solr/solrcloud/bool[@name='genericCoreNodeNames']"));
propMap.put(CfgProp.SOLR_MANAGEMENTPATH, doSub("solr/str[@name='managementPath']"));
propMap.put(CfgProp.SOLR_SHAREDLIB, doSub("solr/str[@name='sharedLib']"));

View File

@ -51,6 +51,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
public static final String BLOCKCACHE_SLAB_COUNT = "solr.hdfs.blockcache.slab.count";
public static final String BLOCKCACHE_DIRECT_MEMORY_ALLOCATION = "solr.hdfs.blockcache.direct.memory.allocation";
public static final String BLOCKCACHE_ENABLED = "solr.hdfs.blockcache.enabled";
public static final String BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global";
public static final String BLOCKCACHE_READ_ENABLED = "solr.hdfs.blockcache.read.enabled";
public static final String BLOCKCACHE_WRITE_ENABLED = "solr.hdfs.blockcache.write.enabled";
@ -73,6 +74,8 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
private String confDir;
private static BlockCache globalBlockCache;
public static Metrics metrics;
private static Boolean kerberosInit;
@ -102,6 +105,7 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
}
boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true);
boolean blockCacheGlobal = params.getBool(BLOCKCACHE_GLOBAL, false); // default to false for back compat
boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED,
true);
boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true);
@ -117,8 +121,6 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
boolean directAllocation = params.getBool(
BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true);
BlockCache blockCache;
int slabSize = numberOfBlocksPerBank * blockSize;
LOG.info(
"Number of slabs of block cache [{}] with direct memory allocation set to [{}]",
@ -131,22 +133,13 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128);
int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128);
BufferStore.initNewBuffer(bufferSize, bufferCount);
long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank
* (long) blockSize;
try {
blockCache = new BlockCache(metrics, directAllocation, totalMemory,
slabSize, blockSize);
} catch (OutOfMemoryError e) {
throw new RuntimeException(
"The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your containers startup args)"
+ " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
+ " your java heap size might not be large enough."
+ " Failed allocating ~" + totalMemory / 1000000.0 + " MB.", e);
}
Cache cache = new BlockDirectoryCache(blockCache, metrics);
BlockCache blockCache = getBlockDirectoryCache(path, numberOfBlocksPerBank,
blockSize, bankCount, directAllocation, slabSize,
bufferSize, bufferCount, blockCacheGlobal);
Cache cache = new BlockDirectoryCache(blockCache, path, metrics);
HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf);
dir = new BlockDirectory("solrcore", hdfsDirectory, cache, null,
dir = new BlockDirectory(path, hdfsDirectory, cache, null,
blockCacheReadEnabled, blockCacheWriteEnabled);
} else {
dir = new HdfsDirectory(new Path(path), conf);
@ -165,6 +158,45 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory {
return dir;
}
/**
 * Returns the {@link BlockCache} to use for a directory.
 *
 * <p>When {@code staticBlockCache} is {@code false}, a brand new cache is built
 * on every call. Otherwise the single static {@code globalBlockCache} is
 * returned, being created on the first request only; creation and lookup are
 * guarded by the {@code HdfsDirectoryFactory.class} monitor.
 *
 * @param path the directory path (not consulted by this method)
 * @param numberOfBlocksPerBank passed through to {@code createBlockCache}
 * @param blockSize passed through to {@code createBlockCache}
 * @param bankCount passed through to {@code createBlockCache}
 * @param directAllocation passed through to {@code createBlockCache}
 * @param slabSize passed through to {@code createBlockCache}
 * @param bufferSize passed through to {@code createBlockCache}
 * @param bufferCount passed through to {@code createBlockCache}
 * @param staticBlockCache {@code true} to share the global cache, {@code false} for a private one
 * @return the private or shared block cache
 */
private BlockCache getBlockDirectoryCache(String path,
    int numberOfBlocksPerBank, int blockSize, int bankCount,
    boolean directAllocation, int slabSize, int bufferSize, int bufferCount, boolean staticBlockCache) {
  if (!staticBlockCache) {
    LOG.info("Creating new single instance HDFS BlockCache");
    return createBlockCache(numberOfBlocksPerBank, blockSize, bankCount, directAllocation, slabSize, bufferSize, bufferCount);
  }

  synchronized (HdfsDirectoryFactory.class) {
    if (globalBlockCache == null) {
      // Log only when the cache is actually built; previously this message was
      // emitted on every call, even when the existing global cache was reused.
      LOG.info("Creating new global HDFS BlockCache");
      globalBlockCache = createBlockCache(numberOfBlocksPerBank, blockSize, bankCount,
          directAllocation, slabSize, bufferSize, bufferCount);
    }
    // Read the shared field while still holding the lock that guards its initialization.
    return globalBlockCache;
  }
}
/**
 * Initializes the buffer store and allocates a new {@link BlockCache}.
 *
 * <p>The requested size is {@code bankCount * numberOfBlocksPerBank * blockSize}
 * bytes, computed in {@code long} arithmetic.
 *
 * @return the newly allocated cache
 * @throws RuntimeException wrapping the {@link OutOfMemoryError} if the allocation fails
 */
private BlockCache createBlockCache(int numberOfBlocksPerBank, int blockSize,
    int bankCount, boolean directAllocation, int slabSize, int bufferSize,
    int bufferCount) {
  BufferStore.initNewBuffer(bufferSize, bufferCount);

  // Each factor is widened to long before multiplying.
  final long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank * (long) blockSize;

  try {
    return new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize);
  } catch (OutOfMemoryError oom) {
    final double approxMegabytes = totalMemory / 1000000.0;
    throw new RuntimeException(
        "The max direct memory is likely too low. Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your containers startup args)"
            + " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
            + " your java heap size might not be large enough."
            + " Failed allocating ~" + approxMegabytes + " MB.",
        oom);
  }
}
@Override
public boolean exists(String path) {
Path hdfsDirPath = new Path(path);

View File

@ -66,11 +66,12 @@ public class ZkContainer {
initZooKeeper(cc, solrHome,
config.getZkHost(), config.getZkClientTimeout(), config.getZkHostPort(), config.getZkHostContext(),
config.getHost(), config.getLeaderVoteWait(), config.getGenericCoreNodeNames());
config.getHost(), config.getLeaderVoteWait(), config.getLeaderConflictResolveWait(), config.getGenericCoreNodeNames());
}
public void initZooKeeper(final CoreContainer cc, String solrHome, String zkHost, int zkClientTimeout, String hostPort,
String hostContext, String host, int leaderVoteWait, boolean genericCoreNodeNames) {
String hostContext, String host, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames) {
ZkController zkController = null;
// if zkHost sys property is not set, we are not using ZooKeeper
@ -135,7 +136,7 @@ public class ZkContainer {
}
zkController = new ZkController(cc, zookeeperHost, zkClientTimeout,
zkClientConnectTimeout, host, hostPort, hostContext,
leaderVoteWait, genericCoreNodeNames,
leaderVoteWait, leaderConflictResolveWait, genericCoreNodeNames,
new CurrentCoreDescriptorProvider() {
@Override

View File

@ -25,12 +25,14 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
@ -500,12 +502,32 @@ public class QueryComponent extends SearchComponent
// sort ids from lowest to highest so we can access them in order
int nDocs = docList.size();
long[] sortedIds = new long[nDocs];
DocIterator it = rb.getResults().docList.iterator();
final long[] sortedIds = new long[nDocs];
final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
DocList docs = rb.getResults().docList;
DocIterator it = docs.iterator();
for (int i=0; i<nDocs; i++) {
sortedIds[i] = (((long)it.nextDoc()) << 32) | i;
scores[i] = docs.hasScores() ? it.score() : Float.NaN;
}
Arrays.sort(sortedIds);
// sort ids and scores together
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
long tmpId = sortedIds[i];
float tmpScore = scores[i];
sortedIds[i] = sortedIds[j];
scores[i] = scores[j];
sortedIds[j] = tmpId;
scores[j] = tmpScore;
}
@Override
protected int compare(int i, int j) {
return Long.compare(sortedIds[i], sortedIds[j]);
}
}.sort(0, sortedIds.length);
SortSpec sortSpec = rb.getSortSpec();
Sort sort = searcher.weightSort(sortSpec.getSort());
@ -527,7 +549,9 @@ public class QueryComponent extends SearchComponent
int lastIdx = -1;
int idx = 0;
for (long idAndPos : sortedIds) {
for (int i = 0; i < sortedIds.length; ++i) {
long idAndPos = sortedIds[i];
float score = scores[i];
int doc = (int)(idAndPos >>> 32);
int position = (int)idAndPos;
@ -546,6 +570,7 @@ public class QueryComponent extends SearchComponent
}
doc -= currentLeaf.docBase; // adjust for what segment this is in
comparator.setScorer(new FakeScorer(doc, score));
comparator.copy(0, doc);
Object val = comparator.value(0);
if (null != ft) val = ft.marshalSortValue(val);
@ -1157,4 +1182,50 @@ public class QueryComponent extends SearchComponent
public URL[] getDocs() {
return null;
}
/**
* Fake scorer for a single document: it only reports a fixed doc id and a fixed
* score, both supplied at construction time. All iteration-related methods are
* unsupported.
*
* TODO: when SOLR-5595 is fixed, this won't be needed, as we don't need to recompute sort values here from the comparator
*/
private static class FakeScorer extends Scorer {
// The document id returned by docID().
final int docid;
// The score returned by score().
final float score;
// Stores the given doc id and score; null is handed to the Scorer super constructor.
FakeScorer(int docid, float score) {
super(null);
this.docid = docid;
this.score = score;
}
// Returns the doc id given to the constructor.
@Override
public int docID() {
return docid;
}
// Returns the score given to the constructor.
@Override
public float score() throws IOException {
return score;
}
// Not supported: always throws UnsupportedOperationException.
@Override
public int freq() throws IOException {
throw new UnsupportedOperationException();
}
// Not supported: this scorer cannot be iterated.
@Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException();
}
// Not supported: this scorer cannot be iterated.
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
// Constant cost of 1.
@Override
public long cost() {
return 1;
}
}
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
@ -90,7 +91,8 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {
try {
return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion,
new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
FSDirectory.open(new File(indexPath)), indexAnalyzer,
queryAnalyzer, minPrefixChars);
} catch (IOException e) {
throw new RuntimeException();
}

View File

@ -23,8 +23,9 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester.BlenderType;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
@ -94,7 +95,9 @@ public class BlendedInfixLookupFactory extends AnalyzingInfixLookupFactory {
try {
return new BlendedInfixSuggester(core.getSolrConfig().luceneMatchVersion,
new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor);
FSDirectory.open(new File(indexPath)),
indexAnalyzer, queryAnalyzer, minPrefixChars,
blenderType, numFactor);
} catch (IOException e) {
throw new RuntimeException();
}

View File

@ -24,6 +24,9 @@ import java.util.concurrent.atomic.AtomicInteger;
import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap;
import com.googlecode.concurrentlinkedhashmap.EvictionListener;
/**
* @lucene.experimental
*/
public class BlockCache {
public static final int _128M = 134217728;

View File

@ -16,11 +16,22 @@ package org.apache.solr.store.blockcache;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @lucene.experimental
*/
public class BlockCacheKey implements Cloneable {
private long block;
private int file;
private String path;
/** Returns the path stored in this key; may be null if it was never set. */
public String getPath() {
return path;
}
/** Stores the given path in this key, replacing any previous value. */
public void setPath(String path) {
this.path = path;
}
public long getBlock() {
return block;
@ -44,6 +55,7 @@ public class BlockCacheKey implements Cloneable {
int result = 1;
result = prime * result + (int) (block ^ (block >>> 32));
result = prime * result + file;
result = prime * result + ((path == null) ? 0 : path.hashCode());
return result;
}
@ -55,6 +67,9 @@ public class BlockCacheKey implements Cloneable {
BlockCacheKey other = (BlockCacheKey) obj;
if (block != other.block) return false;
if (file != other.file) return false;
if (path == null) {
if (other.path != null) return false;
} else if (!path.equals(other.path)) return false;
return true;
}

View File

@ -19,6 +19,9 @@ package org.apache.solr.store.blockcache;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* @lucene.experimental
*/
public class BlockCacheLocation {
private int block;

View File

@ -34,6 +34,9 @@ import org.apache.solr.store.hdfs.HdfsDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @lucene.experimental
*/
public class BlockDirectory extends Directory {
public static Logger LOG = LoggerFactory.getLogger(BlockDirectory.class);
@ -82,7 +85,7 @@ public class BlockDirectory extends Directory {
private Directory directory;
private int blockSize;
private String dirName;
private Cache cache;
private final Cache cache;
private Set<String> blockCacheFileTypes;
private final boolean blockCacheReadEnabled;
private final boolean blockCacheWriteEnabled;
@ -265,6 +268,15 @@ public class BlockDirectory extends Directory {
return dirName + "/" + name;
}
/**
* Expert: mostly for tests
*
* @lucene.experimental
*/
public Cache getCache() {
return cache;
}
@Override
public void copy(Directory to, String src, String dest, IOContext context)
throws IOException {
@ -383,4 +395,13 @@ public class BlockDirectory extends Directory {
return directory;
}
/** Returns the value of this directory's {@code blockCacheReadEnabled} flag. */
public boolean isBlockCacheReadEnabled() {
return blockCacheReadEnabled;
}
/** Returns the value of this directory's {@code blockCacheWriteEnabled} flag. */
public boolean isBlockCacheWriteEnabled() {
return blockCacheWriteEnabled;
}
}

View File

@ -21,17 +21,31 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
/**
* @lucene.experimental
*/
public class BlockDirectoryCache implements Cache {
private BlockCache blockCache;
private final BlockCache blockCache;
private AtomicInteger counter = new AtomicInteger();
private Map<String,Integer> names = new ConcurrentHashMap<String,Integer>();
private String path;
private Metrics metrics;
public BlockDirectoryCache(BlockCache blockCache, Metrics metrics) {
public BlockDirectoryCache(BlockCache blockCache, String path, Metrics metrics) {
this.blockCache = blockCache;
this.path = path;
this.metrics = metrics;
}
/**
* Expert: mostly for tests
*
* @lucene.experimental
*/
public BlockCache getBlockCache() {
return blockCache;
}
@Override
public void delete(String name) {
names.remove(name);
@ -46,6 +60,7 @@ public class BlockDirectoryCache implements Cache {
names.put(name, file);
}
BlockCacheKey blockCacheKey = new BlockCacheKey();
blockCacheKey.setPath(path);
blockCacheKey.setBlock(blockId);
blockCacheKey.setFile(file);
blockCache.store(blockCacheKey, blockOffset, buffer, offset, length);
@ -59,6 +74,7 @@ public class BlockDirectoryCache implements Cache {
return false;
}
BlockCacheKey blockCacheKey = new BlockCacheKey();
blockCacheKey.setPath(path);
blockCacheKey.setBlock(blockId);
blockCacheKey.setFile(file);
boolean fetch = blockCache.fetch(blockCacheKey, b, blockOffset, off,

View File

@ -21,6 +21,9 @@ import java.util.concurrent.atomic.AtomicLongArray;
import org.apache.lucene.util.LongBitSet;
/**
* @lucene.experimental
*/
public class BlockLocks {
private AtomicLongArray bits;

View File

@ -22,7 +22,9 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
/**
* @lucene.experimental
*/
public class BufferStore implements Store {
private static final Store EMPTY = new Store() {

View File

@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache;
* limitations under the License.
*/
/**
* @lucene.experimental
*/
public interface Cache {
/**

View File

@ -21,10 +21,11 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
/*
/**
* Cache the blocks as they are written. The cache file name is the name of
* the file until the file is closed, at which point the cache is updated
* to include the last modified date (which is unknown until that point).
* @lucene.experimental
*/
public class CachedIndexOutput extends ReusedBufferedIndexOutput {
private final BlockDirectory directory;

View File

@ -23,6 +23,9 @@ import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
/**
* @lucene.experimental
*/
public abstract class CustomBufferedIndexInput extends IndexInput {
public static final int BUFFER_SIZE = 32768;

View File

@ -29,6 +29,9 @@ import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater;
import org.apache.hadoop.metrics.jvm.JvmMetrics;
/**
* @lucene.experimental
*/
public class Metrics implements Updater {
public static class MethodCall {

View File

@ -21,6 +21,9 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
/**
* @lucene.experimental
*/
public abstract class ReusedBufferedIndexOutput extends IndexOutput {
public static final int BUFFER_SIZE = 1024;

View File

@ -17,6 +17,9 @@ package org.apache.solr.store.blockcache;
* limitations under the License.
*/
/**
* @lucene.experimental
*/
public interface Store {
byte[] takeBuffer(int bufferSize);

View File

@ -28,6 +28,9 @@ import org.apache.lucene.store.DataInput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @lucene.experimental
*/
public class HdfsFileReader extends DataInput {
public static Logger LOG = LoggerFactory.getLogger(HdfsFileReader.class);

View File

@ -32,6 +32,9 @@ import org.apache.lucene.store.DataOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @lucene.experimental
*/
public class HdfsFileWriter extends DataOutput implements Closeable {
public static Logger LOG = LoggerFactory.getLogger(HdfsFileWriter.class);

View File

@ -21,6 +21,9 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
/**
* @lucene.experimental
*/
public class NullIndexOutput extends IndexOutput {
private long pos;

View File

@ -0,0 +1,41 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test-custom-comparator" version="1.5">
<types>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldType class="org.apache.solr.schema.WrappedIntField" name="wrapped_int"/>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="payload" type="wrapped_int" indexed="false"
stored="true" multiValued="false" docValues="true" required="true"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -287,6 +287,16 @@ valued. -->
class="solr.ExternalFileField"/>
<fieldType name="text_no_analyzer" stored="false" indexed="true" class="solr.TextField" />
<fieldtype name="text_length" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="32768"/>
</analyzer>
</fieldtype>
</types>
@ -324,6 +334,9 @@ valued. -->
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
<field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="cat_length" type="text_length" indexed="true" stored="true" multiValued="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have

View File

@ -28,6 +28,7 @@
<int name="solr.hdfs.blockcache.blocksperbank">${solr.hdfs.blockcache.blocksperbank:1024}</int>
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:false}</str>
</directoryFactory>
<dataDir>${solr.data.dir:}</dataDir>

View File

@ -47,6 +47,7 @@
<double name="maxWriteMBPerSecRead">4000000</double>
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<bool name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</bool>
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:false}</str>
</directoryFactory>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>

View File

@ -131,7 +131,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
int threadCount = 1;
int i = 0;
for (i = 0; i < threadCount; i++) {
StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true);
StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
threads.add(indexThread);
indexThread.start();
}
@ -270,7 +270,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
public FullThrottleStopableIndexingThread(List<SolrServer> clients,
String id, boolean doDeletes) {
super(id, doDeletes);
super(controlClient, cloudClient, id, doDeletes);
setName("FullThrottleStopableIndexingThread");
setDaemon(true);
this.clients = clients;

View File

@ -108,7 +108,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>();
int threadCount = 2;
for (int i = 0; i < threadCount; i++) {
StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true);
StopableIndexingThread indexThread = new StopableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
threads.add(indexThread);
indexThread.start();
}

View File

@ -203,14 +203,14 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
testCollectionsAPI();
testCollectionsAPIAddRemoveStress();
testErrorHandling();
testNoCollectionSpecified();
deletePartiallyCreatedCollection();
deleteCollectionRemovesStaleZkCollectionsNode();
clusterPropTest();
addReplicaTest();
// last
deleteCollectionWithDownNodes();
if (DEBUG) {
super.printLayout();
}
@ -579,6 +579,40 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
assertTrue(val1.contains("SolrException") || val2.contains("SolrException"));
}
/**
 * Creates two cores without a usable collection name (first an empty string,
 * then {@code null}) and asserts that each one ends up in a collection named
 * after the core itself.
 */
private void testNoCollectionSpecified() throws Exception {
  final String emptyNameCore = "corewithnocollection";
  final String nullNameCore = "corewithnocollection2";

  // neither collection may exist before the cores are created
  final ZkStateReader stateReader = cloudClient.getZkStateReader();
  stateReader.updateClusterState(true);
  assertFalse(stateReader.getAllCollections().contains(emptyNameCore));
  assertFalse(stateReader.getAllCollections().contains(nullNameCore));

  // first request: create a SolrCore with an empty collection name
  final Create createCmd = new Create();
  createCmd.setCoreName(emptyNameCore);
  createCmd.setCollection("");
  createCmd.setDataDir(SolrTestCaseJ4.dataDir.getAbsolutePath() + File.separator
      + System.currentTimeMillis() + "corewithnocollection" + "_1v");
  createCmd.setNumShards(1);
  if (secondConfigSet) {
    createCmd.setCollectionConfigName("conf1");
  }
  final String baseUrl = getBaseUrl((HttpSolrServer) clients.get(1));
  createNewSolrServer("", baseUrl).request(createCmd);

  // second request: reuse the command with a null collection name and a new core name
  createCmd.setCollection(null);
  createCmd.setCoreName(nullNameCore);
  createNewSolrServer("", baseUrl).request(createCmd);

  // in both cases, the collection should have defaulted to the core name
  stateReader.updateClusterState(true);
  assertTrue(stateReader.getAllCollections().contains(emptyNameCore));
  assertTrue(stateReader.getAllCollections().contains(nullNameCore));
}
private void testNodesUsedByCreate() throws Exception {
// we can use this client because we just want base url
final String baseUrl = getBaseUrl((HttpSolrServer) clients.get(0));
@ -631,7 +665,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
boolean disableLegacy = random().nextBoolean();
CloudSolrServer client1 = null;
if(disableLegacy) {
if (disableLegacy) {
log.info("legacyCloud=false");
client1 = createCloudClient(null);
setClusterProp(client1, ZkStateReader.LEGACY_CLOUD, "false");

View File

@ -64,18 +64,18 @@ public class OverseerTest extends SolrTestCaseJ4 {
private List<Overseer> overseers = new ArrayList<Overseer>();
private List<ZkStateReader> readers = new ArrayList<ZkStateReader>();
private String collection = "collection1";
public static class MockZKController{
private final SolrZkClient zkClient;
private final ZkStateReader zkStateReader;
private final String nodeName;
private final String collection;
private final LeaderElector elector;
private final Map<String, ElectionContext> electionContext = Collections.synchronizedMap(new HashMap<String, ElectionContext>());
public MockZKController(String zkAddress, String nodeName, String collection) throws InterruptedException, TimeoutException, IOException, KeeperException {
public MockZKController(String zkAddress, String nodeName) throws InterruptedException, TimeoutException, IOException, KeeperException {
this.nodeName = nodeName;
this.collection = collection;
zkClient = new SolrZkClient(zkAddress, TIMEOUT);
zkStateReader = new ZkStateReader(zkClient);
zkStateReader.createClusterStateWatchersAndUpdate();
@ -105,7 +105,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
zkClient.close();
}
public String publishState(String coreName, String coreNodeName, String stateName, int numShards)
public String publishState(String collection, String coreName, String coreNodeName, String stateName, int numShards)
throws KeeperException, InterruptedException, IOException {
if (stateName == null) {
ElectionContext ec = electionContext.remove(coreName);
@ -134,41 +134,40 @@ public class OverseerTest extends SolrTestCaseJ4 {
q.offer(ZkStateReader.toJSON(m));
}
for (int i = 0; i < 120; i++) {
String shardId = getShardId("http://" + nodeName + "/solr/", coreName);
if (shardId != null) {
try {
zkClient.makePath("/collections/" + collection + "/leader_elect/"
+ shardId + "/election", true);
} catch (NodeExistsException nee) {}
ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
"http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP,
nodeName, ZkStateReader.CORE_NAME_PROP, coreName,
ZkStateReader.SHARD_ID_PROP, shardId,
ZkStateReader.COLLECTION_PROP, collection,
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
elector, shardId, collection, nodeName + "_" + coreName, props,
zkStateReader);
elector.setup(ctx);
elector.joinElection(ctx, false);
return shardId;
if (collection.length() > 0) {
for (int i = 0; i < 120; i++) {
String shardId = getShardId(collection, coreNodeName);
if (shardId != null) {
try {
zkClient.makePath("/collections/" + collection + "/leader_elect/"
+ shardId + "/election", true);
} catch (NodeExistsException nee) {}
ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
"http://" + nodeName + "/solr/", ZkStateReader.NODE_NAME_PROP,
nodeName, ZkStateReader.CORE_NAME_PROP, coreName,
ZkStateReader.SHARD_ID_PROP, shardId,
ZkStateReader.COLLECTION_PROP, collection,
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
elector, shardId, collection, nodeName + "_" + coreName, props,
zkStateReader);
elector.setup(ctx);
elector.joinElection(ctx, false);
return shardId;
}
Thread.sleep(500);
}
Thread.sleep(500);
}
return null;
}
private String getShardId(final String baseUrl, final String coreName) {
Map<String,Slice> slices = zkStateReader.getClusterState().getSlicesMap(
collection);
private String getShardId(String collection, String coreNodeName) {
Map<String,Slice> slices = zkStateReader.getClusterState().getSlicesMap(collection);
if (slices != null) {
for (Slice slice : slices.values()) {
for (Replica replica : slice.getReplicas()) {
// TODO: for really large clusters, we could 'index' on this
String rbaseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
String rcore = replica.getStr(ZkStateReader.CORE_NAME_PROP);
if (baseUrl.equals(rbaseUrl) && coreName.equals(rcore)) {
String cnn = replica.getName();
if (coreNodeName.equals(cnn)) {
return slice.getName();
}
}
@ -226,17 +225,17 @@ public class OverseerTest extends SolrTestCaseJ4 {
ZkStateReader reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
zkController = new MockZKController(server.getZkAddress(), "127.0.0.1", "collection1");
zkController = new MockZKController(server.getZkAddress(), "127.0.0.1");
final int numShards=6;
for (int i = 0; i < numShards; i++) {
assertNotNull("shard got no id?", zkController.publishState("core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i+1), "node" + (i+1), ZkStateReader.ACTIVE, 3));
}
assertEquals(2, reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap().size());
assertEquals(2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size());
assertEquals(2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size());
Map<String,Replica> rmap = reader.getClusterState().getSlice("collection1", "shard1").getReplicasMap();
assertEquals(rmap.toString(), 2, rmap.size());
assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard2").getReplicasMap().size());
assertEquals(rmap.toString(), 2, reader.getClusterState().getSlice("collection1", "shard3").getReplicasMap().size());
//make sure leaders are in cloud state
assertNotNull(reader.getLeaderUrl("collection1", "shard1", 15000));
@ -258,6 +257,81 @@ public class OverseerTest extends SolrTestCaseJ4 {
}
}
/**
 * Publishes state messages with an empty collection name (a "bad" queue item)
 * and then verifies that the Overseer keeps processing well-formed messages by
 * registering three cores for a second collection and waiting for their leaders.
 */
@Test
public void testBadQueueItem() throws Exception {
  String zkDir = dataDir.getAbsolutePath() + File.separator
      + "zookeeper/server1/data";

  ZkTestServer server = new ZkTestServer(zkDir);

  MockZKController zkController = null;
  SolrZkClient zkClient = null;
  SolrZkClient overseerClient = null;

  try {
    server.run();
    AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
    AbstractZkTestCase.makeSolrZkNode(server.getZkHost());

    zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
    zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);

    overseerClient = electNewOverseer(server.getZkAddress());

    ZkStateReader reader = new ZkStateReader(zkClient);
    reader.createClusterStateWatchersAndUpdate();

    zkController = new MockZKController(server.getZkAddress(), "127.0.0.1");

    // register three cores against a three-shard collection: one replica per shard
    final int numShards = 3;
    for (int i = 0; i < numShards; i++) {
      assertNotNull("shard got no id?", zkController.publishState(collection, "core" + (i + 1), "node" + (i + 1), ZkStateReader.ACTIVE, 3));
    }

    assertEquals(1, reader.getClusterState().getSlice(collection, "shard1").getReplicasMap().size());
    assertEquals(1, reader.getClusterState().getSlice(collection, "shard2").getReplicasMap().size());
    assertEquals(1, reader.getClusterState().getSlice(collection, "shard3").getReplicasMap().size());

    // make sure leaders are in cloud state
    assertNotNull(reader.getLeaderUrl(collection, "shard1", 15000));
    assertNotNull(reader.getLeaderUrl(collection, "shard2", 15000));
    assertNotNull(reader.getLeaderUrl(collection, "shard3", 15000));

    // publish a bad queue item: a state update and a removal (null state),
    // both carrying an empty collection name
    String emptyCollectionName = "";
    zkController.publishState(emptyCollectionName, "core0", "node0", ZkStateReader.ACTIVE, 1);
    zkController.publishState(emptyCollectionName, "core0", "node0", null, 1);

    // make sure the Overseer is still processing items
    for (int i = 0; i < numShards; i++) {
      assertNotNull("shard got no id?", zkController.publishState("collection2", "core" + (i + 1), "node" + (i + 1), ZkStateReader.ACTIVE, 3));
    }

    assertEquals(1, reader.getClusterState().getSlice("collection2", "shard1").getReplicasMap().size());
    assertEquals(1, reader.getClusterState().getSlice("collection2", "shard2").getReplicasMap().size());
    assertEquals(1, reader.getClusterState().getSlice("collection2", "shard3").getReplicasMap().size());

    // make sure leaders are in cloud state
    assertNotNull(reader.getLeaderUrl("collection2", "shard1", 15000));
    assertNotNull(reader.getLeaderUrl("collection2", "shard2", 15000));
    assertNotNull(reader.getLeaderUrl("collection2", "shard3", 15000));

  } finally {
    // guard on the object that is actually dereferenced (was: zkController)
    if (DEBUG && zkClient != null) {
      zkClient.printLayoutToStdOut();
    }
    close(zkClient);
    if (zkController != null) {
      zkController.close();
    }
    close(overseerClient);
    server.shutdown();
  }
}
@Test
public void testShardAssignmentBigger() throws Exception {
String zkDir = dataDir.getAbsolutePath() + File.separator
@ -289,7 +363,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
reader.createClusterStateWatchersAndUpdate();
for (int i = 0; i < nodeCount; i++) {
controllers[i] = new MockZKController(server.getZkAddress(), "node" + i, "collection1");
controllers[i] = new MockZKController(server.getZkAddress(), "node" + i);
}
for (int i = 0; i < nodeCount; i++) {
nodeExecutors[i] = Executors.newFixedThreadPool(1, new DefaultSolrThreadFactory("testShardAssignment"));
@ -306,7 +380,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
final String coreName = "core" + slot;
try {
ids[slot]=controllers[slot % nodeCount].publishState(coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount);
ids[slot]=controllers[slot % nodeCount].publishState(collection, coreName, "node" + slot, ZkStateReader.ACTIVE, sliceCount);
} catch (Throwable e) {
e.printStackTrace();
fail("register threw exception:" + e.getClass());
@ -551,21 +625,20 @@ public class OverseerTest extends SolrTestCaseJ4 {
reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
mockController = new MockZKController(server.getZkAddress(), "node1",
"collection1");
mockController = new MockZKController(server.getZkAddress(), "node1");
overseerClient = electNewOverseer(server.getZkAddress());
Thread.sleep(1000);
mockController.publishState("core1", "core_node1",
mockController.publishState(collection, "core1", "core_node1",
ZkStateReader.RECOVERING, 1);
waitForCollections(reader, "collection1");
waitForCollections(reader, collection);
verifyStatus(reader, ZkStateReader.RECOVERING);
int version = getClusterStateVersion(zkClient);
mockController.publishState("core1", "core_node1", ZkStateReader.ACTIVE,
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.ACTIVE,
1);
while (version == getClusterStateVersion(zkClient));
@ -575,7 +648,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
overseerClient.close();
Thread.sleep(1000); // wait for overseer to get killed
mockController.publishState("core1", "core_node1",
mockController.publishState(collection, "core1", "core_node1",
ZkStateReader.RECOVERING, 1);
version = getClusterStateVersion(zkClient);
@ -588,13 +661,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
assertEquals("Live nodes count does not match", 1, reader
.getClusterState().getLiveNodes().size());
assertEquals("Shard count does not match", 1, reader.getClusterState()
.getSlice("collection1", "shard1").getReplicasMap().size());
.getSlice(collection, "shard1").getReplicasMap().size());
version = getClusterStateVersion(zkClient);
mockController.publishState("core1", "core_node1", null, 1);
mockController.publishState(collection, "core1", "core_node1", null, 1);
while (version == getClusterStateVersion(zkClient));
Thread.sleep(500);
assertFalse("collection1 should be gone after publishing the null state",
reader.getClusterState().getCollections().contains("collection1"));
reader.getClusterState().getCollections().contains(collection));
} finally {
close(mockController);
close(overseerClient);
@ -676,17 +749,17 @@ public class OverseerTest extends SolrTestCaseJ4 {
for (int i = 0; i < atLeast(4); i++) {
killCounter.incrementAndGet(); //for each round allow 1 kill
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
mockController.publishState("core1", "node1", "state1",1);
mockController = new MockZKController(server.getZkAddress(), "node1");
mockController.publishState(collection, "core1", "node1", "state1",1);
if(mockController2!=null) {
mockController2.close();
mockController2 = null;
}
mockController.publishState("core1", "node1","state2",1);
mockController2 = new MockZKController(server.getZkAddress(), "node2", "collection1");
mockController.publishState("core1", "node1", "state1",1);
mockController.publishState(collection, "core1", "node1","state2",1);
mockController2 = new MockZKController(server.getZkAddress(), "node2");
mockController.publishState(collection, "core1", "node1", "state1",1);
verifyShardLeader(reader, "collection1", "shard1", "core1");
mockController2.publishState("core4", "node2", "state2" ,1);
mockController2.publishState(collection, "core4", "node2", "state2" ,1);
mockController.close();
mockController = null;
verifyShardLeader(reader, "collection1", "shard1", "core4");
@ -729,11 +802,11 @@ public class OverseerTest extends SolrTestCaseJ4 {
reader = new ZkStateReader(controllerClient);
reader.createClusterStateWatchersAndUpdate();
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
mockController = new MockZKController(server.getZkAddress(), "node1");
overseerClient = electNewOverseer(server.getZkAddress());
mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1);
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1);
waitForCollections(reader, "collection1");
@ -743,8 +816,8 @@ public class OverseerTest extends SolrTestCaseJ4 {
int version = getClusterStateVersion(controllerClient);
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
mockController.publishState("core1", "core_node1", ZkStateReader.RECOVERING, 1);
mockController = new MockZKController(server.getZkAddress(), "node1");
mockController.publishState(collection, "core1", "core_node1", ZkStateReader.RECOVERING, 1);
while (version == getClusterStateVersion(controllerClient));
@ -794,11 +867,11 @@ public class OverseerTest extends SolrTestCaseJ4 {
reader = new ZkStateReader(controllerClient);
reader.createClusterStateWatchersAndUpdate();
mockController = new MockZKController(server.getZkAddress(), "node1", "collection1");
mockController = new MockZKController(server.getZkAddress(), "node1");
overseerClient = electNewOverseer(server.getZkAddress());
mockController.publishState("core1", "node1", ZkStateReader.RECOVERING, 12);
mockController.publishState(collection, "core1", "node1", ZkStateReader.RECOVERING, 12);
waitForCollections(reader, "collection1");

View File

@ -66,10 +66,10 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase {
int maxDoc = maxDocList[random().nextInt(maxDocList.length - 1)];
indexThread = new StopableIndexingThread("1", true, maxDoc);
indexThread = new StopableIndexingThread(controlClient, cloudClient, "1", true, maxDoc);
indexThread.start();
indexThread2 = new StopableIndexingThread("2", true, maxDoc);
indexThread2 = new StopableIndexingThread(controlClient, cloudClient, "2", true, maxDoc);
indexThread2.start();
@ -100,7 +100,7 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase {
Thread.sleep(1000);
waitForThingsToLevelOut(45);
waitForThingsToLevelOut(90);
Thread.sleep(2000);

View File

@ -103,6 +103,8 @@ public class TestDistribDocBasedVersion extends AbstractFullDistribZkTestBase {
doTestDocVersions();
doTestHardFail();
commit(); // work around SOLR-5628
testFinished = true;
} finally {
if (!testFinished) {

View File

@ -190,7 +190,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
cc = getCoreContainer();
ZkController zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
"127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
"127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
@Override
public List<CoreDescriptor> getCurrentDescriptors() {
@ -230,7 +230,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
cc = getCoreContainer();
zkController = new ZkController(cc, server.getZkAddress(),
TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
@Override
public List<CoreDescriptor> getCurrentDescriptors() {
@ -284,7 +284,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
try {
zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
"http://127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
"http://127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
@Override
public List<CoreDescriptor> getCurrentDescriptors() {

View File

@ -64,6 +64,8 @@ public class HdfsTestUtil {
System.setProperty("solr.hdfs.home", "/solr_hdfs_home");
System.setProperty("solr.hdfs.blockcache.global", Boolean.toString(LuceneTestCase.random().nextBoolean()));
final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
dfsCluster.waitActive();
@ -92,6 +94,7 @@ public class HdfsTestUtil {
System.clearProperty("test.build.data");
System.clearProperty("test.cache.data");
System.clearProperty("solr.hdfs.home");
System.clearProperty("solr.hdfs.blockcache.global");
if (dfsCluster != null) {
timers.remove(dfsCluster);
dfsCluster.shutdown();

View File

@ -0,0 +1,170 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud.hdfs;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.LuceneTestCase.Nightly;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.cloud.BasicDistributedZkTest;
import org.apache.solr.cloud.StopableIndexingThread;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.HdfsDirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.servlet.SolrDispatchFilter;
import org.apache.solr.store.blockcache.BlockCache;
import org.apache.solr.store.blockcache.BlockDirectory;
import org.apache.solr.store.blockcache.BlockDirectoryCache;
import org.apache.solr.store.blockcache.Cache;
import org.apache.solr.util.RefCounted;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
/**
 * Indexes concurrently into several HDFS-backed collections, checks that the
 * total number of documents found matches the net number of adds, and then
 * verifies that every core of those collections uses a {@link BlockDirectory}
 * with read and write caching enabled. Depending on the
 * {@code solr.hdfs.blockcache.global} system property, the cores are expected
 * either to share one {@link BlockCache} instance or to use distinct ones.
 */
@Slow
@Nightly
@ThreadLeakScope(Scope.NONE) // hdfs client currently leaks thread(s)
public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest {
  private static final String SOLR_HDFS_HOME = "solr.hdfs.home";
  private static final String SOLR_HDFS_BLOCKCACHE_GLOBAL = "solr.hdfs.blockcache.global";
  private static final String ACOLLECTION = "acollection";
  private static MiniDFSCluster dfsCluster;

  /** Starts the mini DFS cluster and points {@code solr.hdfs.home} at it. */
  @BeforeClass
  public static void setupClass() throws Exception {
    schemaString = "schema15.xml"; // we need a string id
    // name the temp dir after this test (previously used another test's class name)
    dfsCluster = HdfsTestUtil.setupClass(new File(TEMP_DIR,
        HdfsWriteToMultipleCollectionsTest.class.getName() + "_"
            + System.currentTimeMillis()).getAbsolutePath());
    System.setProperty(SOLR_HDFS_HOME, dfsCluster.getURI().toString() + "/solr");
  }

  /** Shuts the mini DFS cluster down and clears {@code solr.hdfs.home}. */
  @AfterClass
  public static void teardownClass() throws Exception {
    HdfsTestUtil.teardownClass(dfsCluster);
    System.clearProperty(SOLR_HDFS_HOME);
    dfsCluster = null;
  }

  @Override
  protected String getDataDir(String dataDir) throws IOException {
    return HdfsTestUtil.getDataDir(dfsCluster, dataDir);
  }

  public HdfsWriteToMultipleCollectionsTest() {
    super();
    sliceCount = 1;
    shardCount = 3;
  }

  protected String getSolrXml() {
    return "solr-no-core.xml";
  }

  @Override
  public void doTest() throws Exception {
    int docCount = random().nextInt(1313) + 1;
    int cnt = random().nextInt(4) + 1;
    for (int i = 0; i < cnt; i++) {
      createCollection(ACOLLECTION + i, 2, 2, 9);
    }
    for (int i = 0; i < cnt; i++) {
      waitForRecoveriesToFinish(ACOLLECTION + i, false);
    }

    List<CloudSolrServer> cloudServers = new ArrayList<CloudSolrServer>();
    List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>();
    int addCnt = 0;
    long collectionsCount = 0;
    try {
      // one client and one indexing thread per collection
      for (int i = 0; i < cnt; i++) {
        CloudSolrServer server = new CloudSolrServer(zkServer.getZkAddress());
        // track the client right away so it is shut down even if setup fails below
        cloudServers.add(server);
        server.setDefaultCollection(ACOLLECTION + i);
        StopableIndexingThread indexThread = new StopableIndexingThread(null, server, "1", true, docCount);
        threads.add(indexThread);
        indexThread.start();
      }

      // net number of documents that should remain once all threads are done
      for (StopableIndexingThread thread : threads) {
        thread.join();
        addCnt += thread.getNumAdds() - thread.getNumDeletes();
      }

      for (CloudSolrServer server : cloudServers) {
        server.commit();
        collectionsCount += server.query(new SolrQuery("*:*")).getResults().getNumFound();
      }
    } finally {
      // always release the clients, also when indexing, commit or query failed
      for (CloudSolrServer server : cloudServers) {
        server.shutdown();
      }
    }

    assertEquals(addCnt, collectionsCount);

    BlockCache lastBlockCache = null;
    // assert that we are using the block directory and that write and read caching are being used
    for (JettySolrRunner jetty : jettys) {
      CoreContainer cores = ((SolrDispatchFilter) jetty.getDispatchFilter()
          .getFilter()).getCores();
      Collection<SolrCore> solrCores = cores.getCores();
      for (SolrCore core : solrCores) {
        if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName()
            .startsWith(ACOLLECTION)) {
          assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory);
          RefCounted<IndexWriter> iwRef = core.getUpdateHandler()
              .getSolrCoreState().getIndexWriter(core);
          try {
            IndexWriter iw = iwRef.get();
            NRTCachingDirectory directory = (NRTCachingDirectory) iw
                .getDirectory();
            BlockDirectory blockDirectory = (BlockDirectory) directory
                .getDelegate();
            assertTrue(blockDirectory.isBlockCacheReadEnabled());
            assertTrue(blockDirectory.isBlockCacheWriteEnabled());
            Cache cache = blockDirectory.getCache();
            // we know it's a BlockDirectoryCache, but future proof
            assertTrue(cache instanceof BlockDirectoryCache);
            BlockCache blockCache = ((BlockDirectoryCache) cache)
                .getBlockCache();
            if (lastBlockCache != null) {
              if (Boolean.getBoolean(SOLR_HDFS_BLOCKCACHE_GLOBAL)) {
                // a global block cache must be the very same instance for every core
                assertSame(lastBlockCache, blockCache);
              } else {
                assertNotSame(lastBlockCache, blockCache);
              }
            }
            lastBlockCache = blockCache;
          } finally {
            iwRef.decref();
          }
        }
      }
    }
  }
}

View File

@ -30,7 +30,11 @@ import org.apache.solr.util.RefCounted;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class TestNonNRTOpen extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(TestNonNRTOpen.class);
@BeforeClass
public static void beforeClass() throws Exception {
@ -80,6 +84,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {
// core reload
String core = h.getCore().getName();
log.info("Reloading core: " + h.getCore().toString());
h.getCoreContainer().reload(core);
assertNotNRT(1);
@ -90,6 +95,7 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {
// add a doc and core reload
assertU(adoc("bazz", "doc2"));
log.info("Reloading core: " + h.getCore().toString());
h.getCoreContainer().reload(core);
assertNotNRT(3);
}
@ -127,11 +133,15 @@ public class TestNonNRTOpen extends SolrTestCaseJ4 {
}
static void assertNotNRT(int maxDoc) {
RefCounted<SolrIndexSearcher> searcher = h.getCore().getSearcher();
SolrCore core = h.getCore();
log.info("Checking notNRT & maxDoc=" + maxDoc + " of core=" + core.toString());
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
DirectoryReader ir = searcher.get().getIndexReader();
assertEquals(maxDoc, ir.maxDoc());
assertFalse("expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt"));
SolrIndexSearcher s = searcher.get();
DirectoryReader ir = s.getIndexReader();
assertEquals("SOLR-5815? : wrong maxDoc: core=" + core.toString() +" searcher=" + s.toString(),
maxDoc, ir.maxDoc());
assertFalse("SOLR-5815? : expected non-NRT reader, got: " + ir, ir.toString().contains(":nrt"));
} finally {
searcher.decref();
}

View File

@ -0,0 +1,46 @@
package org.apache.solr.schema;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.SortField;
/**
 * Int field used by tests to exercise sorting through a custom comparator:
 * the sort it produces is driven by the compiled expression
 * {@code payload % 3} rather than by the raw field value.
 */
public class WrappedIntField extends TrieIntField {
  Expression expr;

  public WrappedIntField() {
    this.expr = compileSortExpression();
  }

  /** Compiles the fixed sort expression; a compile failure is rethrown unchecked. */
  private static Expression compileSortExpression() {
    try {
      return JavascriptCompiler.compile("payload % 3");
    } catch (Exception e) {
      throw new RuntimeException("impossible?", e);
    }
  }

  /**
   * Builds the sort for {@code field} by binding the superclass' sort field
   * and evaluating the compiled expression on top of it.
   */
  @Override
  public SortField getSortField(final SchemaField field, final boolean reverse) {
    field.checkSortability();
    final SimpleBindings expressionBindings = new SimpleBindings();
    final SortField underlyingSort = super.getSortField(field, reverse);
    expressionBindings.add(underlyingSort);
    return expr.getSortField(expressionBindings, reverse);
  }
}

View File

@ -0,0 +1,53 @@
package org.apache.solr.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
/**
 * Test QueryComponent.doFieldSortValues
 */
@SuppressCodecs({"Lucene3x"})
public class TestFieldSortValues extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-minimal.xml", "schema-field-sort-values.xml");
  }

  /**
   * Indexes five documents and checks that sorting on {@code payload} (with
   * {@code fsv=true}) orders them by payload modulo 3, with ties broken by id.
   */
  public void testCustomComparator() throws Exception {
    clearIndex();

    // {id, payload} pairs, added in this order
    final String[][] idsAndPayloads = {
        {"1", "2"},
        {"2", "3"},
        {"3", "1"},
        {"4", "5"},
        {"5", "4"},
    };
    for (String[] idAndPayload : idsAndPayloads) {
      assertU(adoc(sdoc("id", idAndPayload[0], "payload", idAndPayload[1])));
    }
    assertU(commit());

    // payload is backed by a custom sort field which returns the payload value mod 3
    final String[] expectedOrder = {
        "//result/doc[int='2' and position()=1]",
        "//result/doc[int='3' and position()=2]",
        "//result/doc[int='5' and position()=3]",
        "//result/doc[int='1' and position()=4]",
        "//result/doc[int='4' and position()=5]",
    };
    assertQ(req("q", "*:*", "fl", "id", "sort", "payload asc, id asc", "fsv", "true"),
        expectedOrder);
  }
}

View File

@ -51,6 +51,7 @@ public class BlockCacheTest extends LuceneTestCase {
int file = 0;
blockCacheKey.setBlock(block);
blockCacheKey.setFile(file);
blockCacheKey.setPath("/");
if (blockCache.fetch(blockCacheKey, buffer)) {
hitsInCache.incrementAndGet();
@ -91,6 +92,7 @@ public class BlockCacheTest extends LuceneTestCase {
BlockCacheKey blockCacheKey = new BlockCacheKey();
blockCacheKey.setBlock(0);
blockCacheKey.setFile(0);
blockCacheKey.setPath("/");
byte[] newData = new byte[blockSize*3];
byte[] testData = testData(random, blockSize, newData);

View File

@ -0,0 +1,153 @@
package org.apache.solr.update;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.SolrTestCaseJ4;
import java.util.Locale;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Verifies how updates behave when a field value is longer than
 * {@link IndexWriter#MAX_TERM_LENGTH}: the update is expected to be rejected
 * for the {@code cat} field and accepted for the {@code cat_length} field.
 * Each test randomly submits the document as XML or JSON and randomly includes
 * an additional dynamic ({@code *_sS}) field carrying an equally long value.
 */
public class TestExceedMaxTermLength extends SolrTestCaseJ4 {

  public final static String TEST_SOLRCONFIG_NAME = "solrconfig.xml";
  public final static String TEST_SCHEMAXML_NAME = "schema11.xml";

  private final static int minTestTermLength = IndexWriter.MAX_TERM_LENGTH + 1;
  private final static int maxTestTermLength = IndexWriter.MAX_TERM_LENGTH * 2;

  @BeforeClass
  public static void beforeTests() throws Exception {
    initCore(TEST_SOLRCONFIG_NAME, TEST_SCHEMAXML_NAME);
  }

  /** Empties the index after every test so the numFound assertions stay independent. */
  @After
  public void cleanup() throws Exception {
    assertU(delQ("*:*"));
    assertU(commit());
  }

  /** An over-long value in {@code cat} must make the update fail and leave the index empty. */
  @Test
  public void testExceededMaxTermLength(){

    // problematic field
    final String longFieldName = "cat";
    final String longFieldValue = randomLongValue();

    final String okayFieldName = TestUtil.randomSimpleString(random(), 1, 50) + "_sS" ; //Dynamic field
    final String okayFieldValue = randomLongValue();

    boolean includeOkayFields = random().nextBoolean();

    if(random().nextBoolean()) {
      //Use XML
      assertFailedU(xmlDoc(longFieldName, longFieldValue, okayFieldName, okayFieldValue, includeOkayFields));
    } else {
      //Use JSON
      final String jsonStr = jsonDoc(longFieldName, longFieldValue, okayFieldName, okayFieldValue, includeOkayFields);
      try {
        updateJ(json(jsonStr), null);
        // fail() throws an AssertionError, which the catch (Exception) below does not swallow
        fail("Expected the update to be rejected because of the immense term in field " + longFieldName);
      } catch (Exception e) {
        //expected
        Throwable cause = e.getCause();
        assertNotNull("Update failed without a cause: " + e, cause);
        String msg = cause.getMessage();
        assertTrue(msg.contains("one immense term in field=\"cat\""));
      }
    }

    assertU(commit());

    assertQ(req("q", "*:*"), "//*[@numFound='0']");
  }

  /** The same over-long value in {@code cat_length} must be accepted and yield exactly one document. */
  @Test
  public void testExceededMaxTermLengthWithLimitingFilter(){

    // problematic field
    final String longFieldName = "cat_length";
    final String longFieldValue = randomLongValue();

    final String okayFieldName = TestUtil.randomSimpleString(random(), 1, 50) + "_sS" ; //Dynamic field
    final String okayFieldValue = randomLongValue();

    boolean includeOkayFields = random().nextBoolean();

    if(random().nextBoolean()) {
      //Use XML
      assertU(xmlDoc(longFieldName, longFieldValue, okayFieldName, okayFieldValue, includeOkayFields));
    } else {
      //Use JSON
      final String jsonStr = jsonDoc(longFieldName, longFieldValue, okayFieldName, okayFieldValue, includeOkayFields);
      try {
        updateJ(json(jsonStr), null);
      } catch (Exception e) {
        // Not expected here: report the exception instead of dropping it.
        fail("Should not have failed adding doc " + jsonStr + ": " + e);
      }
    }

    assertU(commit());

    assertQ(req("q", "*:*"), "//*[@numFound='1']");
  }

  /** Returns a random string that is longer than the maximum indexable term length. */
  private static String randomLongValue() {
    return TestUtil.randomSimpleString(random(), minTestTermLength, maxTestTermLength);
  }

  /** Builds the XML add command for document id 1, optionally including the extra field. */
  private static String xmlDoc(String longFieldName, String longFieldValue,
                               String okayFieldName, String okayFieldValue,
                               boolean includeOkayFields) {
    if (includeOkayFields) {
      return adoc("id", "1", longFieldName, longFieldValue, okayFieldName, okayFieldValue);
    }
    return adoc("id", "1", longFieldName, longFieldValue);
  }

  /** Builds the single-quoted JSON update body for document id 1, optionally including the extra field. */
  private static String jsonDoc(String longFieldName, String longFieldValue,
                                String okayFieldName, String okayFieldValue,
                                boolean includeOkayFields) {
    if (includeOkayFields) {
      return String.format(Locale.ROOT, "[{'id':'1','%s':'%s', '%s': '%s'}]",
          longFieldName, longFieldValue, okayFieldName, okayFieldValue);
    }
    return String.format(Locale.ROOT, "[{'id':'1','%s':'%s'}]", longFieldName, longFieldValue);
  }
}

View File

@ -129,6 +129,9 @@
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<!-- Enable/Disable the hdfs cache. -->
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
<!-- Enable/Disable using one global cache for all SolrCores.
The settings used will be from the first HdfsDirectoryFactory created. -->
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
</directoryFactory>

View File

@ -626,7 +626,7 @@ public class ZkStateReader {
}
/**
* Returns the baseURL corrisponding to a given node's nodeName --
* Returns the baseURL corresponding to a given node's nodeName --
* NOTE: does not (currently) imply that the nodeName (or resulting
* baseURL) exists in the cluster.
* @lucene.experimental

View File

@ -1428,122 +1428,13 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
return rsp;
}
abstract class StopableThread extends Thread {
static abstract class StopableThread extends Thread {
public StopableThread(String name) {
super(name);
}
public abstract void safeStop();
}
/**
 * Daemon thread that, on every cycle, optionally deletes one previously added
 * document and then adds a new one, until {@link #safeStop()} is called or the
 * configured number of cycles has been exceeded.
 * NOTE(review): random(), controlClient, cloudClient, indexr, i1 and t1 are not
 * defined in this class; presumably they come from the enclosing test class.
 */
class StopableIndexingThread extends StopableThread {
// set by safeStop() and polled by run()
private volatile boolean stop = false;
// prefix for generated document ids ("<id>-<counter>")
protected final String id;
// ids of added documents that may be deleted in a later cycle
protected final List<String> deletes = new ArrayList<String>();
protected Set<String> addFails = new HashSet<String>();
protected Set<String> deleteFails = new HashSet<String>();
protected boolean doDeletes;
// -1 means "run until stopped"
private int numCycles;
/** Creates a thread that runs until stopped. */
public StopableIndexingThread(String id, boolean doDeletes) {
this(id, doDeletes, -1);
}
/**
 * @param id prefix of the generated document ids
 * @param doDeletes whether documents are randomly deleted again
 * @param numCycles cycle limit, or -1 for no limit
 */
public StopableIndexingThread(String id, boolean doDeletes, int numCycles) {
super("StopableIndexingThread");
this.id = id;
this.doDeletes = doDeletes;
this.numCycles = numCycles;
setDaemon(true);
}
@Override
public void run() {
int i = 0;
int numDone = 0;
int numDeletes = 0;
int numAdds = 0;
while (true && !stop) {
if (numCycles != -1) {
// the check happens before the increment, so numCycles + 1 cycles are executed
if (numDone > numCycles) {
break;
}
}
++numDone;
String id = this.id + "-" + i;
++i;
boolean addFailed = false;
if (doDeletes && random().nextBoolean() && deletes.size() > 0) {
String delete = deletes.remove(0);
try {
// counted before the request is sent, so failed deletes are included
numDeletes++;
UpdateRequest req = new UpdateRequest();
req.deleteById(delete);
req.setParam("CONTROL", "TRUE");
req.process(controlClient);
cloudClient.deleteById(delete);
} catch (Exception e) {
System.err.println("REQUEST FAILED:");
e.printStackTrace();
if (e instanceof SolrServerException) {
System.err.println("ROOT CAUSE:");
((SolrServerException) e).getRootCause().printStackTrace();
}
// NOTE(review): records the id of the document about to be added, not the
// id whose delete failed ("delete") - looks unintended, confirm
deleteFails.add(id);
}
}
try {
// counted before the request is sent, so failed adds are included
numAdds++;
indexr("id", id, i1, 50, t1,
"to come to the aid of their country.");
} catch (Exception e) {
addFailed = true;
System.err.println("REQUEST FAILED:");
e.printStackTrace();
if (e instanceof SolrServerException) {
System.err.println("ROOT CAUSE:");
((SolrServerException) e).getRootCause().printStackTrace();
}
addFails.add(id);
}
// only successfully added documents become candidates for a later delete
if (!addFailed && doDeletes && random().nextBoolean()) {
deletes.add(id);
}
try {
Thread.currentThread().sleep(random().nextInt(100));
} catch (InterruptedException e) {
// restore the interrupt flag; the loop itself keeps running until stopped
Thread.currentThread().interrupt();
}
}
System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails"
+ " deletes:" + numDeletes);
}
/** Asks the loop in run() to finish after the current cycle. */
@Override
public void safeStop() {
stop = true;
}
public Set<String> getAddFails() {
return addFails;
}
public Set<String> getDeleteFails() {
return deleteFails;
}
/** Returns the number of recorded add failures plus recorded delete failures. */
public int getFailCount() {
return addFails.size() + deleteFails.size();
}
};
class StopableSearchThread extends StopableThread {
private volatile boolean stop = false;
protected final AtomicInteger queryFails = new AtomicInteger();

View File

@ -0,0 +1,185 @@
package org.apache.solr.cloud;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Daemon thread for tests that, on every cycle, optionally deletes one
 * previously added document and then adds a new one, until
 * {@link #safeStop()} is called or the configured number of cycles has been
 * exceeded.
 * <p>
 * Every update is sent to the cloud client and, when a control client was
 * supplied, also to the control client (flagged with {@code CONTROL=TRUE}).
 * <p>
 * The counters returned by {@link #getNumAdds()} and
 * {@link #getNumDeletes()} are plain fields written by {@link #run()}; read
 * them after joining the thread.
 */
public class StopableIndexingThread extends AbstractFullDistribZkTestBase.StopableThread {
  private static final String t1 = "a_t";
  private static final String i1 = "a_si";

  /** Set by {@link #safeStop()} and polled by {@link #run()}. */
  private volatile boolean stop = false;
  /** Prefix of the generated document ids ({@code <id>-<counter>}). */
  protected final String id;
  /** Ids of added documents that may be deleted in a later cycle. */
  protected final List<String> deletes = new ArrayList<String>();
  protected Set<String> addFails = new HashSet<String>();
  protected Set<String> deleteFails = new HashSet<String>();
  protected boolean doDeletes;
  /** Cycle limit, or -1 to run until stopped. */
  private final int numCycles;
  /** Optional control client; may be {@code null}. */
  private final SolrServer controlClient;
  private final SolrServer cloudClient;
  private int numDeletes;
  private int numAdds;

  /**
   * Creates a thread that runs until {@link #safeStop()} is called.
   *
   * @param controlClient control client that mirrors every update, or {@code null} for none
   * @param cloudClient client that receives every update
   * @param id prefix of the generated document ids
   * @param doDeletes whether added documents are randomly deleted again
   */
  public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes) {
    this(controlClient, cloudClient, id, doDeletes, -1);
  }

  /**
   * @param controlClient control client that mirrors every update, or {@code null} for none
   * @param cloudClient client that receives every update
   * @param id prefix of the generated document ids
   * @param doDeletes whether added documents are randomly deleted again
   * @param numCycles cycle limit, or -1 for no limit; because the limit is
   *        checked before the cycle counter is incremented,
   *        {@code numCycles + 1} cycles are executed
   */
  public StopableIndexingThread(SolrServer controlClient, SolrServer cloudClient, String id, boolean doDeletes, int numCycles) {
    super("StopableIndexingThread");
    this.controlClient = controlClient;
    this.cloudClient = cloudClient;
    this.id = id;
    this.doDeletes = doDeletes;
    this.numCycles = numCycles;
    setDaemon(true);
  }

  @Override
  public void run() {
    int i = 0;
    int numDone = 0;
    numDeletes = 0;
    numAdds = 0;

    while (!stop) {
      if (numCycles != -1 && numDone > numCycles) {
        break;
      }
      ++numDone;
      String id = this.id + "-" + i;
      ++i;
      boolean addFailed = false;

      if (doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean() && deletes.size() > 0) {
        String delete = deletes.remove(0);
        try {
          // counted before the request is sent, so failed deletes are included
          numDeletes++;
          if (controlClient != null) {
            UpdateRequest req = new UpdateRequest();
            req.deleteById(delete);
            req.setParam("CONTROL", "TRUE");
            req.process(controlClient);
          }

          cloudClient.deleteById(delete);
        } catch (Exception e) {
          reportFailure(e);
          // Record the id whose delete failed (not the id of the doc added next).
          deleteFails.add(delete);
        }
      }

      try {
        // counted before the request is sent, so failed adds are included
        numAdds++;
        indexr("id", id, i1, 50, t1,
            "to come to the aid of their country.");
      } catch (Exception e) {
        addFailed = true;
        reportFailure(e);
        addFails.add(id);
      }

      // only successfully added documents become candidates for a later delete
      if (!addFailed && doDeletes && AbstractFullDistribZkTestBase.random().nextBoolean()) {
        deletes.add(id);
      }

      try {
        Thread.sleep(AbstractFullDistribZkTestBase.random().nextInt(100));
      } catch (InterruptedException e) {
        // restore the interrupt flag; the loop itself keeps running until stopped
        Thread.currentThread().interrupt();
      }
    }

    System.err.println("added docs:" + numAdds + " with " + (addFails.size() + deleteFails.size()) + " fails"
        + " deletes:" + numDeletes);
  }

  /** Prints a failed request and, for a {@link SolrServerException}, its root cause to stderr. */
  private static void reportFailure(Exception e) {
    System.err.println("REQUEST FAILED:");
    e.printStackTrace();
    if (e instanceof SolrServerException) {
      System.err.println("ROOT CAUSE:");
      ((SolrServerException) e).getRootCause().printStackTrace();
    }
  }

  /** Asks the loop in {@link #run()} to finish after the current cycle. */
  @Override
  public void safeStop() {
    stop = true;
  }

  /** Returns the ids of documents whose add request failed. */
  public Set<String> getAddFails() {
    return addFails;
  }

  /** Returns the ids of documents whose delete request failed. */
  public Set<String> getDeleteFails() {
    return deleteFails;
  }

  /** Returns the number of recorded add failures plus recorded delete failures. */
  public int getFailCount() {
    return addFails.size() + deleteFails.size();
  }

  /**
   * Adds name/value pairs to {@code doc}.
   *
   * @param doc document to populate
   * @param fields alternating field names (cast to {@code String}) and values
   */
  protected void addFields(SolrInputDocument doc, Object... fields) {
    for (int i = 0; i < fields.length; i += 2) {
      doc.addField((String) (fields[i]), fields[i + 1]);
    }
  }

  /** Builds a document from {@code fields}, adds {@code rnd_b=true} and indexes it. */
  protected void indexr(Object... fields) throws Exception {
    SolrInputDocument doc = new SolrInputDocument();
    addFields(doc, fields);
    addFields(doc, "rnd_b", true);
    indexDoc(doc);
  }

  /** Sends {@code doc} to the control client (when present) and then to the cloud client. */
  protected void indexDoc(SolrInputDocument doc) throws IOException,
      SolrServerException {

    if (controlClient != null) {
      UpdateRequest req = new UpdateRequest();
      req.add(doc);
      req.setParam("CONTROL", "TRUE");
      req.process(controlClient);
    }

    UpdateRequest ureq = new UpdateRequest();
    ureq.add(doc);
    ureq.process(cloudClient);
  }

  /** Returns the number of delete attempts made by the last {@link #run()}, including failed ones. */
  public int getNumDeletes() {
    return numDeletes;
  }

  /** Returns the number of add attempts made by the last {@link #run()}, including failed ones. */
  public int getNumAdds() {
    return numAdds;
  }

}