merge lucene_4

Shay Banon 2012-11-12 13:50:14 +01:00
commit 6ede42bc37
504 changed files with 6631 additions and 8496 deletions

pom.xml (17 changed lines)
View File

@@ -30,7 +30,7 @@
</parent>
<properties>
-<lucene.version>3.6.1</lucene.version>
+<lucene.version>4.0.0</lucene.version>
</properties>
<repositories>
@@ -51,7 +51,13 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
-<artifactId>lucene-analyzers</artifactId>
+<artifactId>lucene-analyzers-common</artifactId>
<version>${lucene.version}</version>
<scope>compile</scope>
</dependency>
+<dependency>
+<groupId>org.apache.lucene</groupId>
+<artifactId>lucene-codecs</artifactId>
+<version>${lucene.version}</version>
+<scope>compile</scope>
+</dependency>
@@ -79,6 +85,13 @@
<version>${lucene.version}</version>
<scope>compile</scope>
</dependency>
+<dependency>
+<groupId>org.apache.lucene</groupId>
+<artifactId>lucene-queryparser</artifactId>
+<version>${lucene.version}</version>
+<scope>compile</scope>
+</dependency>
<!-- START: dependencies that are shaded -->
<dependency>

View File

@@ -0,0 +1,85 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis;
import java.io.Reader;
/**
* Extension to {@link Analyzer} suitable for Analyzers which wrap
* other Analyzers.
* <p/>
* {@link #getWrappedAnalyzer(String)} allows the Analyzer
* to wrap multiple Analyzers which are selected on a per field basis.
* <p/>
* {@link #wrapComponents(String, Analyzer.TokenStreamComponents)} allows the
* TokenStreamComponents of the wrapped Analyzer to then be wrapped
* (such as adding a new {@link TokenFilter} to form new TokenStreamComponents).
*/
public abstract class CustomAnalyzerWrapper extends Analyzer {
/**
* Creates a new CustomAnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
* the wrapped Analyzers is unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed
*/
protected CustomAnalyzerWrapper() {
super(new PerFieldReuseStrategy());
}
/**
* Retrieves the wrapped Analyzer appropriate for analyzing the field with
* the given name
*
* @param fieldName Name of the field which is to be analyzed
* @return Analyzer for the field with the given name. Assumed to be non-null
*/
protected abstract Analyzer getWrappedAnalyzer(String fieldName);
/**
* Wraps / alters the given TokenStreamComponents, taken from the wrapped
* Analyzer, to form new components. It is through this method that new
* TokenFilters can be added by AnalyzerWrappers.
*
*
* @param fieldName Name of the field which is to be analyzed
* @param components TokenStreamComponents taken from the wrapped Analyzer
* @return Wrapped / altered TokenStreamComponents.
*/
protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
@Override
protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
}
@Override
public int getPositionIncrementGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
@Override
public int getOffsetGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getOffsetGap(fieldName);
}
@Override
public final Reader initReader(String fieldName, Reader reader) {
return getWrappedAnalyzer(fieldName).initReader(fieldName, reader);
}
}
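
As a point of reference, here is a minimal sketch (not part of this commit) of how the wrapper above can be subclassed to route fields to different analyzers while leaving the wrapped components untouched; the PerFieldWrapper name, the "id" field, and the KeywordAnalyzer/StandardAnalyzer choices are illustrative assumptions against the Lucene 4.0 analyzers-common module added in pom.xml.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CustomAnalyzerWrapper;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

public class PerFieldWrapper extends CustomAnalyzerWrapper {
    private final Analyzer defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_40);
    private final Analyzer idAnalyzer = new KeywordAnalyzer();

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        // hypothetical routing: keep "id" values as single tokens, analyze everything else normally
        return "id".equals(fieldName) ? idAnalyzer : defaultAnalyzer;
    }

    @Override
    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        // this sketch adds no extra TokenFilter; a real wrapper would build new components here
        return components;
    }
}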

View File

@@ -1,63 +0,0 @@
package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import java.io.IOException;
/**
*/
// LUCENE MONITOR: Next version of Lucene (4.0) will have this as part of the analyzers module
public final class TrimFilter extends TokenFilter {
final boolean updateOffsets;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public TrimFilter(TokenStream in, boolean updateOffsets) {
super(in);
this.updateOffsets = updateOffsets;
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
char[] termBuffer = termAtt.buffer();
int len = termAtt.length();
//TODO: Is this the right behavior or should we return false? Currently, " ", returns true, so I think this should
//also return true
if (len == 0) {
return true;
}
int start = 0;
int end = 0;
int endOff = 0;
// eat the first characters
//QUESTION: Should we use Character.isWhitespace() instead?
for (start = 0; start < len && termBuffer[start] <= ' '; start++) {
}
// eat the end characters
for (end = len; end >= start && termBuffer[end - 1] <= ' '; end--) {
endOff++;
}
if (start > 0 || end < len) {
if (start < end) {
termAtt.copyBuffer(termBuffer, start, (end - start));
} else {
termAtt.setEmpty();
}
if (updateOffsets) {
int newStart = offsetAtt.startOffset() + start;
int newEnd = offsetAtt.endOffset() - (start < end ? endOff : 0);
offsetAtt.setOffset(newStart, newEnd);
}
}
return true;
}
}
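
A minimal usage sketch (not part of this commit) of the two-argument constructor above; the same signature exists on the Lucene 4.0 TrimFilter this commit switches to, and the KeywordTokenizer plus the sample input are assumptions for illustration.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TrimFilterSketch {
    public static void main(String[] args) throws Exception {
        // trim leading/trailing whitespace from the single keyword token, updating offsets
        TokenStream ts = new TrimFilter(new KeywordTokenizer(new StringReader("  wi-fi  ")), true);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println("[" + term.toString() + "]"); // expected: [wi-fi]
        }
        ts.end();
        ts.close();
    }
}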

View File

@@ -19,11 +19,11 @@
package org.apache.lucene.analysis.miscellaneous;
-import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import java.io.IOException;

View File

@@ -1,574 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
/**
* Splits words into subwords and performs optional transformations on subword groups.
* Words are split into subwords with the following rules:
* - split on intra-word delimiters (by default, all non alpha-numeric characters).
* - "Wi-Fi" -> "Wi", "Fi"
* - split on case transitions
* - "PowerShot" -> "Power", "Shot"
* - split on letter-number transitions
* - "SD500" -> "SD", "500"
* - leading and trailing intra-word delimiters on each subword are ignored
* - "//hello---there, 'dude'" -> "hello", "there", "dude"
* - trailing "'s" are removed for each subword
* - "O'Neil's" -> "O", "Neil"
* - Note: this step isn't performed in a separate filter because of possible subword combinations.
* <p/>
* The <b>combinations</b> parameter affects how subwords are combined:
* - combinations="0" causes no subword combinations.
* - "PowerShot" -> 0:"Power", 1:"Shot" (0 and 1 are the token positions)
* - combinations="1" means that in addition to the subwords, maximum runs of non-numeric subwords are catenated and produced at the same position of the last subword in the run.
* - "PowerShot" -> 0:"Power", 1:"Shot" 1:"PowerShot"
* - "A's+B's&C's" -> 0:"A", 1:"B", 2:"C", 2:"ABC"
* - "Super-Duper-XL500-42-AutoCoder!" -> 0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"
* <p/>
* One use for WordDelimiterFilter is to help match words with different subword delimiters.
* For example, if the source text contained "wi-fi" one may want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match.
* One way of doing so is to specify combinations="1" in the analyzer used for indexing, and combinations="0" (the default)
* in the analyzer used for querying. Given that the current StandardTokenizer immediately removes many intra-word
* delimiters, it is recommended that this filter be used after a tokenizer that does not do this (such as WhitespaceTokenizer).
*/
// LUCENE MONITOR: Part of Lucene 4.0, once we upgrade, remove it
public final class WordDelimiterFilter extends TokenFilter {
public static final int LOWER = 0x01;
public static final int UPPER = 0x02;
public static final int DIGIT = 0x04;
public static final int SUBWORD_DELIM = 0x08;
// combinations: for testing, not for setting bits
public static final int ALPHA = 0x03;
public static final int ALPHANUM = 0x07;
/**
* If true, causes parts of words to be generated:
* <p/>
* "PowerShot" => "Power" "Shot"
*/
final boolean generateWordParts;
/**
* If true, causes number subwords to be generated:
* <p/>
* "500-42" => "500" "42"
*/
final boolean generateNumberParts;
/**
* If true, causes maximum runs of word parts to be catenated:
* <p/>
* "wi-fi" => "wifi"
*/
final boolean catenateWords;
/**
* If true, causes maximum runs of number parts to be catenated:
* <p/>
* "500-42" => "50042"
*/
final boolean catenateNumbers;
/**
* If true, causes all subword parts to be catenated:
* <p/>
* "wi-fi-4000" => "wifi4000"
*/
final boolean catenateAll;
/**
* If true, original words are preserved and added to the subword list (Defaults to false)
* <p/>
* "500-42" => "500" "42" "500-42"
*/
final boolean preserveOriginal;
/**
* If not null, the set of tokens to protect from being delimited
*/
final CharArraySet protWords;
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
// used for iterating word delimiter breaks
private final WordDelimiterIterator iterator;
// used for concatenating runs of similar typed subwords (word,number)
private final WordDelimiterConcatenation concat = new WordDelimiterConcatenation();
// number of subwords last output by concat.
private int lastConcatCount = 0;
// used for catenate all
private final WordDelimiterConcatenation concatAll = new WordDelimiterConcatenation();
// used for accumulating position increment gaps
private int accumPosInc = 0;
private char savedBuffer[] = new char[1024];
private int savedStartOffset;
private int savedEndOffset;
private String savedType;
private boolean hasSavedState = false;
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
private boolean hasIllegalOffsets = false;
// for a run of the same subword type within a word, have we output anything?
private boolean hasOutputToken = false;
// when preserve original is on, have we output any token following it?
// this token must have posInc=0!
private boolean hasOutputFollowingOriginal = false;
/**
* @param in Token stream to be filtered.
* @param charTypeTable table containing character types
* @param generateWordParts If 1, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
* @param generateNumberParts If 1, causes number subwords to be generated: "500-42" => "500" "42"
* @param catenateWords If 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
* @param catenateNumbers If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042"
* @param catenateAll If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
* @param splitOnCaseChange If 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regardless)
* @param preserveOriginal If 1, includes original words in subwords: "500-42" => "500" "42" "500-42"
* @param splitOnNumerics If 1, causes "j2se" to be three tokens: "j" "2" "se"
* @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
* @param protWords If not null, the set of tokens to protect from being delimited
*/
public WordDelimiterFilter(TokenStream in,
byte[] charTypeTable,
int generateWordParts,
int generateNumberParts,
int catenateWords,
int catenateNumbers,
int catenateAll,
int splitOnCaseChange,
int preserveOriginal,
int splitOnNumerics,
int stemEnglishPossessive,
CharArraySet protWords) {
super(in);
this.generateWordParts = generateWordParts != 0;
this.generateNumberParts = generateNumberParts != 0;
this.catenateWords = catenateWords != 0;
this.catenateNumbers = catenateNumbers != 0;
this.catenateAll = catenateAll != 0;
this.preserveOriginal = preserveOriginal != 0;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(charTypeTable, splitOnCaseChange != 0, splitOnNumerics != 0, stemEnglishPossessive != 0);
}
/**
* @param in Token stream to be filtered.
* @param generateWordParts If 1, causes parts of words to be generated: "PowerShot", "Power-Shot" => "Power" "Shot"
* @param generateNumberParts If 1, causes number subwords to be generated: "500-42" => "500" "42"
* @param catenateWords If 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
* @param catenateNumbers If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042"
* @param catenateAll If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
* @param splitOnCaseChange If 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regardless)
* @param preserveOriginal If 1, includes original words in subwords: "500-42" => "500" "42" "500-42"
* @param splitOnNumerics If 1, causes "j2se" to be three tokens: "j" "2" "se"
* @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
* @param protWords If not null, the set of tokens to protect from being delimited
*/
public WordDelimiterFilter(TokenStream in,
int generateWordParts,
int generateNumberParts,
int catenateWords,
int catenateNumbers,
int catenateAll,
int splitOnCaseChange,
int preserveOriginal,
int splitOnNumerics,
int stemEnglishPossessive,
CharArraySet protWords) {
this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, generateWordParts, generateNumberParts, catenateWords, catenateNumbers, catenateAll, splitOnCaseChange, preserveOriginal, splitOnNumerics, stemEnglishPossessive, protWords);
}
public boolean incrementToken() throws IOException {
while (true) {
if (!hasSavedState) {
// process a new input word
if (!input.incrementToken()) {
return false;
}
int termLength = termAttribute.length();
char[] termBuffer = termAttribute.buffer();
accumPosInc += posIncAttribute.getPositionIncrement();
iterator.setText(termBuffer, termLength);
iterator.next();
// word of no delimiters, or protected word: just return it
if ((iterator.current == 0 && iterator.end == termLength) ||
(protWords != null && protWords.contains(termBuffer, 0, termLength))) {
posIncAttribute.setPositionIncrement(accumPosInc);
accumPosInc = 0;
return true;
}
// word consisting only of delimiters
if (iterator.end == WordDelimiterIterator.DONE && !preserveOriginal) {
// if the posInc is 1, simply ignore it in the accumulation
if (posIncAttribute.getPositionIncrement() == 1) {
accumPosInc--;
}
continue;
}
saveState();
hasOutputToken = false;
hasOutputFollowingOriginal = !preserveOriginal;
lastConcatCount = 0;
if (preserveOriginal) {
posIncAttribute.setPositionIncrement(accumPosInc);
accumPosInc = 0;
return true;
}
}
// at the end of the string, output any concatenations
if (iterator.end == WordDelimiterIterator.DONE) {
if (!concat.isEmpty()) {
if (flushConcatenation(concat)) {
return true;
}
}
if (!concatAll.isEmpty()) {
// only if we haven't output this same combo above!
if (concatAll.subwordCount > lastConcatCount) {
concatAll.writeAndClear();
return true;
}
concatAll.clear();
}
// no saved concatenations, on to the next input word
hasSavedState = false;
continue;
}
// word surrounded by delimiters: always output
if (iterator.isSingleWord()) {
generatePart(true);
iterator.next();
return true;
}
int wordType = iterator.type();
// do we already have queued up incompatible concatenations?
if (!concat.isEmpty() && (concat.type & wordType) == 0) {
if (flushConcatenation(concat)) {
hasOutputToken = false;
return true;
}
hasOutputToken = false;
}
// add subwords depending upon options
if (shouldConcatenate(wordType)) {
if (concat.isEmpty()) {
concat.type = wordType;
}
concatenate(concat);
}
// add all subwords (catenateAll)
if (catenateAll) {
concatenate(concatAll);
}
// if we should output the word or number part
if (shouldGenerateParts(wordType)) {
generatePart(false);
iterator.next();
return true;
}
iterator.next();
}
}
/**
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
hasSavedState = false;
concat.clear();
concatAll.clear();
accumPosInc = 0;
}
// ================================================= Helper Methods ================================================
/**
* Saves the existing attribute states
*/
private void saveState() {
// otherwise, we have delimiters, save state
savedStartOffset = offsetAttribute.startOffset();
savedEndOffset = offsetAttribute.endOffset();
// if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
savedType = typeAttribute.type();
if (savedBuffer.length < termAttribute.length()) {
savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
}
System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
iterator.text = savedBuffer;
hasSavedState = true;
}
/**
* Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
*
* @param concatenation WordDelimiterConcatenation that will be flushed
* @return {@code true} if the concatenation was written before it was cleared, {@code} false otherwise
*/
private boolean flushConcatenation(WordDelimiterConcatenation concatenation) {
lastConcatCount = concatenation.subwordCount;
if (concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type)) {
concatenation.writeAndClear();
return true;
}
concatenation.clear();
return false;
}
/**
* Determines whether to concatenate a word or number if the current word is the given type
*
* @param wordType Type of the current word used to determine if it should be concatenated
* @return {@code true} if concatenation should occur, {@code false} otherwise
*/
private boolean shouldConcatenate(int wordType) {
return (catenateWords && isAlpha(wordType)) || (catenateNumbers && isDigit(wordType));
}
/**
* Determines whether a word/number part should be generated for a word of the given type
*
* @param wordType Type of the word used to determine if a word/number part should be generated
* @return {@code true} if a word/number part should be generated, {@code false} otherwise
*/
private boolean shouldGenerateParts(int wordType) {
return (generateWordParts && isAlpha(wordType)) || (generateNumberParts && isDigit(wordType));
}
/**
* Concatenates the saved buffer to the given WordDelimiterConcatenation
*
* @param concatenation WordDelimiterConcatenation to concatenate the buffer to
*/
private void concatenate(WordDelimiterConcatenation concatenation) {
if (concatenation.isEmpty()) {
concatenation.startOffset = savedStartOffset + iterator.current;
}
concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current);
concatenation.endOffset = savedStartOffset + iterator.end;
}
/**
* Generates a word/number part, updating the appropriate attributes
*
* @param isSingleWord {@code true} if the generation is occurring from a single word, {@code false} otherwise
*/
private void generatePart(boolean isSingleWord) {
clearAttributes();
termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
int startOffSet = (isSingleWord || !hasIllegalOffsets) ? savedStartOffset + iterator.current : savedStartOffset;
int endOffSet = (hasIllegalOffsets) ? savedEndOffset : savedStartOffset + iterator.end;
offsetAttribute.setOffset(startOffSet, endOffSet);
posIncAttribute.setPositionIncrement(position(false));
typeAttribute.setType(savedType);
}
/**
* Get the position increment gap for a subword or concatenation
*
* @param inject true if this token wants to be injected
* @return position increment gap
*/
private int position(boolean inject) {
int posInc = accumPosInc;
if (hasOutputToken) {
accumPosInc = 0;
return inject ? 0 : Math.max(1, posInc);
}
hasOutputToken = true;
if (!hasOutputFollowingOriginal) {
// the first token following the original is 0 regardless
hasOutputFollowingOriginal = true;
return 0;
}
// clear the accumulated position increment
accumPosInc = 0;
return Math.max(1, posInc);
}
/**
* Checks if the given word type includes {@link #ALPHA}
*
* @param type Word type to check
* @return {@code true} if the type contains ALPHA, {@code false} otherwise
*/
static boolean isAlpha(int type) {
return (type & ALPHA) != 0;
}
/**
* Checks if the given word type includes {@link #DIGIT}
*
* @param type Word type to check
* @return {@code true} if the type contains DIGIT, {@code false} otherwise
*/
static boolean isDigit(int type) {
return (type & DIGIT) != 0;
}
/**
* Checks if the given word type includes {@link #SUBWORD_DELIM}
*
* @param type Word type to check
* @return {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise
*/
static boolean isSubwordDelim(int type) {
return (type & SUBWORD_DELIM) != 0;
}
/**
* Checks if the given word type includes {@link #UPPER}
*
* @param type Word type to check
* @return {@code true} if the type contains UPPER, {@code false} otherwise
*/
static boolean isUpper(int type) {
return (type & UPPER) != 0;
}
// ================================================= Inner Classes =================================================
/**
* A WDF concatenated 'run'
*/
final class WordDelimiterConcatenation {
final StringBuilder buffer = new StringBuilder();
int startOffset;
int endOffset;
int type;
int subwordCount;
/**
* Appends the given text of the given length to the concatenation at the given offset
*
* @param text Text to append
* @param offset Offset in the concatenation to add the text
* @param length Length of the text to append
*/
void append(char text[], int offset, int length) {
buffer.append(text, offset, length);
subwordCount++;
}
/**
* Writes the concatenation to the attributes
*/
void write() {
clearAttributes();
if (termAttribute.length() < buffer.length()) {
termAttribute.resizeBuffer(buffer.length());
}
char termbuffer[] = termAttribute.buffer();
buffer.getChars(0, buffer.length(), termbuffer, 0);
termAttribute.setLength(buffer.length());
if (hasIllegalOffsets) {
offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
} else {
offsetAttribute.setOffset(startOffset, endOffset);
}
posIncAttribute.setPositionIncrement(position(true));
typeAttribute.setType(savedType);
accumPosInc = 0;
}
/**
* Determines if the concatenation is empty
*
* @return {@code true} if the concatenation is empty, {@code false} otherwise
*/
boolean isEmpty() {
return buffer.length() == 0;
}
/**
* Clears the concatenation and resets its state
*/
void clear() {
buffer.setLength(0);
startOffset = endOffset = type = subwordCount = 0;
}
/**
* Convenience method for the common scenario of having to write the concatenation and then clearing its state
*/
void writeAndClear() {
write();
clear();
}
}
// questions:
// negative numbers? -42 indexed as just 42?
// dollar sign? $42
// percent sign? 33%
// downsides: if source text is "powershot" then a query of "PowerShot" won't match!
}
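
A minimal usage sketch (not part of this commit) of the int-flag constructor shown above, written against the pre-upgrade Lucene 3.6 WhitespaceTokenizer since this copy predates the 4.0 API; the sample text and flag values are illustrative assumptions.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class WordDelimiterSketch {
    public static void main(String[] args) throws Exception {
        TokenStream ts = new WordDelimiterFilter(
                new WhitespaceTokenizer(Version.LUCENE_36, new StringReader("PowerShot SD500")),
                1,    // generateWordParts
                1,    // generateNumberParts
                0,    // catenateWords
                0,    // catenateNumbers
                0,    // catenateAll
                1,    // splitOnCaseChange
                0,    // preserveOriginal
                1,    // splitOnNumerics
                1,    // stemEnglishPossessive
                null  // protWords: no protected tokens
        );
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString()); // expected: Power, Shot, SD, 500
        }
        ts.end();
        ts.close();
    }
}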

View File

@@ -1,341 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
/**
* A BreakIterator-like API for iterating over subwords in text, according to WordDelimiterFilter rules.
*
* @lucene.internal
*/
public final class WordDelimiterIterator {
/**
* Indicates the end of iteration
*/
public static final int DONE = -1;
public static final byte[] DEFAULT_WORD_DELIM_TABLE;
char text[];
int length;
/**
* start position of text, excluding leading delimiters
*/
int startBounds;
/**
* end position of text, excluding trailing delimiters
*/
int endBounds;
/**
* Beginning of subword
*/
int current;
/**
* End of subword
*/
int end;
/* does this string end with a possessive such as 's */
private boolean hasFinalPossessive = false;
/**
* If false, causes case changes to be ignored (subwords will only be generated
* given SUBWORD_DELIM tokens). (Defaults to true)
*/
final boolean splitOnCaseChange;
/**
* If false, causes numeric changes to be ignored (subwords will only be generated
* given SUBWORD_DELIM tokens). (Defaults to true)
*/
final boolean splitOnNumerics;
/**
* If true, causes trailing "'s" to be removed for each subword. (Defaults to true)
* <p/>
* "O'Neil's" => "O", "Neil"
*/
final boolean stemEnglishPossessive;
private final byte[] charTypeTable;
/**
* if true, need to skip over a possessive found in the last call to next()
*/
private boolean skipPossessive = false;
// TODO: should there be a WORD_DELIM category for chars that only separate words (no catenation of subwords will be
// done if separated by these chars?) "," would be an obvious candidate...
static {
byte[] tab = new byte[256];
for (int i = 0; i < 256; i++) {
byte code = 0;
if (Character.isLowerCase(i)) {
code |= LOWER;
} else if (Character.isUpperCase(i)) {
code |= UPPER;
} else if (Character.isDigit(i)) {
code |= DIGIT;
}
if (code == 0) {
code = SUBWORD_DELIM;
}
tab[i] = code;
}
DEFAULT_WORD_DELIM_TABLE = tab;
}
/**
* Create a new WordDelimiterIterator operating with the supplied rules.
*
* @param charTypeTable table containing character types
* @param splitOnCaseChange if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regardless)
* @param splitOnNumerics if true, causes "j2se" to be three tokens; "j" "2" "se"
* @param stemEnglishPossessive if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
*/
WordDelimiterIterator(byte[] charTypeTable, boolean splitOnCaseChange, boolean splitOnNumerics, boolean stemEnglishPossessive) {
this.charTypeTable = charTypeTable;
this.splitOnCaseChange = splitOnCaseChange;
this.splitOnNumerics = splitOnNumerics;
this.stemEnglishPossessive = stemEnglishPossessive;
}
/**
* Advance to the next subword in the string.
*
* @return index of the next subword, or {@link #DONE} if all subwords have been returned
*/
int next() {
current = end;
if (current == DONE) {
return DONE;
}
if (skipPossessive) {
current += 2;
skipPossessive = false;
}
int lastType = 0;
while (current < endBounds && (isSubwordDelim(lastType = charType(text[current])))) {
current++;
}
if (current >= endBounds) {
return end = DONE;
}
for (end = current + 1; end < endBounds; end++) {
int type = charType(text[end]);
if (isBreak(lastType, type)) {
break;
}
lastType = type;
}
if (end < endBounds - 1 && endsWithPossessive(end + 2)) {
skipPossessive = true;
}
return end;
}
/**
* Return the type of the current subword.
* This currently uses the type of the first character in the subword.
*
* @return type of the current word
*/
int type() {
if (end == DONE) {
return 0;
}
int type = charType(text[current]);
switch (type) {
// return ALPHA word type for both lower and upper
case LOWER:
case UPPER:
return ALPHA;
default:
return type;
}
}
/**
* Reset the text to a new value, and reset all state
*
* @param text New text
* @param length length of the text
*/
void setText(char text[], int length) {
this.text = text;
this.length = this.endBounds = length;
current = startBounds = end = 0;
skipPossessive = hasFinalPossessive = false;
setBounds();
}
// ================================================= Helper Methods ================================================
/**
* Determines whether the transition from lastType to type indicates a break
*
* @param lastType Last subword type
* @param type Current subword type
* @return {@code true} if the transition indicates a break, {@code false} otherwise
*/
private boolean isBreak(int lastType, int type) {
if ((type & lastType) != 0) {
return false;
}
if (!splitOnCaseChange && isAlpha(lastType) && isAlpha(type)) {
// ALPHA->ALPHA: always ignore if case isn't considered.
return false;
} else if (isUpper(lastType) && isAlpha(type)) {
// UPPER->letter: Don't split
return false;
} else if (!splitOnNumerics && ((isAlpha(lastType) && isDigit(type)) || (isDigit(lastType) && isAlpha(type)))) {
// ALPHA->NUMERIC, NUMERIC->ALPHA :Don't split
return false;
}
return true;
}
/**
* Determines if the current word contains only one subword. Note, it could be potentially surrounded by delimiters
*
* @return {@code true} if the current word contains only one subword, {@code false} otherwise
*/
boolean isSingleWord() {
if (hasFinalPossessive) {
return current == startBounds && end == endBounds - 2;
} else {
return current == startBounds && end == endBounds;
}
}
/**
* Set the internal word bounds (remove leading and trailing delimiters). Note, if a possessive is found, don't remove
* it yet, simply note it.
*/
private void setBounds() {
while (startBounds < length && (isSubwordDelim(charType(text[startBounds])))) {
startBounds++;
}
while (endBounds > startBounds && (isSubwordDelim(charType(text[endBounds - 1])))) {
endBounds--;
}
if (endsWithPossessive(endBounds)) {
hasFinalPossessive = true;
}
current = startBounds;
}
/**
* Determines if the text at the given position indicates an English possessive which should be removed
*
* @param pos Position in the text to check if it indicates an English possessive
* @return {@code true} if the text at the position indicates an English possessive, {@code false} otherwise
*/
private boolean endsWithPossessive(int pos) {
return (stemEnglishPossessive &&
pos > 2 &&
text[pos - 2] == '\'' &&
(text[pos - 1] == 's' || text[pos - 1] == 'S') &&
isAlpha(charType(text[pos - 3])) &&
(pos == endBounds || isSubwordDelim(charType(text[pos]))));
}
/**
* Determines the type of the given character
*
* @param ch Character whose type is to be determined
* @return Type of the character
*/
private int charType(int ch) {
if (ch < charTypeTable.length) {
return charTypeTable[ch];
}
return getType(ch);
}
/**
* Computes the type of the given character
*
* @param ch Character whose type is to be determined
* @return Type of the character
*/
public static byte getType(int ch) {
switch (Character.getType(ch)) {
case Character.UPPERCASE_LETTER:
return UPPER;
case Character.LOWERCASE_LETTER:
return LOWER;
case Character.TITLECASE_LETTER:
case Character.MODIFIER_LETTER:
case Character.OTHER_LETTER:
case Character.NON_SPACING_MARK:
case Character.ENCLOSING_MARK: // depends what it encloses?
case Character.COMBINING_SPACING_MARK:
return ALPHA;
case Character.DECIMAL_DIGIT_NUMBER:
case Character.LETTER_NUMBER:
case Character.OTHER_NUMBER:
return DIGIT;
// case Character.SPACE_SEPARATOR:
// case Character.LINE_SEPARATOR:
// case Character.PARAGRAPH_SEPARATOR:
// case Character.CONTROL:
// case Character.FORMAT:
// case Character.PRIVATE_USE:
case Character.SURROGATE: // prevent splitting
return ALPHA | DIGIT;
// case Character.DASH_PUNCTUATION:
// case Character.START_PUNCTUATION:
// case Character.END_PUNCTUATION:
// case Character.CONNECTOR_PUNCTUATION:
// case Character.OTHER_PUNCTUATION:
// case Character.MATH_SYMBOL:
// case Character.CURRENCY_SYMBOL:
// case Character.MODIFIER_SYMBOL:
// case Character.OTHER_SYMBOL:
// case Character.INITIAL_QUOTE_PUNCTUATION:
// case Character.FINAL_QUOTE_PUNCTUATION:
default:
return SUBWORD_DELIM;
}
}
}
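
A small same-package sketch (not part of this commit) of the iteration contract described above: next() returns the end offset of each subword and DONE when exhausted, while the package-private current field holds the start. Because the constructor and fields are package-private, this assumes a helper class placed in org.apache.lucene.analysis.miscellaneous purely for illustration.

package org.apache.lucene.analysis.miscellaneous;

public class WordDelimiterIteratorSketch {
    public static void main(String[] args) {
        WordDelimiterIterator it = new WordDelimiterIterator(
                WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, true, true, true);
        char[] text = "PowerShot500".toCharArray();
        it.setText(text, text.length);
        // expected output: Power, Shot, 500
        for (int end = it.next(); end != WordDelimiterIterator.DONE; end = it.next()) {
            System.out.println(new String(text, it.current, end - it.current));
        }
    }
}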

View File

@@ -1,85 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis.pattern;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A TokenFilter which applies a Pattern to each token in the stream,
* replacing match occurrences with the specified replacement string.
* <p/>
* <p>
* <b>Note:</b> Depending on the pattern used and the input
* TokenStream, this TokenFilter may produce Tokens whose text is the empty
* string.
* </p>
*
* @see Pattern
*/
public final class PatternReplaceFilter extends TokenFilter {
private final Pattern p;
private final String replacement;
private final boolean all;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final Matcher m;
/**
* Constructs an instance to replace either the first, or all occurrences
*
* @param in the TokenStream to process
* @param p the pattern to apply to each Token
* @param replacement the "replacement string" to substitute, if null a
* blank string will be used. Note that this is not the literal
* string that will be used, '$' and '\' have special meaning.
* @param all if true, all matches will be replaced otherwise just the first match.
* @see Matcher#quoteReplacement
*/
public PatternReplaceFilter(TokenStream in,
Pattern p,
String replacement,
boolean all) {
super(in);
this.p = p;
this.replacement = (null == replacement) ? "" : replacement;
this.all = all;
this.m = p.matcher(termAtt);
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
m.reset();
if (m.find()) {
// replaceAll/replaceFirst will reset() this previous find.
String transformed = all ? m.replaceAll(replacement) : m.replaceFirst(replacement);
termAtt.setEmpty().append(transformed);
}
return true;
}
}
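
A minimal usage sketch (not part of this commit); the (TokenStream, Pattern, String, boolean) constructor matches both the copy above and the Lucene 4.0 org.apache.lucene.analysis.pattern.PatternReplaceFilter that replaces it, while the tokenizer, pattern, and input are illustrative assumptions.

import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class PatternReplaceSketch {
    public static void main(String[] args) throws Exception {
        // strip every '-' from each token: "wi-fi" -> "wifi", "power-shot" -> "powershot"
        TokenStream ts = new PatternReplaceFilter(
                new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("wi-fi power-shot")),
                Pattern.compile("-"), "", true);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
        ts.close();
    }
}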

View File

@@ -1,153 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis.pattern;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import java.io.IOException;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This tokenizer uses regex pattern matching to construct distinct tokens
* for the input stream. It takes two arguments: "pattern" and "group".
* <p/>
* <ul>
* <li>"pattern" is the regular expression.</li>
* <li>"group" says which group to extract into tokens.</li>
* </ul>
* <p>
* group=-1 (the default) is equivalent to "split". In this case, the tokens will
* be equivalent to the output from (without empty tokens):
* {@link String#split(java.lang.String)}
* </p>
* <p>
* Using group >= 0 selects the matching group as the token. For example, if you have:<br/>
* <pre>
* pattern = \'([^\']+)\'
* group = 0
* input = aaa 'bbb' 'ccc'
* </pre>
* the output will be two tokens: 'bbb' and 'ccc' (including the ' marks). With the same input
* but using group=1, the output would be: bbb and ccc (no ' marks)
* </p>
* <p>NOTE: This Tokenizer does not output tokens that are of zero length.</p>
*
* @see Pattern
*/
public final class PatternTokenizer extends Tokenizer {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final StringBuilder str = new StringBuilder();
private int index;
private final Pattern pattern;
private final int group;
private final Matcher matcher;
/**
* creates a new PatternTokenizer returning tokens from group (-1 for split functionality)
*/
public PatternTokenizer(Reader input, Pattern pattern, int group) throws IOException {
super(input);
this.pattern = pattern;
this.group = group;
fillBuffer(str, input);
matcher = pattern.matcher(str);
index = 0;
}
@Override
public boolean incrementToken() throws IOException {
if (index >= str.length()) return false;
clearAttributes();
if (group >= 0) {
// match a specific group
while (matcher.find()) {
index = matcher.start(group);
final int endIndex = matcher.end(group);
if (index == endIndex) continue;
termAtt.setEmpty().append(str, index, endIndex);
offsetAtt.setOffset(correctOffset(index), correctOffset(endIndex));
return true;
}
index = Integer.MAX_VALUE; // mark exhausted
return false;
} else {
// String.split() functionality
while (matcher.find()) {
if (matcher.start() - index > 0) {
// found a non-zero-length token
termAtt.setEmpty().append(str, index, matcher.start());
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start()));
index = matcher.end();
return true;
}
index = matcher.end();
}
if (str.length() - index == 0) {
index = Integer.MAX_VALUE; // mark exhausted
return false;
}
termAtt.setEmpty().append(str, index, str.length());
offsetAtt.setOffset(correctOffset(index), correctOffset(str.length()));
index = Integer.MAX_VALUE; // mark exhausted
return true;
}
}
@Override
public void end() throws IOException {
final int ofs = correctOffset(str.length());
offsetAtt.setOffset(ofs, ofs);
}
@Override
public void reset(Reader input) throws IOException {
super.reset(input);
fillBuffer(str, input);
matcher.reset(str);
index = 0;
}
// TODO: we should see if we can make this tokenizer work without reading
// the entire document into RAM, perhaps with Matcher.hitEnd/requireEnd ?
final char[] buffer = new char[8192];
private void fillBuffer(StringBuilder sb, Reader input) throws IOException {
int len;
sb.setLength(0);
while ((len = input.read(buffer)) > 0) {
sb.append(buffer, 0, len);
}
}
}
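
A minimal usage sketch (not part of this commit) reproducing the group example from the class javadoc above, using this copy (which reads its whole input in the constructor, so no explicit reset is needed); the input string is the one given in the javadoc.

import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PatternTokenizerSketch {
    public static void main(String[] args) throws Exception {
        // group = 1 extracts the text between the quote marks; expected output: bbb, ccc
        PatternTokenizer tok = new PatternTokenizer(
                new StringReader("aaa 'bbb' 'ccc'"), Pattern.compile("'([^']+)'"), 1);
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        while (tok.incrementToken()) {
            System.out.println(term.toString());
        }
        tok.end();
        tok.close();
    }
}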

View File

@@ -1,451 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.bloom.BloomFilter;
import org.elasticsearch.index.cache.bloom.BloomCache;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
/* Tracks the stream of {@link BufferedDeletes}.
* When DocumentsWriter flushes, its buffered
* deletes are appended to this stream. We later
* apply these deletes (resolve them to the actual
* docIDs, per segment) when a merge is started
* (only to the to-be-merged segments). We
* also apply to all segments when NRT reader is pulled,
* commit/close is called, or when too many deletes are
* buffered and must be flushed (by RAM usage or by count).
*
* Each packet is assigned a generation, and each flushed or
* merged segment is also assigned a generation, so we can
* track which BufferedDeletes packets to apply to any given
* segment. */
// LUCENE MONITOR: We copied this class from Lucene, effectively overriding it with our implementation
// if it comes first in the classpath, allowing for faster apply deletes based on terms
class BufferedDeletesStream implements XIndexWriter.XBufferedDeletesStream {
// TODO: maybe linked list?
private final List<FrozenBufferedDeletes> deletes = new ArrayList<FrozenBufferedDeletes>();
// Starts at 1 so that SegmentInfos that have never had
// deletes applied (whose bufferedDelGen defaults to 0)
// will be correct:
private long nextGen = 1;
// used only by assert
private Term lastDeleteTerm;
private PrintStream infoStream;
private final AtomicLong bytesUsed = new AtomicLong();
private final AtomicInteger numTerms = new AtomicInteger();
private final int messageID;
private BloomCache bloomCache;
public BufferedDeletesStream(int messageID) {
this.messageID = messageID;
}
private synchronized void message(String message) {
if (infoStream != null) {
infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
}
}
public synchronized void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
}
public void setBloomCache(BloomCache bloomCache) {
this.bloomCache = bloomCache;
}
// Appends a new packet of buffered deletes to the stream,
// setting its generation:
public synchronized void push(FrozenBufferedDeletes packet) {
assert packet.any();
assert checkDeleteStats();
assert packet.gen < nextGen;
deletes.add(packet);
numTerms.addAndGet(packet.numTermDeletes);
bytesUsed.addAndGet(packet.bytesUsed);
if (infoStream != null) {
message("push deletes " + packet + " delGen=" + packet.gen + " packetCount=" + deletes.size());
}
assert checkDeleteStats();
}
public synchronized void clear() {
deletes.clear();
nextGen = 1;
numTerms.set(0);
bytesUsed.set(0);
}
public boolean any() {
return bytesUsed.get() != 0;
}
public int numTerms() {
return numTerms.get();
}
public long bytesUsed() {
return bytesUsed.get();
}
public static class ApplyDeletesResult {
// True if any actual deletes took place:
public final boolean anyDeletes;
// Current gen, for the merged segment:
public final long gen;
// If non-null, contains segments that are 100% deleted
public final List<SegmentInfo> allDeleted;
ApplyDeletesResult(boolean anyDeletes, long gen, List<SegmentInfo> allDeleted) {
this.anyDeletes = anyDeletes;
this.gen = gen;
this.allDeleted = allDeleted;
}
}
// Sorts SegmentInfos from smallest to biggest bufferedDelGen:
private static final Comparator<SegmentInfo> sortByDelGen = new Comparator<SegmentInfo>() {
// @Override -- not until Java 1.6
public int compare(SegmentInfo si1, SegmentInfo si2) {
final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen();
if (cmp > 0) {
return 1;
} else if (cmp < 0) {
return -1;
} else {
return 0;
}
}
};
/**
* Resolves the buffered deleted Term/Query/docIDs, into
* actual deleted docIDs in the deletedDocs BitVector for
* each SegmentReader.
*/
public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List<SegmentInfo> infos) throws IOException {
final long t0 = System.currentTimeMillis();
if (infos.size() == 0) {
return new ApplyDeletesResult(false, nextGen++, null);
}
assert checkDeleteStats();
if (!any()) {
message("applyDeletes: no deletes; skipping");
return new ApplyDeletesResult(false, nextGen++, null);
}
if (infoStream != null) {
message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size());
}
List<SegmentInfo> infos2 = new ArrayList<SegmentInfo>();
infos2.addAll(infos);
Collections.sort(infos2, sortByDelGen);
CoalescedDeletes coalescedDeletes = null;
boolean anyNewDeletes = false;
int infosIDX = infos2.size() - 1;
int delIDX = deletes.size() - 1;
List<SegmentInfo> allDeleted = null;
while (infosIDX >= 0) {
//System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
final FrozenBufferedDeletes packet = delIDX >= 0 ? deletes.get(delIDX) : null;
final SegmentInfo info = infos2.get(infosIDX);
final long segGen = info.getBufferedDeletesGen();
if (packet != null && segGen < packet.gen) {
//System.out.println(" coalesce");
if (coalescedDeletes == null) {
coalescedDeletes = new CoalescedDeletes();
}
coalescedDeletes.update(packet);
delIDX--;
} else if (packet != null && segGen == packet.gen) {
//System.out.println(" eq");
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
SegmentReader reader = readerPool.get(info, false);
int delCount = 0;
final boolean segAllDeletes;
try {
if (coalescedDeletes != null) {
//System.out.println(" del coalesced");
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader);
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader);
}
//System.out.println(" del exact");
// Don't delete by Term here; DocumentsWriter
// already did that on flush:
delCount += applyQueryDeletes(packet.queriesIterable(), reader);
segAllDeletes = reader.numDocs() == 0;
} finally {
readerPool.release(reader);
}
anyNewDeletes |= delCount > 0;
if (segAllDeletes) {
if (allDeleted == null) {
allDeleted = new ArrayList<SegmentInfo>();
}
allDeleted.add(info);
}
if (infoStream != null) {
message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
}
if (coalescedDeletes == null) {
coalescedDeletes = new CoalescedDeletes();
}
coalescedDeletes.update(packet);
delIDX--;
infosIDX--;
info.setBufferedDeletesGen(nextGen);
} else {
//System.out.println(" gt");
if (coalescedDeletes != null) {
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
SegmentReader reader = readerPool.get(info, false);
int delCount = 0;
final boolean segAllDeletes;
try {
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader);
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader);
segAllDeletes = reader.numDocs() == 0;
} finally {
readerPool.release(reader);
}
anyNewDeletes |= delCount > 0;
if (segAllDeletes) {
if (allDeleted == null) {
allDeleted = new ArrayList<SegmentInfo>();
}
allDeleted.add(info);
}
if (infoStream != null) {
message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
}
}
info.setBufferedDeletesGen(nextGen);
infosIDX--;
}
}
assert checkDeleteStats();
if (infoStream != null) {
message("applyDeletes took " + (System.currentTimeMillis() - t0) + " msec");
}
// assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;
return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted);
}
public synchronized long getNextGen() {
return nextGen++;
}
// Lock order IW -> BD
/* Removes any BufferedDeletes that we no longer need to
* store because all segments in the index have had the
* deletes applied. */
public synchronized void prune(SegmentInfos segmentInfos) {
assert checkDeleteStats();
long minGen = Long.MAX_VALUE;
for (SegmentInfo info : segmentInfos) {
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
}
if (infoStream != null) {
message("prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + deletes.size());
}
final int limit = deletes.size();
for (int delIDX = 0; delIDX < limit; delIDX++) {
if (deletes.get(delIDX).gen >= minGen) {
prune(delIDX);
assert checkDeleteStats();
return;
}
}
// All deletes pruned
prune(limit);
assert !any();
assert checkDeleteStats();
}
private synchronized void prune(int count) {
if (count > 0) {
if (infoStream != null) {
message("pruneDeletes: prune " + count + " packets; " + (deletes.size() - count) + " packets remain");
}
for (int delIDX = 0; delIDX < count; delIDX++) {
final FrozenBufferedDeletes packet = deletes.get(delIDX);
numTerms.addAndGet(-packet.numTermDeletes);
assert numTerms.get() >= 0;
bytesUsed.addAndGet(-packet.bytesUsed);
assert bytesUsed.get() >= 0;
}
deletes.subList(0, count).clear();
}
}
// ES CHANGE: Add bloom filter usage
// Delete by Term
private synchronized long applyTermDeletes(Iterable<Term> termsIter, SegmentReader reader) throws IOException {
long delCount = 0;
assert checkDeleteTerm(null);
BloomFilter filter = bloomCache == null ? BloomFilter.NONE : bloomCache.filter(reader, UidFieldMapper.NAME, true);
UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
TermDocs docs = reader.termDocs();
for (Term term : termsIter) {
if (term.field() == UidFieldMapper.NAME) {
Unicode.fromStringAsUtf8(term.text(), utf8);
if (!filter.isPresent(utf8.result, 0, utf8.length)) {
continue;
}
}
if (docs == null) {
docs = reader.termDocs();
}
// Since we visit terms sorted, we gain performance
// by re-using the same TermsEnum and seeking only
// forwards
assert checkDeleteTerm(term);
docs.seek(term);
while (docs.next()) {
final int docID = docs.doc();
reader.deleteDocument(docID);
// TODO: we could/should change
// reader.deleteDocument to return boolean
// true if it did in fact delete, because here
// we could be deleting an already-deleted doc
// which makes this an upper bound:
delCount++;
}
}
return delCount;
}
public static class QueryAndLimit {
public final Query query;
public final int limit;
public QueryAndLimit(Query query, int limit) {
this.query = query;
this.limit = limit;
}
}
// Delete by query
private synchronized long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, SegmentReader reader) throws IOException {
long delCount = 0;
for (QueryAndLimit ent : queriesIter) {
Query query = ent.query;
int limit = ent.limit;
final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(reader);
if (docs != null) {
final DocIdSetIterator it = docs.iterator();
if (it != null) {
while (true) {
int doc = it.nextDoc();
if (doc >= limit)
break;
reader.deleteDocument(doc);
// TODO: we could/should change
// reader.deleteDocument to return boolean
// true if it did in fact delete, because here
// we could be deleting an already-deleted doc
// which makes this an upper bound:
delCount++;
}
}
}
}
return delCount;
}
// used only by assert
private boolean checkDeleteTerm(Term term) {
if (term != null) {
assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0 : "lastTerm=" + lastDeleteTerm + " vs term=" + term;
}
// TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
lastDeleteTerm = term == null ? null : new Term(term.field(), term.text());
return true;
}
// only for assert
private boolean checkDeleteStats() {
int numTerms2 = 0;
long bytesUsed2 = 0;
for (FrozenBufferedDeletes packet : deletes) {
numTerms2 += packet.numTermDeletes;
bytesUsed2 += packet.bytesUsed;
}
assert numTerms2 == numTerms.get() : "numTerms2=" + numTerms2 + " vs " + numTerms.get();
assert bytesUsed2 == bytesUsed.get() : "bytesUsed2=" + bytesUsed2 + " vs " + bytesUsed;
return true;
}
}

View File

@@ -1,49 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.search.IndexSearcher;
/**
*
*/
public class ExtendedIndexSearcher extends IndexSearcher {
public ExtendedIndexSearcher(ExtendedIndexSearcher searcher) {
super(searcher.getIndexReader(), searcher.subReaders(), searcher.docStarts());
setSimilarity(searcher.getSimilarity());
}
public ExtendedIndexSearcher(IndexReader r) {
super(r);
}
public IndexReader[] subReaders() {
return this.subReaders;
}
public int[] docStarts() {
return this.docStarts;
}
public int readerIndex(int doc) {
return DirectoryReader.readerIndex(doc, docStarts, subReaders.length);
}
}

View File

@@ -84,7 +84,7 @@ public class TrackingConcurrentMergeScheduler extends ConcurrentMergeScheduler {
currentMergesNumDocs.inc(totalNumDocs);
currentMergesSizeInBytes.inc(totalSizeInBytes);
if (logger.isTraceEnabled()) {
logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes));
logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes));
}
try {
TrackingMergeScheduler.setCurrentMerge(merge);
@@ -101,9 +101,9 @@ public class TrackingConcurrentMergeScheduler extends ConcurrentMergeScheduler {
totalMergesSizeInBytes.inc(totalSizeInBytes);
totalMerges.inc(took);
if (took > 20000) { // if more than 20 seconds, DEBUG log it
logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took));
logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took));
} else if (logger.isTraceEnabled()) {
logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took));
logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took));
}
}
}

View File

@ -96,7 +96,7 @@ public class TrackingSerialMergeScheduler extends MergeScheduler {
// sadly, segment name is not available since mergeInit is called from merge itself...
if (logger.isTraceEnabled()) {
logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes));
logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes));
}
try {
TrackingMergeScheduler.setCurrentMerge(merge);
@ -113,9 +113,9 @@ public class TrackingSerialMergeScheduler extends MergeScheduler {
totalMergesSizeInBytes.inc(totalSizeInBytes);
totalMerges.inc(took);
if (took > 20000) { // if more than 20 seconds, DEBUG log it
logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took));
logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took));
} else if (logger.isTraceEnabled()) {
logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took));
logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took));
}
}
}

View File

@ -1,29 +0,0 @@
package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.index.cache.bloom.BloomCache;
import java.io.IOException;
/**
*/
public class XIndexWriter extends IndexWriter {
private final ESLogger logger;
public XIndexWriter(Directory d, IndexWriterConfig conf, ESLogger logger, BloomCache bloomCache) throws CorruptIndexException, LockObtainFailedException, IOException {
super(d, conf);
this.logger = logger;
if (bufferedDeletesStream instanceof XBufferedDeletesStream) {
logger.debug("using bloom filter enhanced delete handling");
((XBufferedDeletesStream) bufferedDeletesStream).setBloomCache(bloomCache);
}
}
public static interface XBufferedDeletesStream {
void setBloomCache(BloomCache bloomCache);
}
}

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.lucene.queryParser;
package org.apache.lucene.queryparser.classic;
import org.apache.lucene.search.DeletionAwareConstantScoreQuery;
import org.apache.lucene.search.Filter;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.lucene.queryParser;
package org.apache.lucene.queryparser.classic;
import org.apache.lucene.search.Query;
import org.elasticsearch.index.query.QueryParseContext;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.lucene.queryParser;
package org.apache.lucene.queryparser.classic;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableMap;
@ -287,7 +287,7 @@ public class MapperQueryParser extends QueryParser {
}
@Override
protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException {
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
if ("*".equals(part1)) {
part1 = null;
}
@ -297,13 +297,13 @@ public class MapperQueryParser extends QueryParser {
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
return getRangeQuerySingle(fields.iterator().next(), part1, part2, inclusive);
return getRangeQuerySingle(fields.iterator().next(), part1, part2, startInclusive, endInclusive);
}
if (settings.useDisMax()) {
DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker());
boolean added = false;
for (String mField : fields) {
Query q = getRangeQuerySingle(mField, part1, part2, inclusive);
Query q = getRangeQuerySingle(mField, part1, part2, startInclusive, endInclusive);
if (q != null) {
added = true;
applyBoost(mField, q);
@ -317,7 +317,7 @@ public class MapperQueryParser extends QueryParser {
} else {
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
for (String mField : fields) {
Query q = getRangeQuerySingle(mField, part1, part2, inclusive);
Query q = getRangeQuerySingle(mField, part1, part2, startInclusive, endInclusive);
if (q != null) {
applyBoost(mField, q);
clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
@ -328,18 +328,18 @@ public class MapperQueryParser extends QueryParser {
return getBooleanQuery(clauses, true);
}
} else {
return getRangeQuerySingle(field, part1, part2, inclusive);
return getRangeQuerySingle(field, part1, part2, startInclusive, endInclusive);
}
}
private Query getRangeQuerySingle(String field, String part1, String part2, boolean inclusive) {
private Query getRangeQuerySingle(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) {
currentMapper = null;
MapperService.SmartNameFieldMappers fieldMappers = parseContext.smartFieldMappers(field);
if (fieldMappers != null) {
currentMapper = fieldMappers.fieldMappers().mapper();
if (currentMapper != null) {
try {
Query rangeQuery = currentMapper.rangeQuery(part1, part2, inclusive, inclusive, parseContext);
Query rangeQuery = currentMapper.rangeQuery(part1, part2, startInclusive, endInclusive, parseContext);
return wrapSmartNameQuery(rangeQuery, fieldMappers, parseContext);
} catch (RuntimeException e) {
if (settings.lenient()) {
@ -349,7 +349,7 @@ public class MapperQueryParser extends QueryParser {
}
}
}
return newRangeQuery(field, part1, part2, inclusive);
return newRangeQuery(field, part1, part2, startInclusive, endInclusive);
}
@Override
@ -395,7 +395,8 @@ public class MapperQueryParser extends QueryParser {
currentMapper = fieldMappers.fieldMappers().mapper();
if (currentMapper != null) {
try {
Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, minSimilarity, fuzzyPrefixLength, settings.fuzzyMaxExpansions());
//LUCENE 4 UPGRADE: transpositions are disabled here by default - maybe this needs to be changed
Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, minSimilarity, fuzzyPrefixLength, settings.fuzzyMaxExpansions(), false);
return wrapSmartNameQuery(fuzzyQuery, fieldMappers, parseContext);
} catch (RuntimeException e) {
if (settings.lenient()) {
@ -410,7 +411,10 @@ public class MapperQueryParser extends QueryParser {
@Override
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
FuzzyQuery query = new FuzzyQuery(term, minimumSimilarity, prefixLength, settings.fuzzyMaxExpansions());
String text = term.text();
int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, text.codePointCount(0, text.length()));
//LUCENE 4 UPGRADE: transpositions are disabled here by default - maybe this needs to be changed
FuzzyQuery query = new FuzzyQuery(term, numEdits, prefixLength, settings.fuzzyMaxExpansions(), false);
QueryParsers.setRewriteMethod(query, settings.fuzzyRewriteMethod());
return query;
}
@ -503,7 +507,7 @@ public class MapperQueryParser extends QueryParser {
// get Analyzer from superclass and tokenize the term
TokenStream source;
try {
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
source = getAnalyzer().tokenStream(field, new StringReader(termStr));
} catch (IOException e) {
return super.getPrefixQuery(field, termStr);
}
@ -631,7 +635,7 @@ public class MapperQueryParser extends QueryParser {
if (c == '?' || c == '*') {
if (isWithinToken) {
try {
TokenStream source = getAnalyzer().reusableTokenStream(field, new FastStringReader(tmp.toString()));
TokenStream source = getAnalyzer().tokenStream(field, new FastStringReader(tmp.toString()));
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
if (source.incrementToken()) {
String term = termAtt.toString();
@ -660,7 +664,7 @@ public class MapperQueryParser extends QueryParser {
}
if (isWithinToken) {
try {
TokenStream source = getAnalyzer().reusableTokenStream(field, new FastStringReader(tmp.toString()));
TokenStream source = getAnalyzer().tokenStream(field, new FastStringReader(tmp.toString()));
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
if (source.incrementToken()) {
String term = termAtt.toString();

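An illustrative sketch (not part of the commit) of the conversion used above: Lucene 4.0's FuzzyQuery takes an integer edit distance, and FuzzyQuery.floatToEdits maps the old float similarity onto it, clamped to two edits. The term, similarity and expansion count below are invented inputs.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;

public class FuzzyEditsSketch {
    public static void main(String[] args) {
        Term term = new Term("user", "kimchy");   // hypothetical field and value
        float minimumSimilarity = 0.5f;           // Lucene 3.x style input
        int termLength = term.text().codePointCount(0, term.text().length());
        // floatToEdits clamps the result to FuzzyQuery.MAXIMUM_SUPPORTED_DISTANCE (2)
        int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, termLength);
        // transpositions disabled, mirroring the "LUCENE 4 UPGRADE" note above
        FuzzyQuery query = new FuzzyQuery(term, numEdits, 0, 50, false);
        System.out.println(numEdits + " edits -> " + query);
    }
}
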
View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.lucene.queryParser;
package org.apache.lucene.queryparser.classic;
import org.apache.lucene.search.DeletionAwareConstantScoreQuery;
import org.apache.lucene.search.Filter;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.lucene.queryParser;
package org.apache.lucene.queryparser.classic;
import gnu.trove.map.hash.TObjectFloatHashMap;
import org.apache.lucene.analysis.Analyzer;

View File

@ -28,6 +28,7 @@ import org.elasticsearch.common.lucene.search.NotDeletedFilter;
// So it can basically be cached safely even with a reader that changes deletions but remains with the same cache key
// See more: https://issues.apache.org/jira/browse/LUCENE-2468
// TODO Lucene 4.0 won't need this, since live docs are "and'ed" while scoring
// LUCENE 4 UPGRADE: we probably don't need this anymore, because of acceptDocs
public class DeletionAwareConstantScoreQuery extends ConstantScoreQuery {
private final Filter actualFilter;

View File

@ -1,116 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
/**
*
*/
// LUCENE MONITOR: Against TermsFilter
public class PublicTermsFilter extends Filter {
Set<Term> terms = new TreeSet<Term>();
/**
* Adds a term to the list of acceptable terms
*
* @param term
*/
public void addTerm(Term term) {
terms.add(term);
}
public Set<Term> getTerms() {
return terms;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if ((obj == null) || (obj.getClass() != this.getClass()))
return false;
PublicTermsFilter test = (PublicTermsFilter) obj;
return (terms == test.terms ||
(terms != null && terms.equals(test.terms)));
}
@Override
public int hashCode() {
int hash = 9;
for (Iterator<Term> iter = terms.iterator(); iter.hasNext(); ) {
Term term = iter.next();
hash = 31 * hash + term.hashCode();
}
return hash;
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
FixedBitSet result = null;
TermDocs td = reader.termDocs();
try {
// batch read, in Lucene 4.0 it's no longer needed
int[] docs = new int[Lucene.BATCH_ENUM_DOCS];
int[] freqs = new int[Lucene.BATCH_ENUM_DOCS];
for (Term term : terms) {
td.seek(term);
int number = td.read(docs, freqs);
if (number > 0) {
if (result == null) {
result = new FixedBitSet(reader.maxDoc());
}
while (number > 0) {
for (int i = 0; i < number; i++) {
result.set(docs[i]);
}
number = td.read(docs, freqs);
}
}
}
} finally {
td.close();
}
return result;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
for (Term term : terms) {
if (builder.length() > 0) {
builder.append(' ');
}
builder.append(term);
}
return builder.toString();
}
}

View File

@ -19,13 +19,12 @@
package org.apache.lucene.search;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.search.controller.ShardFieldDoc;
import java.io.IOException;
import java.text.Collator;
import java.util.Locale;
/**
*
@ -48,7 +47,7 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
* @param size The number of hits to retain. Must be greater than zero.
*/
public ShardFieldDocSortedHitQueue(SortField[] fields, int size) {
initialize(size);
super(size);
setFields(fields);
}
@ -83,31 +82,11 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
return fields;
}
/**
* Returns an array of collators, possibly <code>null</code>. The collators
* correspond to any SortFields which were given a specific locale.
*
* @param fields Array of sort fields.
* @return Array, possibly <code>null</code>.
*/
private Collator[] hasCollators(final SortField[] fields) {
if (fields == null) return null;
Collator[] ret = new Collator[fields.length];
for (int i = 0; i < fields.length; ++i) {
Locale locale = fields[i].getLocale();
if (locale != null)
ret[i] = Collator.getInstance(locale);
}
return ret;
}
/**
* Returns whether <code>a</code> is less relevant than <code>b</code>.
*
* @param a ScoreDoc
* @param b ScoreDoc
* @param docA ScoreDoc
* @param docB ScoreDoc
* @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
*/
@SuppressWarnings("unchecked")
@ -116,10 +95,10 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
final int n = fields.length;
int c = 0;
for (int i = 0; i < n && c == 0; ++i) {
final int type = fields[i].getType();
if (type == SortField.STRING) {
final String s1 = (String) docA.fields[i];
final String s2 = (String) docB.fields[i];
final SortField.Type type = fields[i].getType();
if (type == SortField.Type.STRING) {
final BytesRef s1 = (BytesRef) docA.fields[i];
final BytesRef s2 = (BytesRef) docB.fields[i];
// null values need to be sorted first, because of how FieldCache.getStringIndex()
// works - in that routine, any documents without a value in the given field are
// put first. If both are null, the next SortField is used

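For context (not part of the commit): string sort values now arrive as BytesRef instead of String, and BytesRef compares by unsigned UTF-8 byte order, which is what the cast above relies on. A tiny sketch with invented values:

import org.apache.lucene.util.BytesRef;

public class BytesRefOrderSketch {
    public static void main(String[] args) {
        BytesRef s1 = new BytesRef("apple");
        BytesRef s2 = new BytesRef("banana");
        // negative result: "apple" sorts before "banana" in UTF-8 byte order
        System.out.println(s1.compareTo(s2));
    }
}
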
View File

@ -19,91 +19,168 @@
package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
/**
* Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
* when cached
*/
// LUCENE MONITOR: Against TermsFilter
// LUCENE 4 UPGRADE: Make sure to sync this against latest 4.1
// LUCENE 4.1: once it's out, we can use TermsFilter from it
public class XTermsFilter extends Filter {
private final Term[] terms;
private final Term[] filterTerms;
private final boolean[] resetTermsEnum;// true if the enum must be reset when building the bitset
private final int length;
public XTermsFilter(Term term) {
this.terms = new Term[]{term};
/**
* Creates a new {@link XTermsFilter} from the given collection. The collection
* can contain duplicate terms and multiple fields.
*/
public XTermsFilter(Collection<Term> terms) {
this(terms.toArray(new Term[terms.size()]));
}
public XTermsFilter(Term[] terms) {
/**
* Creates a new {@link XTermsFilter} from the given array. The array can
* contain duplicate terms and multiple fields.
*/
public XTermsFilter(Term... terms) {
if (terms == null || terms.length == 0) {
throw new IllegalArgumentException("TermsFilter requires at least one term");
}
Arrays.sort(terms);
this.terms = terms;
this.filterTerms = new Term[terms.length];
this.resetTermsEnum = new boolean[terms.length];
int index = 0;
for (int i = 0; i < terms.length; i++) {
Term currentTerm = terms[i];
boolean fieldChanged = true;
if (index > 0) {
// deduplicate
if (filterTerms[index - 1].field().equals(currentTerm.field())) {
fieldChanged = false;
if (filterTerms[index - 1].bytes().bytesEquals(currentTerm.bytes())) {
continue;
}
}
}
this.filterTerms[index] = currentTerm;
this.resetTermsEnum[index] = index == 0 || fieldChanged; // mark index 0 so we have a clear path in the iteration
index++;
}
length = index;
}
public Term[] getTerms() {
return terms;
return filterTerms;
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
Fields fields = reader.fields();
if (fields == null) {
return result;
}
final BytesRef br = new BytesRef();
Terms terms = null;
TermsEnum termsEnum = null;
DocsEnum docs = null;
assert resetTermsEnum[0];
for (int i = 0; i < length; i++) {
Term term = this.filterTerms[i];
if (resetTermsEnum[i]) {
terms = fields.terms(term.field());
if (terms == null) {
i = skipToNextField(i + 1, length); // skip to the next field since this field is not indexed
continue;
}
}
if ((termsEnum = terms.iterator(termsEnum)) != null) {
br.copyBytes(term.bytes());
assert termsEnum != null;
if (termsEnum.seekExact(br, true)) {
docs = termsEnum.docs(acceptDocs, docs, 0);
if (result == null) {
if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
result = new FixedBitSet(reader.maxDoc());
// lazy init but don't do it in the hot loop since we could read many docs
result.set(docs.docID());
}
}
while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
result.set(docs.docID());
}
}
}
}
return result;
}
private final int skipToNextField(int index, int length) {
for (int i = index; i < length; i++) {
if (resetTermsEnum[i]) {
return i - 1;
}
}
return length;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
if (this == obj) {
return true;
if ((obj == null) || (obj.getClass() != this.getClass()))
}
if ((obj == null) || (obj.getClass() != this.getClass())) {
return false;
}
XTermsFilter test = (XTermsFilter) obj;
return Arrays.equals(terms, test.terms);
if (filterTerms != test.filterTerms) {
if (length == test.length) {
for (int i = 0; i < length; i++) {
// can not be null!
if (!filterTerms[i].equals(test.filterTerms[i])) {
return false;
}
}
} else {
return false;
}
}
return true;
}
@Override
public int hashCode() {
return Arrays.hashCode(terms);
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
FixedBitSet result = null;
TermDocs td = reader.termDocs();
try {
// batch read, in Lucene 4.0 it's no longer needed
int[] docs = new int[Lucene.BATCH_ENUM_DOCS];
int[] freqs = new int[Lucene.BATCH_ENUM_DOCS];
for (Term term : terms) {
td.seek(term);
int number = td.read(docs, freqs);
if (number > 0) {
if (result == null) {
result = new FixedBitSet(reader.maxDoc());
}
while (number > 0) {
for (int i = 0; i < number; i++) {
result.set(docs[i]);
}
number = td.read(docs, freqs);
}
}
}
} finally {
td.close();
int hash = 9;
for (int i = 0; i < length; i++) {
hash = 31 * hash + filterTerms[i].hashCode();
}
return result;
return hash;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
for (Term term : terms) {
for (int i = 0; i < length; i++) {
if (builder.length() > 0) {
builder.append(' ');
}
builder.append(term);
builder.append(filterTerms[i]);
}
return builder.toString();
}
}

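A small usage sketch for the filter above (field names and values are invented): the varargs constructor sorts its input and drops exact duplicates, so only the leading slots of the backing array end up populated.

import java.util.Arrays;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.XTermsFilter;

public class XTermsFilterSketch {
    public static void main(String[] args) {
        XTermsFilter filter = new XTermsFilter(
                new Term("tag", "lucene"),
                new Term("tag", "lucene"),   // duplicate, dropped during construction
                new Term("user", "kimchy"),
                new Term("tag", "search"));
        // getTerms() exposes the deduplicated (and sorted) backing array;
        // trailing entries may be null because the original capacity is kept
        System.out.println(Arrays.toString(filter.getTerms()));
    }
}
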
View File

@ -144,7 +144,7 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {
}
}
if (!toffsList.isEmpty()) {
subInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(subInfo.text, toffsList, subInfo.getSeqnum()));
subInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum()));
}
if (subInfo.getTermsOffsets().isEmpty()) {
@ -175,9 +175,7 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder {
private final static List<FieldPhraseList.WeightedPhraseInfo> EMPTY = Collections.emptyList();
private WeightedFragInfo(int startOffset, int endOffset, float totalBoost, List<FieldFragList.WeightedFragInfo.SubInfo> subInfos) {
super(startOffset, endOffset, EMPTY);
this.subInfos = subInfos;
this.totalBoost = totalBoost;
super(startOffset, endOffset, subInfos, totalBoost);
}
}

View File

@ -65,12 +65,12 @@ public class XScoreOrderFragmentsBuilder extends AbstractFragmentsBuilder {
public static class ScoreComparator implements Comparator<WeightedFragInfo> {
public int compare(WeightedFragInfo o1, WeightedFragInfo o2) {
if (o1.totalBoost > o2.totalBoost) return -1;
else if (o1.totalBoost < o2.totalBoost) return 1;
if (o1.getTotalBoost() > o2.getTotalBoost()) return -1;
else if (o1.getTotalBoost() < o2.getTotalBoost()) return 1;
// if same score then check startOffset
else {
if (o1.startOffset < o2.startOffset) return -1;
else if (o1.startOffset > o2.startOffset) return 1;
if (o1.getStartOffset() < o2.getStartOffset()) return -1;
else if (o1.getStartOffset() > o2.getStartOffset()) return 1;
}
return 0;
}

View File

@ -13,7 +13,7 @@ class XFSIndexOutput extends FSDirectory.FSIndexOutput {
private final StoreRateLimiting.Listener rateListener;
XFSIndexOutput(FSDirectory parent, String name, RateLimiter rateLimiter, StoreRateLimiting.Listener rateListener) throws IOException {
super(parent, name);
super(parent, name, null /* we have our own rate limiter */);
this.rateLimiter = rateLimiter;
this.rateListener = rateListener;
}

View File

@ -40,12 +40,12 @@ public class XMMapFSDirectory extends MMapDirectory {
}
@Override
public IndexOutput createOutput(String name) throws IOException {
public IndexOutput createOutput(String name, IOContext context) throws IOException {
StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting();
StoreRateLimiting.Type type = rateLimiting.getType();
RateLimiter limiter = rateLimiting.getRateLimiter();
if (type == StoreRateLimiting.Type.NONE || limiter == null) {
return super.createOutput(name);
return super.createOutput(name, context);
}
if (TrackingMergeScheduler.getCurrentMerge() != null) {
// we are merging, and type is either MERGE or ALL, rate limit...
@ -59,6 +59,6 @@ public class XMMapFSDirectory extends MMapDirectory {
return new XFSIndexOutput(this, name, limiter, rateListener);
}
// we shouldn't really get here...
return super.createOutput(name);
return super.createOutput(name, context);
}
}

View File

@ -40,12 +40,12 @@ public class XNIOFSDirectory extends NIOFSDirectory {
}
@Override
public IndexOutput createOutput(String name) throws IOException {
public IndexOutput createOutput(String name, IOContext context) throws IOException {
StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting();
StoreRateLimiting.Type type = rateLimiting.getType();
RateLimiter limiter = rateLimiting.getRateLimiter();
if (type == StoreRateLimiting.Type.NONE || limiter == null) {
return super.createOutput(name);
return super.createOutput(name, context);
}
if (TrackingMergeScheduler.getCurrentMerge() != null) {
// we are merging, and type is either MERGE or ALL, rate limit...
@ -59,6 +59,6 @@ public class XNIOFSDirectory extends NIOFSDirectory {
return new XFSIndexOutput(this, name, limiter, rateListener);
}
// we shouldn't really get here...
return super.createOutput(name);
return super.createOutput(name, context);
}
}

View File

@ -40,12 +40,12 @@ public class XSimpleFSDirectory extends SimpleFSDirectory {
}
@Override
public IndexOutput createOutput(String name) throws IOException {
public IndexOutput createOutput(String name, IOContext context) throws IOException {
StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting();
StoreRateLimiting.Type type = rateLimiting.getType();
RateLimiter limiter = rateLimiting.getRateLimiter();
if (type == StoreRateLimiting.Type.NONE || limiter == null) {
return super.createOutput(name);
return super.createOutput(name, context);
}
if (TrackingMergeScheduler.getCurrentMerge() != null) {
// we are merging, and type is either MERGE or ALL, rate limit...
@ -59,6 +59,6 @@ public class XSimpleFSDirectory extends SimpleFSDirectory {
return new XFSIndexOutput(this, name, limiter, rateListener);
}
// we shouldn't really get here...
return super.createOutput(name);
return super.createOutput(name, context);
}
}

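The three store wrappers above track the same Lucene 4.0 API change: createOutput and openInput now take an IOContext. A standalone sketch of that signature against a throwaway directory (the path and file name are invented):

import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NIOFSDirectory;

public class IOContextSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new NIOFSDirectory(new File("/tmp/iocontext-sketch"));
        IndexOutput out = dir.createOutput("example.bin", IOContext.DEFAULT);
        out.writeInt(42);
        out.close();
        IndexInput in = dir.openInput("example.bin", IOContext.READONCE);
        System.out.println(in.readInt());   // 42
        in.close();
        dir.close();
    }
}
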
View File

@ -17,10 +17,7 @@ package org.apache.lucene.store.bytebuffer;
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.store.*;
import java.io.FileNotFoundException;
import java.io.IOException;
@ -98,36 +95,6 @@ public class ByteBufferDirectory extends Directory {
return files.containsKey(name);
}
@Override
public long fileModified(String name) throws IOException {
ByteBufferFile file = files.get(name);
if (file == null)
throw new FileNotFoundException(name);
return file.getLastModified();
}
@Override
public void touchFile(String name) throws IOException {
ByteBufferFile file = files.get(name);
if (file == null)
throw new FileNotFoundException(name);
long ts2, ts1 = System.currentTimeMillis();
do {
try {
Thread.sleep(0, 1);
} catch (java.lang.InterruptedException ie) {
// In 3.0 we will change this to throw
// InterruptedException instead
Thread.currentThread().interrupt();
throw new RuntimeException(ie);
}
ts2 = System.currentTimeMillis();
} while (ts1 == ts2);
file.setLastModified(ts2);
}
@Override
public void deleteFile(String name) throws IOException {
ByteBufferFile file = files.remove(name);
@ -146,7 +113,7 @@ public class ByteBufferDirectory extends Directory {
}
@Override
public IndexOutput createOutput(String name) throws IOException {
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ByteBufferAllocator.Type allocatorType = ByteBufferAllocator.Type.LARGE;
if (name.contains("segments") || name.endsWith(".del")) {
allocatorType = ByteBufferAllocator.Type.SMALL;
@ -166,7 +133,7 @@ public class ByteBufferDirectory extends Directory {
}
@Override
public IndexInput openInput(String name) throws IOException {
public IndexInput openInput(String name, IOContext context) throws IOException {
ByteBufferFile file = files.get(name);
if (file == null)
throw new FileNotFoundException(name);

View File

@ -186,7 +186,7 @@ public class ByteBufferIndexInput extends IndexInput {
}
@Override
public Object clone() {
public IndexInput clone() {
ByteBufferIndexInput cloned = (ByteBufferIndexInput) super.clone();
cloned.file.incRef(); // inc ref on cloned one
if (currentBuffer != EMPTY_BUFFER) {

View File

@ -198,7 +198,7 @@ public class TransportAnalyzeAction extends TransportSingleCustomOperationAction
List<AnalyzeResponse.AnalyzeToken> tokens = Lists.newArrayList();
TokenStream stream = null;
try {
stream = analyzer.reusableTokenStream(field, new FastStringReader(request.text()));
stream = analyzer.tokenStream(field, new FastStringReader(request.text()));
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

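The switch from reusableTokenStream to tokenStream also makes the reset() call above mandatory before consuming tokens. A minimal consumption sketch, assuming a plain StandardAnalyzer rather than the analyzers this action resolves:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class AnalyzeSketch {
    public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        TokenStream stream = analyzer.tokenStream("body", new StringReader("quick brown fox"));
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();                     // required in Lucene 4.0 before incrementToken()
        while (stream.incrementToken()) {
            System.out.println(term.toString());
        }
        stream.end();
        stream.close();
    }
}
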
View File

@ -34,7 +34,6 @@ public class ClearIndicesCacheRequest extends BroadcastOperationRequest<ClearInd
private boolean filterCache = false;
private boolean fieldDataCache = false;
private boolean idCache = false;
private boolean bloomCache = false;
private String[] fields = null;
ClearIndicesCacheRequest() {
@ -82,26 +81,16 @@ public class ClearIndicesCacheRequest extends BroadcastOperationRequest<ClearInd
return this;
}
public boolean bloomCache() {
return this.bloomCache;
}
public ClearIndicesCacheRequest bloomCache(boolean bloomCache) {
this.bloomCache = bloomCache;
return this;
}
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
filterCache = in.readBoolean();
fieldDataCache = in.readBoolean();
idCache = in.readBoolean();
bloomCache = in.readBoolean();
int size = in.readVInt();
if (size > 0) {
fields = new String[size];
for (int i = 0; i < size; i++) {
fields[i] = in.readUTF();
fields[i] = in.readString();
}
}
}
@ -111,13 +100,12 @@ public class ClearIndicesCacheRequest extends BroadcastOperationRequest<ClearInd
out.writeBoolean(filterCache);
out.writeBoolean(fieldDataCache);
out.writeBoolean(idCache);
out.writeBoolean(bloomCache);
if (fields == null) {
out.writeVInt(0);
} else {
out.writeVInt(fields.length);
for (String field : fields) {
out.writeUTF(field);
out.writeString(field);
}
}
}

View File

@ -53,11 +53,6 @@ public class ClearIndicesCacheRequestBuilder extends BroadcastOperationRequestBu
return this;
}
public ClearIndicesCacheRequestBuilder setBloomCache(boolean bloomCache) {
request.bloomCache(bloomCache);
return this;
}
@Override
protected void doExecute(ActionListener<ClearIndicesCacheResponse> listener) {
((IndicesAdminClient) client).clearCache(request, listener);

View File

@ -33,7 +33,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest {
private boolean filterCache = false;
private boolean fieldDataCache = false;
private boolean idCache = false;
private boolean bloomCache = false;
private String[] fields = null;
ShardClearIndicesCacheRequest() {
@ -44,7 +43,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest {
filterCache = request.filterCache();
fieldDataCache = request.fieldDataCache();
idCache = request.idCache();
bloomCache = request.bloomCache();
fields = request.fields();
}
@ -60,10 +58,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest {
return this.idCache;
}
public boolean bloomCache() {
return this.bloomCache;
}
public String[] fields() {
return this.fields;
}
@ -79,7 +73,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest {
filterCache = in.readBoolean();
fieldDataCache = in.readBoolean();
idCache = in.readBoolean();
bloomCache = in.readBoolean();
int size = in.readVInt();
if (size > 0) {
fields = new String[size];
@ -95,7 +88,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest {
out.writeBoolean(filterCache);
out.writeBoolean(fieldDataCache);
out.writeBoolean(idCache);
out.writeBoolean(bloomCache);
if (fields == null) {
out.writeVInt(0);
} else {

View File

@ -138,10 +138,6 @@ public class TransportClearIndicesCacheAction extends TransportBroadcastOperatio
clearedAtLeastOne = true;
service.cache().idCache().clear();
}
if (request.bloomCache()) {
clearedAtLeastOne = true;
service.cache().bloomCache().clear();
}
if (!clearedAtLeastOne) {
if (request.fields() != null && request.fields().length > 0) {
// only clear caches relating to the specified fields

View File

@ -86,7 +86,7 @@ public class TransportExplainAction extends TransportShardSingleOperationAction<
protected ExplainResponse shardOperation(ExplainRequest request, int shardId) throws ElasticSearchException {
IndexService indexService = indicesService.indexService(request.index());
IndexShard indexShard = indexService.shardSafe(shardId);
Term uidTerm = UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(request.type(), request.id()));
Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUid(request.type(), request.id()));
Engine.GetResult result = indexShard.get(new Engine.Get(false, uidTerm));
if (!result.exists()) {
return new ExplainResponse(false);
@ -104,7 +104,7 @@ public class TransportExplainAction extends TransportShardSingleOperationAction<
try {
context.parsedQuery(parseQuery(request, indexService));
context.preProcess();
int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().docStart;
int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().reader.docBase;
Explanation explanation = context.searcher().explain(context.query(), topLevelDocId);
if (request.fields() != null) {

View File

@ -19,7 +19,7 @@
package org.elasticsearch.action.mlt;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.ActionListener;
@ -263,7 +263,7 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
}
docMapper.parse(SourceToParse.source(getResponse.sourceRef()).type(request.type()).id(request.id()), new DocumentMapper.ParseListenerAdapter() {
@Override
public boolean beforeFieldAdded(FieldMapper fieldMapper, Fieldable field, Object parseContext) {
public boolean beforeFieldAdded(FieldMapper fieldMapper, Field field, Object parseContext) {
if (fieldMapper instanceof InternalMapper) {
return true;
}
@ -281,7 +281,7 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
});
}
private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, FieldMapper fieldMapper, Fieldable field) {
private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, FieldMapper fieldMapper, Field field) {
addMoreLikeThis(request, boolBuilder, field.name(), fieldMapper.valueAsString(field));
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.util.concurrent.ThreadLocals;
@ -29,10 +30,10 @@ import java.util.Arrays;
*/
public class Unicode {
private static ThreadLocal<ThreadLocals.CleanableValue<UnicodeUtil.UTF8Result>> cachedUtf8Result = new ThreadLocal<ThreadLocals.CleanableValue<UnicodeUtil.UTF8Result>>() {
private static ThreadLocal<ThreadLocals.CleanableValue<BytesRef>> cachedUtf8Result = new ThreadLocal<ThreadLocals.CleanableValue<BytesRef>>() {
@Override
protected ThreadLocals.CleanableValue<UnicodeUtil.UTF8Result> initialValue() {
return new ThreadLocals.CleanableValue<UnicodeUtil.UTF8Result>(new UnicodeUtil.UTF8Result());
protected ThreadLocals.CleanableValue<BytesRef> initialValue() {
return new ThreadLocals.CleanableValue<BytesRef>(new BytesRef());
}
};
@ -47,20 +48,20 @@ public class Unicode {
if (source == null) {
return null;
}
UnicodeUtil.UTF8Result result = unsafeFromStringAsUtf8(source);
return Arrays.copyOfRange(result.result, 0, result.length);
BytesRef result = unsafeFromStringAsUtf8(source);
return Arrays.copyOfRange(result.bytes, result.offset, result.length);
}
public static UnicodeUtil.UTF8Result fromStringAsUtf8(String source) {
public static BytesRef fromStringAsUtf8(String source) {
if (source == null) {
return null;
}
UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
BytesRef result = new BytesRef();
UnicodeUtil.UTF16toUTF8(source, 0, source.length(), result);
return result;
}
public static void fromStringAsUtf8(String source, UnicodeUtil.UTF8Result result) {
public static void fromStringAsUtf8(String source, BytesRef result) {
if (source == null) {
result.length = 0;
return;
@ -68,11 +69,11 @@ public class Unicode {
UnicodeUtil.UTF16toUTF8(source, 0, source.length(), result);
}
public static UnicodeUtil.UTF8Result unsafeFromStringAsUtf8(String source) {
public static BytesRef unsafeFromStringAsUtf8(String source) {
if (source == null) {
return null;
}
UnicodeUtil.UTF8Result result = cachedUtf8Result.get().get();
BytesRef result = cachedUtf8Result.get().get();
UnicodeUtil.UTF16toUTF8(source, 0, source.length(), result);
return result;
}

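A standalone sketch of the conversion these helpers now rely on: UnicodeUtil.UTF16toUTF8 fills (and grows) a BytesRef in place. The sample string is arbitrary.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;

public class Utf8Sketch {
    public static void main(String[] args) {
        String source = "fünf";
        BytesRef result = new BytesRef();
        UnicodeUtil.UTF16toUTF8(source, 0, source.length(), result);
        // 5 UTF-8 bytes: the umlaut encodes to two bytes
        System.out.println(result.length);
    }
}
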
View File

@ -1,172 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.bloom;
/**
* The following calculations are taken from:
* http://www.cs.wisc.edu/~cao/papers/summary-cache/node8.html
* "Bloom Filters - the math"
* <p/>
* This class's static methods are meant to facilitate the use of the Bloom
* Filter class by helping to choose correct values of 'bits per element' and
* 'number of hash functions, k'.
*/
class BloomCalculations {
private static final int minBuckets = 2;
private static final int minK = 1;
private static final int EXCESS = 20;
/**
* In the following table, the row 'i' shows false positive rates if i buckets
* per element are used. Column 'j' shows false positive rates if j hash
* functions are used. The first row is 'i=0', the first column is 'j=0'.
* Each cell (i,j) the false positive rate determined by using i buckets per
* element and j hash functions.
*/
static final double[][] probs = new double[][]{
{1.0}, // dummy row representing 0 buckets per element
{1.0, 1.0}, // dummy row representing 1 buckets per element
{1.0, 0.393, 0.400},
{1.0, 0.283, 0.237, 0.253},
{1.0, 0.221, 0.155, 0.147, 0.160},
{1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5
{1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638},
{1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364},
{1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229},
{1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145},
{1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10
{1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509},
{1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314},
{1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194},
{1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012},
{1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15
{1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459},
{1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284},
{1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176},
{1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109},
{1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20
}; // the first column is a dummy column representing K=0.
/**
* The optimal number of hashes for a given number of bits per element.
* These values are automatically calculated from the data above.
*/
private static final int[] optKPerBuckets = new int[probs.length];
static {
for (int i = 0; i < probs.length; i++) {
double min = Double.MAX_VALUE;
double[] prob = probs[i];
for (int j = 0; j < prob.length; j++) {
if (prob[j] < min) {
min = prob[j];
optKPerBuckets[i] = Math.max(minK, j);
}
}
}
}
/**
* Given the number of buckets that can be used per element, return a
* specification that minimizes the false positive rate.
*
* @param bucketsPerElement The number of buckets per element for the filter.
* @return A spec that minimizes the false positive rate.
*/
public static BloomSpecification computeBloomSpec(int bucketsPerElement) {
assert bucketsPerElement >= 1;
assert bucketsPerElement <= probs.length - 1;
return new BloomSpecification(optKPerBuckets[bucketsPerElement], bucketsPerElement);
}
/**
* A wrapper class that holds two key parameters for a Bloom Filter: the
* number of hash functions used, and the number of buckets per element used.
*/
public static class BloomSpecification {
final int K; // number of hash functions.
final int bucketsPerElement;
public BloomSpecification(int k, int bucketsPerElement) {
K = k;
this.bucketsPerElement = bucketsPerElement;
}
}
/**
* Given a maximum tolerable false positive probability, compute a Bloom
* specification which will give less than the specified false positive rate,
* but minimize the number of buckets per element and the number of hash
* functions used. Because bandwidth (and therefore total bitvector size)
* is considered more expensive than computing power, preference is given
* to minimizing buckets per element rather than number of hash functions.
*
* @param maxBucketsPerElement The maximum number of buckets available for the filter.
* @param maxFalsePosProb The maximum tolerable false positive rate.
* @return A Bloom Specification which would result in a false positive rate
* less than specified by the function call
* @throws UnsupportedOperationException if a filter satisfying the parameters cannot be met
*/
public static BloomSpecification computeBloomSpec(int maxBucketsPerElement, double maxFalsePosProb) {
assert maxBucketsPerElement >= 1;
assert maxBucketsPerElement <= probs.length - 1;
int maxK = probs[maxBucketsPerElement].length - 1;
// Handle the trivial cases
if (maxFalsePosProb >= probs[minBuckets][minK]) {
return new BloomSpecification(2, optKPerBuckets[2]);
}
if (maxFalsePosProb < probs[maxBucketsPerElement][maxK]) {
throw new UnsupportedOperationException(String.format("Unable to satisfy %s with %s buckets per element",
maxFalsePosProb, maxBucketsPerElement));
}
// First find the minimal required number of buckets:
int bucketsPerElement = 2;
int K = optKPerBuckets[2];
while (probs[bucketsPerElement][K] > maxFalsePosProb) {
bucketsPerElement++;
K = optKPerBuckets[bucketsPerElement];
}
// Now that the number of buckets is sufficient, see if we can relax K
// without losing too much precision.
while (probs[bucketsPerElement][K - 1] <= maxFalsePosProb) {
K--;
}
return new BloomSpecification(K, bucketsPerElement);
}
/**
* Calculates the maximum number of buckets per element that this implementation
* can support. Crucially, it will lower the bucket count if necessary to meet
* BitSet's size restrictions.
*/
public static int maxBucketsPerElement(long numElements) {
numElements = Math.max(1, numElements);
double v = (Long.MAX_VALUE - EXCESS) / (double) numElements;
if (v < 1.0) {
throw new UnsupportedOperationException("Cannot compute probabilities for " + numElements + " elements.");
}
return Math.min(BloomCalculations.probs.length - 1, (int) v);
}
}

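A quick cross-check of one entry in the probs table above against the standard Bloom filter estimate p ≈ (1 - e^(-k/c))^k, where c is buckets per element and k the number of hash functions; illustrative only, not part of the removed class.

public class BloomMathSketch {
    public static void main(String[] args) {
        int bucketsPerElement = 10; // c
        int k = 7;                  // number of hash functions
        double p = Math.pow(1 - Math.exp(-(double) k / bucketsPerElement), k);
        // prints ~0.00819, matching probs[10][7] in the table above
        System.out.printf("estimated false positive rate: %.5f%n", p);
    }
}
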
View File

@ -1,64 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.bloom;
/**
*
*/
public interface BloomFilter {
public static final BloomFilter NONE = new BloomFilter() {
@Override
public void add(byte[] key, int offset, int length) {
}
@Override
public boolean isPresent(byte[] key, int offset, int length) {
return true;
}
@Override
public long sizeInBytes() {
return 0;
}
};
public static final BloomFilter EMPTY = new BloomFilter() {
@Override
public void add(byte[] key, int offset, int length) {
}
@Override
public boolean isPresent(byte[] key, int offset, int length) {
return false;
}
@Override
public long sizeInBytes() {
return 0;
}
};
void add(byte[] key, int offset, int length);
boolean isPresent(byte[] key, int offset, int length);
long sizeInBytes();
}

View File

@ -1,98 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.bloom;
import org.elasticsearch.common.UUID;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.unit.TimeValue;
import java.io.UnsupportedEncodingException;
/**
*
*/
public class BloomFilterFactory {
private static ESLogger logger = ESLoggerFactory.getLogger(BloomFilterFactory.class.getName());
private static final int EXCESS = 20;
/**
* @return A BloomFilter with the lowest practical false positive probability
* for the given number of elements.
*/
public static BloomFilter getFilter(long numElements, int targetBucketsPerElem) {
int maxBucketsPerElement = Math.max(1, BloomCalculations.maxBucketsPerElement(numElements));
int bucketsPerElement = Math.min(targetBucketsPerElem, maxBucketsPerElement);
if (bucketsPerElement < targetBucketsPerElem) {
logger.warn(String.format("Cannot provide an optimal BloomFilter for %d elements (%d/%d buckets per element).",
numElements, bucketsPerElement, targetBucketsPerElem));
}
BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement);
return new ObsBloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
}
/**
* @return The smallest BloomFilter that can provide the given false positive
* probability rate for the given number of elements.
* <p/>
* Asserts that the given probability can be satisfied using this filter.
*/
public static BloomFilter getFilter(long numElements, double maxFalsePosProbability) {
assert maxFalsePosProbability <= 1.0 : "Invalid probability";
int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability);
return new ObsBloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
}
private static long bucketsFor(long numElements, int bucketsPer) {
return numElements * bucketsPer + EXCESS;
}
public static void main(String[] args) throws UnsupportedEncodingException {
long elements = SizeValue.parseSizeValue("100m").singles();
BloomFilter filter = BloomFilterFactory.getFilter(elements, 15);
System.out.println("Filter size: " + new ByteSizeValue(filter.sizeInBytes()));
for (long i = 0; i < elements; i++) {
byte[] utf8s = UUID.randomBase64UUID().getBytes("UTF8");
filter.add(utf8s, 0, utf8s.length);
}
long falsePositives = 0;
for (long i = 0; i < elements; i++) {
byte[] utf8s = UUID.randomBase64UUID().getBytes("UTF8");
if (filter.isPresent(utf8s, 0, utf8s.length)) {
falsePositives++;
}
}
System.out.println("false positives: " + falsePositives);
byte[] utf8s = UUID.randomBase64UUID().getBytes("UTF8");
long time = System.currentTimeMillis();
for (long i = 0; i < elements; i++) {
if (filter.isPresent(utf8s, 0, utf8s.length)) {
}
}
long timeSize = System.currentTimeMillis() - time;
System.out.println("Indexed in " + new TimeValue(timeSize) + ", TPS: " + (elements / timeSize) + " per millisecond");
}
}

View File

@ -1,97 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.bloom;
import org.apache.lucene.util.OpenBitSet;
import org.elasticsearch.common.MurmurHash;
import org.elasticsearch.common.RamUsage;
public class ObsBloomFilter implements BloomFilter {
private final int hashCount;
private final OpenBitSet bitset;
private final long size;
ObsBloomFilter(int hashCount, long size) {
this.hashCount = hashCount;
this.bitset = new OpenBitSet(size);
this.size = size;
}
long emptyBuckets() {
long n = 0;
for (long i = 0; i < buckets(); i++) {
if (!bitset.get(i)) {
n++;
}
}
return n;
}
private long buckets() {
return size;
}
private long[] getHashBuckets(byte[] key, int offset, int length) {
return getHashBuckets(key, offset, length, hashCount, buckets());
}
static long[] getHashBuckets(byte[] b, int offset, int length, int hashCount, long max) {
long[] result = new long[hashCount];
long[] hash = MurmurHash.hash3_x64_128(b, offset, length, 0L);
for (int i = 0; i < hashCount; ++i) {
result[i] = Math.abs((hash[0] + (long) i * hash[1]) % max);
}
return result;
}
@Override
public void add(byte[] key, int offset, int length) {
// inline the hash buckets so we don't have to create the long[] each time...
long[] hash = MurmurHash.hash3_x64_128(key, offset, length, 0L);
for (int i = 0; i < hashCount; ++i) {
long bucketIndex = Math.abs((hash[0] + (long) i * hash[1]) % size);
bitset.fastSet(bucketIndex);
}
}
@Override
public boolean isPresent(byte[] key, int offset, int length) {
// inline the hash buckets so we don't have to create the long[] each time...
long[] hash = MurmurHash.hash3_x64_128(key, offset, length, 0L);
for (int i = 0; i < hashCount; ++i) {
long bucketIndex = Math.abs((hash[0] + (long) i * hash[1]) % size);
if (!bitset.fastGet(bucketIndex)) {
return false;
}
}
return true;
}
public void clear() {
bitset.clear(0, bitset.size());
}
@Override
public long sizeInBytes() {
return bitset.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_ARRAY_HEADER + RamUsage.NUM_BYTES_INT /* wlen */;
}
}

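The filter above derives its k bit positions from a single 128-bit MurmurHash value by double hashing: index_i = |(h0 + i * h1) mod size|. A sketch with invented hash halves standing in for the MurmurHash helper:

public class DoubleHashingSketch {
    public static void main(String[] args) {
        long h0 = 0x9E3779B97F4A7C15L; // stand-ins for hash3_x64_128(key, ...)[0] and [1]
        long h1 = 0xC2B2AE3D27D4EB4FL;
        long size = 1000003L;          // number of buckets in the bit set
        int hashCount = 5;             // k
        for (int i = 0; i < hashCount; i++) {
            long bucketIndex = Math.abs((h0 + (long) i * h1) % size);
            System.out.println("bit " + i + " -> bucket " + bucketIndex);
        }
    }
}
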
View File

@ -125,6 +125,16 @@ public class ByteBufferBytesReference implements BytesReference {
return buffer.arrayOffset() + buffer.position();
}
@Override
public int hashCode() {
return Helper.bytesHashCode(this);
}
@Override
public boolean equals(Object obj) {
return Helper.bytesEqual(this, (BytesReference) obj);
}
@Override
public String toUtf8() {
if (!buffer.hasRemaining()) {

View File

@ -20,6 +20,7 @@
package org.elasticsearch.common.bytes;
import com.google.common.base.Charsets;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Bytes;
import org.elasticsearch.common.io.stream.BytesStreamInput;
@ -43,6 +44,23 @@ public class BytesArray implements BytesReference {
this(bytes.getBytes(Charsets.UTF_8));
}
public BytesArray(BytesRef bytesRef) {
this(bytesRef, false);
}
public BytesArray(BytesRef bytesRef, boolean deepCopy) {
if (deepCopy) {
BytesRef copy = BytesRef.deepCopyOf(bytesRef);
bytes = copy.bytes;
offset = copy.offset;
length = copy.length;
} else {
bytes = bytesRef.bytes;
offset = bytesRef.offset;
length = bytesRef.length;
}
}
public BytesArray(byte[] bytes) {
this.bytes = bytes;
this.offset = 0;
@ -130,33 +148,12 @@ public class BytesArray implements BytesReference {
}
@Override
public boolean equals(Object obj) {
return bytesEquals((BytesArray) obj);
}
public boolean bytesEquals(BytesArray other) {
if (length == other.length) {
int otherUpto = other.offset;
final byte[] otherBytes = other.bytes;
final int end = offset + length;
for (int upto = offset; upto < end; upto++, otherUpto++) {
if (bytes[upto] != otherBytes[otherUpto]) {
return false;
}
}
return true;
} else {
return false;
}
public int hashCode() {
return Helper.bytesHashCode(this);
}
@Override
public int hashCode() {
int result = 0;
final int end = offset + length;
for (int i = offset; i < end; i++) {
result = 31 * result + bytes[i];
}
return result;
public boolean equals(Object obj) {
return Helper.bytesEqual(this, (BytesReference) obj);
}
}

View File

@ -24,12 +24,53 @@ import org.jboss.netty.buffer.ChannelBuffer;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Comparator;
/**
* A reference to bytes.
*/
public interface BytesReference {
public static class Helper {
public static boolean bytesEqual(BytesReference a, BytesReference b) {
if (a == b) {
return true;
}
if (a.length() != b.length()) {
return false;
}
if (!a.hasArray()) {
a = a.toBytesArray();
}
if (!b.hasArray()) {
b = b.toBytesArray();
}
int bUpTo = b.arrayOffset();
final byte[] aArray = a.array();
final byte[] bArray = b.array();
final int end = a.arrayOffset() + a.length();
for (int aUpTo = a.arrayOffset(); aUpTo < end; aUpTo++, bUpTo++) {
if (aArray[aUpTo] != bArray[bUpTo]) {
return false;
}
}
return true;
}
public static int bytesHashCode(BytesReference a) {
if (!a.hasArray()) {
a = a.toBytesArray();
}
int result = 0;
final int end = a.arrayOffset() + a.length();
for (int i = a.arrayOffset(); i < end; i++) {
result = 31 * result + a.array()[i];
}
return result;
}
}
/**
* Returns the byte at the specified index. Need to be between 0 and length.
*/
@ -94,4 +135,6 @@ public interface BytesReference {
* Converts to a string based on utf8.
*/
String toUtf8();
}

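With the Helper methods above, every BytesReference implementation can share a bytes-based equals and hashCode. A small sketch comparing two construction paths that appear in this commit (the content string is arbitrary):

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;

public class BytesReferenceEqualitySketch {
    public static void main(String[] args) {
        BytesReference a = new BytesArray("elasticsearch");
        BytesReference b = new BytesArray(new BytesRef("elasticsearch")); // shallow wrap
        System.out.println(a.equals(b));                  // true: same underlying bytes
        System.out.println(a.hashCode() == b.hashCode()); // true: Helper.bytesHashCode
    }
}
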
View File

@ -106,4 +106,14 @@ public class ChannelBufferBytesReference implements BytesReference {
public String toUtf8() {
return buffer.toString(Charsets.UTF_8);
}
@Override
public int hashCode() {
return Helper.bytesHashCode(this);
}
@Override
public boolean equals(Object obj) {
return Helper.bytesEqual(this, (BytesReference) obj);
}
}

View File

@ -124,14 +124,12 @@ public class HashedBytesArray implements BytesReference {
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
HashedBytesArray bytesWrap = (HashedBytesArray) o;
return Arrays.equals(bytes, bytesWrap.bytes);
public int hashCode() {
return Helper.bytesHashCode(this);
}
@Override
public int hashCode() {
return hashCode;
public boolean equals(Object obj) {
return Helper.bytesEqual(this, (BytesReference) obj);
}
}

View File

@ -1,7 +1,9 @@
package org.elasticsearch.common.compress;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.store.*;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.index.store.support.ForceSyncDirectory;
import java.io.IOException;
@ -60,16 +62,6 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory
return dir.fileExists(name);
}
@Override
public long fileModified(String name) throws IOException {
return dir.fileModified(name);
}
@Override
public void touchFile(String name) throws IOException {
dir.touchFile(name);
}
@Override
public void deleteFile(String name) throws IOException {
dir.deleteFile(name);
@ -82,11 +74,12 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory
@Override
public long fileLength(String name) throws IOException {
if (actualLength && decompressExtensions.contains(getExtension(name))) {
IndexInput in = openInput(name);
// LUCENE 4 UPGRADE: Is this the right IOContext?
IndexInput in = openInput(name, IOContext.READONCE);
try {
return in.length();
} catch (Exception e) {
in.close();
} finally {
IOUtils.close(in);
}
}
return dir.fileLength(name);
@ -97,24 +90,19 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory
dir.sync(names);
}
@Override
public void sync(String name) throws IOException {
dir.sync(name);
}
@Override
public void forceSync(String name) throws IOException {
if (dir instanceof ForceSyncDirectory) {
((ForceSyncDirectory) dir).forceSync(name);
} else {
dir.sync(name);
dir.sync(ImmutableList.of(name));
}
}
@Override
public IndexInput openInput(String name) throws IOException {
public IndexInput openInput(String name, IOContext context) throws IOException {
if (decompressExtensions.contains(getExtension(name))) {
IndexInput in = dir.openInput(name);
IndexInput in = dir.openInput(name, context);
Compressor compressor1 = CompressorFactory.compressor(in);
if (compressor1 != null) {
return compressor1.indexInput(in);
@ -122,29 +110,15 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory
return in;
}
}
return dir.openInput(name);
return dir.openInput(name, context);
}
@Override
public IndexInput openInput(String name, int bufferSize) throws IOException {
if (decompressExtensions.contains(getExtension(name))) {
IndexInput in = dir.openInput(name, bufferSize);
Compressor compressor1 = CompressorFactory.compressor(in);
if (compressor1 != null) {
return compressor1.indexInput(in);
} else {
return in;
}
}
return dir.openInput(name, bufferSize);
}
@Override
public IndexOutput createOutput(String name) throws IOException {
public IndexOutput createOutput(String name, IOContext context) throws IOException {
if (compress && compressExtensions.contains(getExtension(name))) {
return compressor.indexOutput(dir.createOutput(name));
return compressor.indexOutput(dir.createOutput(name, context));
}
return dir.createOutput(name);
return dir.createOutput(name, context);
}
// can't override this one, we need to open the correct compression
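
Every Directory read and write in Lucene 4.0 takes an IOContext, which is why the delegating methods above gained the extra parameter. A small sketch of the new open/read/close pattern, using IOContext.READONCE and IOUtils as the diff does; the index path and file name are illustrative:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;

import java.io.File;
import java.io.IOException;

public class ReadOnceSketch {
    // Reads the logical length of a file through the Lucene 4.0 openInput(String, IOContext) API.
    static long lengthOf(File indexPath, String fileName) throws IOException {
        Directory dir = FSDirectory.open(indexPath);
        IndexInput in = dir.openInput(fileName, IOContext.READONCE);
        try {
            return in.length();
        } finally {
            IOUtils.close(in, dir); // closes both, rethrowing the first failure
        }
    }
}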

View File

@ -203,7 +203,7 @@ public abstract class CompressedIndexInput<T extends CompressorContext> extends
protected abstract int uncompress(IndexInput in, byte[] out) throws IOException;
@Override
public Object clone() {
public IndexInput clone() {
// we clone and we need to make sure we keep the same positions!
CompressedIndexInput cloned = (CompressedIndexInput) super.clone();
cloned.uncompressed = new byte[uncompressedLength];

View File

@ -19,7 +19,7 @@
package org.elasticsearch.common.compress;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
@ -79,8 +79,8 @@ public class CompressedString implements Streamable {
}
public CompressedString(String str) throws IOException {
UnicodeUtil.UTF8Result result = Unicode.unsafeFromStringAsUtf8(str);
this.bytes = CompressorFactory.defaultCompressor().compress(result.result, 0, result.length);
BytesRef result = Unicode.unsafeFromStringAsUtf8(str);
this.bytes = CompressorFactory.defaultCompressor().compress(result.bytes, result.offset, result.length);
}
public byte[] compressed() {

View File

@ -65,7 +65,7 @@ public class LZFCompressedIndexInput extends CompressedIndexInput<LZFCompressorC
}
@Override
public Object clone() {
public IndexInput clone() {
LZFCompressedIndexInput cloned = (LZFCompressedIndexInput) super.clone();
cloned.inputBuffer = new byte[LZFChunk.MAX_CHUNK_LEN];
return cloned;

View File

@ -59,7 +59,7 @@ public abstract class SnappyCompressedIndexInput extends CompressedIndexInput<Sn
}
@Override
public Object clone() {
public IndexInput clone() {
SnappyCompressedIndexInput cloned = (SnappyCompressedIndexInput) super.clone();
cloned.inputBuffer = new byte[inputBuffer.length];
return cloned;

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.io.stream;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
@ -69,6 +70,16 @@ public class BytesStreamInput extends StreamInput {
return bytes;
}
@Override
public BytesRef readBytesRef(int length) throws IOException {
if (unsafe) {
return super.readBytesRef(length);
}
BytesRef bytes = new BytesRef(buf, pos, length);
pos += length;
return bytes;
}
@Override
public long skip(long n) throws IOException {
if (pos + n > count) {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.io.stream;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
@ -84,6 +85,20 @@ public abstract class StreamInput extends InputStream {
return new BytesArray(bytes, 0, length);
}
public BytesRef readBytesRef() throws IOException {
int length = readVInt();
return readBytesRef(length);
}
public BytesRef readBytesRef(int length) throws IOException {
if (length == 0) {
return new BytesRef();
}
byte[] bytes = new byte[length];
readBytes(bytes, 0, length);
return new BytesRef(bytes, 0, length);
}
public void readFully(byte[] b) throws IOException {
readBytes(b, 0, b.length);
}
@ -347,6 +362,8 @@ public abstract class StreamInput extends InputStream {
return readBytesReference();
case 15:
return readText();
case 16:
return readShort();
default:
throw new IOException("Can't read unknown type [" + type + "]");
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.io.stream;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesReference;
@ -106,6 +107,15 @@ public abstract class StreamOutput extends OutputStream {
bytes.writeTo(this);
}
public void writeBytesRef(BytesRef bytes) throws IOException {
if (bytes == null) {
writeVInt(0);
return;
}
writeVInt(bytes.length);
write(bytes.bytes, bytes.offset, bytes.length);
}
public final void writeShort(short v) throws IOException {
writeByte((byte) (v >> 8));
writeByte((byte) v);
@ -358,6 +368,9 @@ public abstract class StreamOutput extends OutputStream {
} else if (value instanceof Text) {
writeByte((byte) 15);
writeText((Text) value);
} else if (type == Short.class) {
writeByte((byte) 16);
writeShort((Short) value);
} else {
throw new IOException("Can't write type [" + type + "]");
}
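
The new writeBytesRef/readBytesRef pair encodes a BytesRef as a VInt length followed by the raw bytes. A hedged round-trip sketch; the BytesStreamOutput#bytes() accessor and the BytesStreamInput(byte[], boolean) constructor are assumed from elsewhere in this codebase:

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;

import java.io.IOException;

public class BytesRefStreamSketch {
    public static void main(String[] args) throws IOException {
        BytesStreamOutput out = new BytesStreamOutput();
        out.writeBytesRef(new BytesRef("term"));               // VInt length + raw bytes
        byte[] written = out.bytes().toBytesArray().array();   // assumed accessors; offset is 0 for a fresh stream
        StreamInput in = new BytesStreamInput(written, false);  // assumed constructor (non-"unsafe" buffer)
        BytesRef back = in.readBytesRef();
        assert new BytesRef("term").bytesEquals(back);
    }
}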

View File

@ -1,151 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.stream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
*
*/
public class Streamables {
public static Map<String, Object> readMap(StreamInput in) throws IOException {
int size = in.readVInt();
Map<String, Object> map = new HashMap<String, Object>(size);
for (int i = 0; i < size; i++) {
map.put(in.readUTF(), readMapValue(in));
}
return map;
}
public static Object readMapValue(StreamInput in) throws IOException {
byte type = in.readByte();
if (type == -1) {
return null;
} else if (type == 0) {
return in.readUTF();
} else if (type == 1) {
return in.readInt();
} else if (type == 2) {
return in.readLong();
} else if (type == 3) {
return in.readFloat();
} else if (type == 4) {
return in.readDouble();
} else if (type == 5) {
return in.readBoolean();
} else if (type == 6) {
int bytesSize = in.readVInt();
byte[] value = new byte[bytesSize];
in.readFully(value);
return value;
} else if (type == 7) {
int size = in.readVInt();
List list = new ArrayList(size);
for (int i = 0; i < size; i++) {
list.add(readMapValue(in));
}
return list;
} else if (type == 8) {
int size = in.readVInt();
Object[] list = new Object[size];
for (int i = 0; i < size; i++) {
list[i] = readMapValue(in);
}
return list;
} else if (type == 9) {
int size = in.readVInt();
Map map = new HashMap(size);
for (int i = 0; i < size; i++) {
map.put(in.readUTF(), readMapValue(in));
}
return map;
} else {
throw new IOException("Can't read unknown type [" + type + "]");
}
}
public static void writeMap(StreamOutput out, Map<String, Object> map) throws IOException {
out.writeVInt(map.size());
for (Map.Entry<String, Object> entry : map.entrySet()) {
out.writeUTF(entry.getKey());
writeMapValue(out, entry.getValue());
}
}
private static void writeMapValue(StreamOutput out, Object value) throws IOException {
if (value == null) {
out.writeByte((byte) -1);
return;
}
Class type = value.getClass();
if (type == String.class) {
out.writeByte((byte) 0);
out.writeUTF((String) value);
} else if (type == Integer.class) {
out.writeByte((byte) 1);
out.writeInt((Integer) value);
} else if (type == Long.class) {
out.writeByte((byte) 2);
out.writeLong((Long) value);
} else if (type == Float.class) {
out.writeByte((byte) 3);
out.writeFloat((Float) value);
} else if (type == Double.class) {
out.writeByte((byte) 4);
out.writeDouble((Double) value);
} else if (type == Boolean.class) {
out.writeByte((byte) 5);
out.writeBoolean((Boolean) value);
} else if (type == byte[].class) {
out.writeByte((byte) 6);
out.writeVInt(((byte[]) value).length);
out.writeBytes(((byte[]) value));
} else if (value instanceof List) {
out.writeByte((byte) 7);
List list = (List) value;
out.writeVInt(list.size());
for (Object o : list) {
writeMapValue(out, o);
}
} else if (value instanceof Object[]) {
out.writeByte((byte) 8);
Object[] list = (Object[]) value;
out.writeVInt(list.length);
for (Object o : list) {
writeMapValue(out, o);
}
} else if (value instanceof Map) {
out.writeByte((byte) 9);
Map<String, Object> map = (Map<String, Object>) value;
out.writeVInt(map.size());
for (Map.Entry<String, Object> entry : map.entrySet()) {
out.writeUTF(entry.getKey());
writeMapValue(out, entry.getValue());
}
} else {
throw new IOException("Can't write type [" + type + "]");
}
}
}

View File

@ -1,90 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.elasticsearch.common.lucene.uid.UidField;
/**
*
*/
public class DocumentBuilder {
public static final Document EMPTY = new Document();
public static DocumentBuilder doc() {
return new DocumentBuilder();
}
public static Fieldable uidField(String value) {
return uidField(value, 0);
}
public static Fieldable uidField(String value, long version) {
return new UidField("_uid", value, version);
}
public static FieldBuilder field(String name, String value) {
return field(name, value, Field.Store.YES, Field.Index.ANALYZED);
}
public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index) {
return new FieldBuilder(name, value, store, index);
}
public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) {
return new FieldBuilder(name, value, store, index, termVector);
}
public static FieldBuilder field(String name, byte[] value, Field.Store store) {
return new FieldBuilder(name, value, store);
}
public static FieldBuilder field(String name, byte[] value, int offset, int length, Field.Store store) {
return new FieldBuilder(name, value, offset, length, store);
}
private final Document document;
private DocumentBuilder() {
this.document = new Document();
}
public DocumentBuilder boost(float boost) {
document.setBoost(boost);
return this;
}
public DocumentBuilder add(Fieldable field) {
document.add(field);
return this;
}
public DocumentBuilder add(FieldBuilder fieldBuilder) {
document.add(fieldBuilder.build());
return this;
}
public Document build() {
return document;
}
}

View File

@ -1,65 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene;
import org.apache.lucene.document.Field;
/**
*
*/
public class FieldBuilder {
private final Field field;
FieldBuilder(String name, String value, Field.Store store, Field.Index index) {
field = new Field(name, value, store, index);
}
FieldBuilder(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) {
field = new Field(name, value, store, index, termVector);
}
FieldBuilder(String name, byte[] value, Field.Store store) {
field = new Field(name, value, store);
}
FieldBuilder(String name, byte[] value, int offset, int length, Field.Store store) {
field = new Field(name, value, offset, length, store);
}
public FieldBuilder boost(float boost) {
field.setBoost(boost);
return this;
}
public FieldBuilder omitNorms(boolean omitNorms) {
field.setOmitNorms(omitNorms);
return this;
}
public FieldBuilder omitTermFreqAndPositions(boolean omitTermFreqAndPositions) {
field.setOmitTermFreqAndPositions(omitTermFreqAndPositions);
return this;
}
public Field build() {
return field;
}
}
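
DocumentBuilder and FieldBuilder are removed because Lucene 4.0 replaces Field.Store/Field.Index/Field.TermVector (and Fieldable) with FieldType. A hedged sketch of the equivalent construction under the new API:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

public class FieldTypeSketch {
    // Roughly what field(name, value, Store.YES, Index.ANALYZED, TermVector.YES) expressed in Lucene 3.x.
    static Document docWithAnalyzedStoredField(String name, String value) {
        FieldType ft = new FieldType(TextField.TYPE_STORED); // indexed, tokenized, stored
        ft.setStoreTermVectors(true);
        Document doc = new Document();
        doc.add(new Field(name, value, ft));
        return doc;
    }
}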

View File

@ -86,21 +86,11 @@ public abstract class IndexCommitDelegate extends IndexCommit {
return delegate.hashCode();
}
@Override
public long getVersion() {
return delegate.getVersion();
}
@Override
public long getGeneration() {
return delegate.getGeneration();
}
@Override
public long getTimestamp() throws IOException {
return delegate.getTimestamp();
}
@Override
public Map<String, String> getUserData() throws IOException {
return delegate.getUserData();

View File

@ -19,10 +19,12 @@
package org.elasticsearch.common.lucene;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamInput;
@ -40,7 +42,7 @@ import java.lang.reflect.Field;
*/
public class Lucene {
public static final Version VERSION = Version.LUCENE_36;
public static final Version VERSION = Version.LUCENE_40;
public static final Version ANALYZER_VERSION = VERSION;
public static final Version QUERYPARSER_VERSION = VERSION;
@ -57,6 +59,9 @@ public class Lucene {
if (version == null) {
return defaultVersion;
}
if ("4.0".equals(version)) {
return Version.LUCENE_40;
}
if ("3.6".equals(version)) {
return Version.LUCENE_36;
}
@ -82,6 +87,27 @@ public class Lucene {
return defaultVersion;
}
/**
* Reads the segments infos, returning null if they don't exist
*/
@Nullable
public static SegmentInfos readSegmentInfosIfExists(Directory directory) {
try {
return readSegmentInfos(directory);
} catch (IOException e) {
return null;
}
}
/**
* Reads the segments infos, throwing an IOException if they fail to load
*/
public static SegmentInfos readSegmentInfos(Directory directory) throws IOException {
final SegmentInfos sis = new SegmentInfos();
sis.read(directory);
return sis;
}
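
A small usage sketch for the two helpers above; the index path is illustrative:

import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.elasticsearch.common.lucene.Lucene;

import java.io.File;
import java.io.IOException;

public class SegmentInfosSketch {
    static int committedSegmentCount(File indexPath) throws IOException {
        Directory dir = FSDirectory.open(indexPath);
        try {
            SegmentInfos infos = Lucene.readSegmentInfosIfExists(dir);
            return infos == null ? 0 : infos.size(); // null when no commit point exists
        } finally {
            dir.close();
        }
    }
}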
public static long count(IndexSearcher searcher, Query query) throws IOException {
TotalHitCountCollector countCollector = new TotalHitCountCollector();
// we don't need scores, so wrap it in a constant score query
@ -92,18 +118,6 @@ public class Lucene {
return countCollector.getTotalHits();
}
public static int docId(IndexReader reader, Term term) throws IOException {
TermDocs termDocs = reader.termDocs(term);
try {
if (termDocs.next()) {
return termDocs.doc();
}
return NO_DOC;
} finally {
termDocs.close();
}
}
/**
* Closes the index writer, returning <tt>false</tt> if it failed to close.
*/
@ -134,7 +148,7 @@ public class Lucene {
if (in.readBoolean()) {
field = in.readUTF();
}
fields[i] = new SortField(field, in.readVInt(), in.readBoolean());
fields[i] = new SortField(field, readSortType(in), in.readBoolean());
}
FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()];
@ -160,6 +174,8 @@ public class Lucene {
cFields[j] = in.readShort();
} else if (type == 8) {
cFields[j] = in.readBoolean();
} else if (type == 9) {
cFields[j] = in.readBytesRef();
} else {
throw new IOException("Can't match type [" + type + "]");
}
@ -201,9 +217,9 @@ public class Lucene {
out.writeUTF(sortField.getField());
}
if (sortField.getComparatorSource() != null) {
out.writeVInt(((FieldDataType.ExtendedFieldComparatorSource) sortField.getComparatorSource()).reducedType());
writeSortType(out, ((FieldDataType.ExtendedFieldComparatorSource) sortField.getComparatorSource()).reducedType());
} else {
out.writeVInt(sortField.getType());
writeSortType(out, sortField.getType());
}
out.writeBoolean(sortField.getReverse());
}
@ -245,6 +261,9 @@ public class Lucene {
} else if (type == Boolean.class) {
out.writeByte((byte) 8);
out.writeBoolean((Boolean) field);
} else if (type == BytesRef.class) {
out.writeByte((byte) 9);
out.writeBytesRef((BytesRef) field);
} else {
throw new IOException("Can't handle sort field value of type [" + type + "]");
}
@ -271,6 +290,15 @@ public class Lucene {
}
}
// LUCENE 4 UPGRADE: We might want to maintain our own ordinal, instead of Lucene's ordinal
public static SortField.Type readSortType(StreamInput in) throws IOException {
return SortField.Type.values()[in.readVInt()];
}
public static void writeSortType(StreamOutput out, SortField.Type sortType) throws IOException {
out.writeVInt(sortType.ordinal());
}
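
The comment above flags that serializing SortField.Type by ordinal ties the wire format to Lucene's enum declaration order. A sketch of the kind of explicit mapping that would avoid that; this is an alternative, not what the commit does:

import org.apache.lucene.search.SortField;

public class SortTypeIdSketch {
    // Explicit ids survive reordering or insertion of enum constants in future Lucene versions.
    static byte sortTypeId(SortField.Type type) {
        switch (type) {
            case SCORE:  return 0;
            case DOC:    return 1;
            case STRING: return 2;
            case INT:    return 3;
            case FLOAT:  return 4;
            case LONG:   return 5;
            case DOUBLE: return 6;
            default:
                throw new IllegalArgumentException("unsupported sort type [" + type + "]");
        }
    }
}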
public static Explanation readExplanation(StreamInput in) throws IOException {
float value = in.readFloat();
String description = in.readUTF();
@ -312,9 +340,9 @@ public class Lucene {
segmentReaderSegmentInfoField = segmentReaderSegmentInfoFieldX;
}
public static SegmentInfo getSegmentInfo(SegmentReader reader) {
public static SegmentInfoPerCommit getSegmentInfo(SegmentReader reader) {
try {
return (SegmentInfo) segmentReaderSegmentInfoField.get(reader);
return (SegmentInfoPerCommit) segmentReaderSegmentInfoField.get(reader);
} catch (IllegalAccessException e) {
return null;
}
@ -343,7 +371,7 @@ public class Lucene {
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
public void setNextReader(AtomicReaderContext context) throws IOException {
}
@Override

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.lucene;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
@ -59,8 +60,8 @@ public class MinimumScoreCollector extends Collector {
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
collector.setNextReader(reader, docBase);
public void setNextReader(AtomicReaderContext context) throws IOException {
collector.setNextReader(context);
}
@Override

View File

@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
import org.apache.lucene.search.Scorer;
@ -61,10 +61,10 @@ public class MultiCollector extends Collector {
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
collector.setNextReader(reader, docBase);
public void setNextReader(AtomicReaderContext context) throws IOException {
collector.setNextReader(context);
for (Collector collector : collectors) {
collector.setNextReader(reader, docBase);
collector.setNextReader(context);
}
}
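
Collectors in Lucene 4.0 receive per-segment AtomicReaderContexts instead of an (IndexReader, docBase) pair, which is what the changes above and in MinimumScoreCollector adapt to. A minimal sketch of the new contract:

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

import java.io.IOException;

public class CountingCollector extends Collector {
    private int docBase;
    private int count;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // scores are not needed for counting
    }

    @Override
    public void collect(int doc) throws IOException {
        // doc is segment-relative; docBase + doc would be the top-level doc id
        count++;
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
        docBase = context.docBase; // replaces the old docBase parameter
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true;
    }

    public int count() {
        return count;
    }
}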

View File

@ -21,8 +21,8 @@ package org.elasticsearch.common.lucene.all;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.elasticsearch.ElasticSearchException;
import java.io.IOException;
@ -31,22 +31,21 @@ import java.io.Reader;
/**
*
*/
public class AllField extends AbstractField {
public class AllField extends Field {
private final AllEntries allEntries;
private final Analyzer analyzer;
public AllField(String name, Field.Store store, Field.TermVector termVector, AllEntries allEntries, Analyzer analyzer) {
super(name, store, Field.Index.ANALYZED, termVector);
public AllField(String name, AllEntries allEntries, Analyzer analyzer, FieldType fieldType) {
super(name, fieldType);
this.allEntries = allEntries;
this.analyzer = analyzer;
}
@Override
public String stringValue() {
if (isStored()) {
if (fieldType().stored()) {
return allEntries.buildText();
}
return null;
@ -58,7 +57,7 @@ public class AllField extends AbstractField {
}
@Override
public TokenStream tokenStreamValue() {
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
try {
allEntries.reset(); // reset the all entries, just in case it was read already
return AllTokenStream.allTokenStream(name, allEntries, analyzer);

View File

@ -19,14 +19,21 @@
package org.elasticsearch.common.lucene.all;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@ -51,32 +58,35 @@ public class AllTermQuery extends SpanTermQuery {
}
@Override
public Weight createWeight(Searcher searcher) throws IOException {
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new AllTermWeight(this, searcher);
}
protected class AllTermWeight extends SpanWeight {
public AllTermWeight(AllTermQuery query, Searcher searcher) throws IOException {
public AllTermWeight(AllTermQuery query, IndexSearcher searcher) throws IOException {
super(query, searcher);
}
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder,
boolean topScorer) throws IOException {
return new AllTermSpanScorer((TermSpans) query.getSpans(reader), this, similarity, reader.norms(query.getField()));
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
if (this.stats == null) {
return null;
}
AtomicReader reader = context.reader();
SloppySimScorer sloppySimScorer = similarity.sloppySimScorer(stats, context);
return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer);
}
protected class AllTermSpanScorer extends SpanScorer {
// TODO: is this the best way to allocate this?
protected byte[] payload = new byte[4];
protected TermPositions positions;
protected DocsAndPositionsEnum positions;
protected float payloadScore;
protected int payloadsSeen;
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity similarity, byte[] norms) throws IOException {
super(spans, weight, similarity, norms);
positions = spans.getPositions();
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
positions = spans.getPostings();
}
@Override
@ -88,12 +98,11 @@ public class AllTermQuery extends SpanTermQuery {
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
Similarity similarity1 = getSimilarity();
while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start();
freq += similarity1.sloppyFreq(matchLength);
processPayload(similarity1);
freq += docScorer.computeSlopFactor(matchLength);
processPayload();
more = spans.next();// this moves positions to the next match in this
// document
@ -101,10 +110,10 @@ public class AllTermQuery extends SpanTermQuery {
return more || (freq != 0);
}
protected void processPayload(Similarity similarity) throws IOException {
if (positions.isPayloadAvailable()) {
payload = positions.getPayload(payload, 0);
payloadScore += decodeFloat(payload);
protected void processPayload() throws IOException {
final BytesRef payload;
if ((payload = positions.getPayload()) != null) {
payloadScore += decodeFloat(payload.bytes, payload.offset);
payloadsSeen++;
} else {
@ -141,27 +150,40 @@ public class AllTermQuery extends SpanTermQuery {
return payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1;
}
@Override
protected Explanation explain(final int doc) throws IOException {
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException{
AllTermSpanScorer scorer = (AllTermSpanScorer) scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation inner = new ComplexExplanation();
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
inner.addDetail(scoreExplanation);
inner.setValue(scoreExplanation.getValue());
inner.setMatch(true);
ComplexExplanation result = new ComplexExplanation();
Explanation nonPayloadExpl = super.explain(doc);
result.addDetail(nonPayloadExpl);
// QUESTION: Is there a way to avoid this skipTo call? We need to know
// whether to load the payload or not
result.addDetail(inner);
Explanation payloadBoost = new Explanation();
result.addDetail(payloadBoost);
float payloadScore = getPayloadScore();
final float payloadScore = scorer.getPayloadScore();
payloadBoost.setValue(payloadScore);
// GSI: I suppose we could toString the payload, but I don't think that
// would be a good idea
payloadBoost.setDescription("allPayload(...)");
result.setValue(nonPayloadExpl.getValue() * payloadScore);
result.setValue(inner.getValue() * payloadScore);
result.setDescription("btq, product of:");
result.setMatch(nonPayloadExpl.getValue() == 0 ? Boolean.FALSE : Boolean.TRUE); // LUCENE-1303
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
}

View File

@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@ -35,8 +35,10 @@ import static org.apache.lucene.analysis.payloads.PayloadHelper.encodeFloat;
public final class AllTokenStream extends TokenFilter {
public static TokenStream allTokenStream(String allFieldName, AllEntries allEntries, Analyzer analyzer) throws IOException {
return new AllTokenStream(analyzer.reusableTokenStream(allFieldName, allEntries), allEntries);
return new AllTokenStream(analyzer.tokenStream(allFieldName, allEntries), allEntries);
}
private final BytesRef payloadSpare = new BytesRef(new byte[4]);
private final AllEntries allEntries;
@ -60,7 +62,8 @@ public final class AllTokenStream extends TokenFilter {
if (allEntries.current() != null) {
float boost = allEntries.current().boost();
if (boost != 1.0f) {
payloadAttribute.setPayload(new Payload(encodeFloat(boost)));
encodeFloat(boost, payloadSpare.bytes, payloadSpare.offset);
payloadAttribute.setPayload(payloadSpare);
} else {
payloadAttribute.setPayload(null);
}
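
The boost is now carried in a reusable BytesRef payload rather than a new Payload object per token. A small round-trip sketch matching the encodeFloat call above and the decodeFloat call in AllTermQuery:

import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.util.BytesRef;

public class PayloadBoostSketch {
    public static void main(String[] args) {
        BytesRef payload = new BytesRef(new byte[4]);
        PayloadHelper.encodeFloat(2.5f, payload.bytes, payload.offset);          // what AllTokenStream writes
        float boost = PayloadHelper.decodeFloat(payload.bytes, payload.offset);  // what AllTermQuery reads back
        assert boost == 2.5f;
    }
}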

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.lucene.docset;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
@ -58,9 +59,9 @@ public class DocIdSetCollector extends Collector {
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
base = docBase;
collector.setNextReader(reader, docBase);
public void setNextReader(AtomicReaderContext ctx) throws IOException {
base = ctx.docBase;
collector.setNextReader(ctx);
}
@Override

View File

@ -0,0 +1,63 @@
package org.elasticsearch.common.lucene.document;
import org.apache.lucene.document.*;
import org.apache.lucene.index.FieldInfo;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
/**
*
*/
public abstract class AbstractMultipleFieldsVisitor extends BaseFieldVisitor {
protected Document doc = new Document();
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
doc.add(new StoredField(fieldInfo.name, value));
}
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setIndexed(fieldInfo.isIndexed());
ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions());
doc.add(new Field(fieldInfo.name, value, ft));
}
@Override
public void intField(FieldInfo fieldInfo, int value) {
doc.add(new StoredField(fieldInfo.name, value));
}
@Override
public void longField(FieldInfo fieldInfo, long value) {
doc.add(new StoredField(fieldInfo.name, value));
}
@Override
public void floatField(FieldInfo fieldInfo, float value) {
doc.add(new StoredField(fieldInfo.name, value));
}
@Override
public void doubleField(FieldInfo fieldInfo, double value) {
doc.add(new StoredField(fieldInfo.name, value));
}
@Override
public Document createDocument() {
return doc;
}
@Override
public void reset() {
if (!doc.getFields().isEmpty()) {
doc = new Document();
}
}
}

View File

@ -0,0 +1,14 @@
package org.elasticsearch.common.lucene.document;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.StoredFieldVisitor;
public abstract class BaseFieldVisitor extends StoredFieldVisitor {
// LUCENE 4 UPGRADE: Added for now to make everything work. Want to make use of Document as little as possible.
public abstract Document createDocument();
// LUCENE 4 UPGRADE: Added for now for compatibility with Selectors
public abstract void reset();
}

View File

@ -1,29 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.document;
import org.apache.lucene.document.FieldSelector;
/**
*/
public interface ResetFieldSelector extends FieldSelector {
void reset();
}

View File

@ -19,19 +19,25 @@
package org.elasticsearch.common.lucene.document;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.FieldInfo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
*
*/
public class SingleFieldSelector implements ResetFieldSelector {
public class SingleFieldVisitor extends AbstractMultipleFieldsVisitor {
private String name;
public SingleFieldSelector() {
public SingleFieldVisitor() {
}
public SingleFieldSelector(String name) {
public SingleFieldVisitor(String name) {
this.name = name;
}
@ -40,14 +46,10 @@ public class SingleFieldSelector implements ResetFieldSelector {
}
@Override
public FieldSelectorResult accept(String fieldName) {
if (name.equals(fieldName)) {
return FieldSelectorResult.LOAD;
public Status needsField(FieldInfo fieldInfo) throws IOException {
if (name.equals(fieldInfo.name)) {
return Status.YES;
}
return FieldSelectorResult.NO_LOAD;
}
@Override
public void reset() {
return Status.NO;
}
}
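
Stored fields in Lucene 4.0 are loaded through a StoredFieldVisitor rather than a FieldSelector, which is what the new visitor classes above provide. A hedged usage sketch; the field name and index path are illustrative:

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
import org.elasticsearch.common.lucene.document.SingleFieldVisitor;

import java.io.File;
import java.io.IOException;

public class StoredFieldVisitorSketch {
    // Loads only one stored field of one document.
    static Document loadSingleField(File indexPath, int docId) throws IOException {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(indexPath));
        try {
            SingleFieldVisitor visitor = new SingleFieldVisitor("_source"); // field name is illustrative
            reader.document(docId, visitor);   // Lucene 4 visitor-based stored field access
            return visitor.createDocument();   // helper added in this commit
        } finally {
            reader.close();
        }
    }
}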

View File

@ -20,9 +20,11 @@
package org.elasticsearch.common.lucene.search;
import com.google.common.collect.Lists;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.AndDocIdSet;
import org.elasticsearch.common.lucene.docset.AndDocSet;
import org.elasticsearch.common.lucene.docset.DocSet;
@ -46,14 +48,16 @@ public class AndFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
if (filters.size() == 1) {
return filters.get(0).getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave this null until we figure out how to deal with deleted docs...
return filters.get(0).getDocIdSet(context, null);
}
List sets = Lists.newArrayListWithExpectedSize(filters.size());
boolean allAreDocSet = true;
for (Filter filter : filters) {
DocIdSet set = filter.getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave this null until we figure out how to deal with deleted docs...
DocIdSet set = filter.getDocIdSet(context, null);
if (set == null) { // none matching for this filter, we AND, so return EMPTY
return DocSet.EMPTY_DOC_SET;
}

View File

@ -20,7 +20,7 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;
import java.io.IOException;
@ -29,8 +29,8 @@ import java.io.IOException;
*/
public class EmptyScorer extends Scorer {
public EmptyScorer(Similarity similarity) {
super(similarity);
public EmptyScorer(Weight weight) {
super(weight);
}
@Override
@ -38,6 +38,11 @@ public class EmptyScorer extends Scorer {
return 0;
}
@Override
public float freq() throws IOException {
return 0;
}
@Override
public int docID() {
return NO_MORE_DOCS;

View File

@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Scorer;
@ -57,9 +57,9 @@ public class FilteredCollector extends Collector {
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
collector.setNextReader(reader, docBase);
docSet = DocSets.convert(reader, filter.getDocIdSet(reader));
public void setNextReader(AtomicReaderContext context) throws IOException {
collector.setNextReader(context);
docSet = DocSets.convert(context.reader(), filter.getDocIdSet(context, null));
}
@Override

View File

@ -19,8 +19,10 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.GetDocSet;
import java.io.IOException;
@ -39,11 +41,11 @@ public class LimitFilter extends NoCacheFilter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
if (counter > limit) {
return null;
}
return new LimitDocSet(reader.maxDoc(), limit);
return new LimitDocSet(context.reader().maxDoc(), limit);
}
public class LimitDocSet extends GetDocSet {

View File

@ -19,9 +19,11 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.AllDocSet;
import java.io.IOException;
@ -34,8 +36,8 @@ import java.io.IOException;
public class MatchAllDocsFilter extends Filter {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
return new AllDocSet(reader.maxDoc());
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return new AllDocSet(context.reader().maxDoc());
}
@Override

View File

@ -19,9 +19,11 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@ -31,7 +33,7 @@ import java.io.IOException;
public class MatchNoDocsFilter extends Filter {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return null;
}

View File

@ -19,9 +19,11 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.Set;
@ -43,20 +45,6 @@ public final class MatchNoDocsQuery extends Query {
* Weight implementation that matches no documents.
*/
private class MatchNoDocsWeight extends Weight {
/**
* The similarity implementation.
*/
private final Similarity similarity;
/**
* Creates a new weight that matches nothing.
*
* @param searcher the search to match for
*/
public MatchNoDocsWeight(final Searcher searcher) {
this.similarity = searcher.getSimilarity();
}
@Override
public String toString() {
@ -69,36 +57,29 @@ public final class MatchNoDocsQuery extends Query {
}
@Override
public float getValue() {
public float getValueForNormalization() throws IOException {
return 0;
}
@Override
public float sumOfSquaredWeights() {
return 0;
public void normalize(float norm, float topLevelBoost) {
}
@Override
public void normalize(final float queryNorm) {
}
@Override
public Scorer scorer(final IndexReader reader,
final boolean scoreDocsInOrder,
final boolean topScorer) throws IOException {
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
return null;
}
@Override
public Explanation explain(final IndexReader reader,
public Explanation explain(final AtomicReaderContext context,
final int doc) {
return new ComplexExplanation(false, 0, "MatchNoDocs matches nothing");
}
}
@Override
public Weight createWeight(final Searcher searcher) {
return new MatchNoDocsWeight(searcher);
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new MatchNoDocsWeight();
}
@Override

View File

@ -22,7 +22,10 @@ package org.elasticsearch.common.lucene.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.elasticsearch.common.io.FastStringReader;
import java.io.IOException;
@ -35,7 +38,7 @@ public class MoreLikeThisQuery extends Query {
public static final float DEFAULT_PERCENT_TERMS_TO_MATCH = 0.3f;
private Similarity similarity;
private TFIDFSimilarity similarity;
private String likeText;
private String[] moreLikeFields;
@ -77,7 +80,8 @@ public class MoreLikeThisQuery extends Query {
mlt.setStopWords(stopWords);
mlt.setBoost(boostTerms);
mlt.setBoostFactor(boostTermsFactor);
BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText));
//LUCENE 4 UPGRADE this maps the 3.6 behavior (only use the first field)
BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText), moreLikeFields[0]);
BooleanClause[] clauses = bq.getClauses();
bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));
@ -112,7 +116,10 @@ public class MoreLikeThisQuery extends Query {
}
public void setSimilarity(Similarity similarity) {
this.similarity = similarity;
if (similarity == null || similarity instanceof TFIDFSimilarity) {
//LUCENE 4 UPGRADE we need a TFIDFSimilarity here, so only set it if it is an instance of one
this.similarity = (TFIDFSimilarity) similarity;
}
}
public Analyzer getAnalyzer() {

View File

@ -19,11 +19,12 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import gnu.trove.set.hash.THashSet;
import org.apache.lucene.index.*;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
@ -137,7 +138,7 @@ public class MultiPhrasePrefixQuery extends Query {
}
Term[] suffixTerms = termArrays.get(sizeMinus1);
int position = positions.get(sizeMinus1);
List<Term> terms = new ArrayList<Term>();
Set<Term> terms = new THashSet<Term>();
for (Term term : suffixTerms) {
getPrefixTerms(terms, term, reader);
if (terms.size() > maxExpansions) {
@ -151,24 +152,33 @@ public class MultiPhrasePrefixQuery extends Query {
return query.rewrite(reader);
}
private void getPrefixTerms(List<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
TermEnum enumerator = reader.terms(prefix);
try {
do {
Term term = enumerator.term();
if (term != null
&& term.text().startsWith(prefix.text())
&& term.field().equals(field)) {
terms.add(term);
} else {
private void getPrefixTerms(Set<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
// SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
// instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
TermsEnum termsEnum = null;
List<AtomicReaderContext> leaves = reader.leaves();
for (AtomicReaderContext leaf : leaves) {
Terms _terms = leaf.reader().terms(field);
if (_terms == null) {
continue;
}
termsEnum = _terms.iterator(termsEnum);
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
if (TermsEnum.SeekStatus.END == seekStatus) {
continue;
}
for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
if (!StringHelper.startsWith(term, prefix.bytes())) {
break;
}
terms.add(new Term(field, BytesRef.deepCopyOf(term)));
if (terms.size() >= maxExpansions) {
break;
return;
}
} while (enumerator.next());
} finally {
enumerator.close();
}
}
}

View File

@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
@ -41,7 +41,7 @@ public class NoopCollector extends Collector {
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
public void setNextReader(AtomicReaderContext context) throws IOException {
}
@Override

View File

@ -19,11 +19,13 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@ -39,15 +41,15 @@ public class NotDeletedFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
DocIdSet docIdSet = filter.getDocIdSet(reader);
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
DocIdSet docIdSet = filter.getDocIdSet(context, acceptDocs);
if (docIdSet == null) {
return null;
}
if (!reader.hasDeletions()) {
if (!context.reader().hasDeletions()) {
return docIdSet;
}
return new NotDeletedDocIdSet(docIdSet, reader);
return new NotDeletedDocIdSet(docIdSet, context.reader().getLiveDocs());
}
public Filter filter() {
@ -63,11 +65,11 @@ public class NotDeletedFilter extends Filter {
private final DocIdSet innerSet;
private final IndexReader reader;
private final Bits liveDocs;
NotDeletedDocIdSet(DocIdSet innerSet, IndexReader reader) {
NotDeletedDocIdSet(DocIdSet innerSet, Bits liveDocs) {
this.innerSet = innerSet;
this.reader = reader;
this.liveDocs = liveDocs;
}
@Override
@ -76,22 +78,22 @@ public class NotDeletedFilter extends Filter {
if (iterator == null) {
return null;
}
return new NotDeletedDocIdSetIterator(iterator, reader);
return new NotDeletedDocIdSetIterator(iterator, liveDocs);
}
}
static class NotDeletedDocIdSetIterator extends FilteredDocIdSetIterator {
private final IndexReader reader;
private final Bits liveDocs;
NotDeletedDocIdSetIterator(DocIdSetIterator innerIter, IndexReader reader) {
NotDeletedDocIdSetIterator(DocIdSetIterator innerIter, Bits liveDocs) {
super(innerIter);
this.reader = reader;
this.liveDocs = liveDocs;
}
@Override
protected boolean match(int doc) throws IOException {
return !reader.isDeleted(doc);
protected boolean match(int doc) {
return liveDocs == null || liveDocs.get(doc);
}
}
}

View File

@ -19,9 +19,11 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.AllDocSet;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.NotDocIdSet;
@ -45,15 +47,16 @@ public class NotFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
DocIdSet set = filter.getDocIdSet(reader);
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
DocIdSet set = filter.getDocIdSet(context, null);
if (set == null) {
return new AllDocSet(reader.maxDoc());
return new AllDocSet(context.reader().maxDoc());
}
if (set instanceof DocSet) {
return new NotDocSet((DocSet) set, reader.maxDoc());
return new NotDocSet((DocSet) set, context.reader().maxDoc());
}
return new NotDocIdSet(set, reader.maxDoc());
return new NotDocIdSet(set, context.reader().maxDoc());
}
@Override

View File

@ -20,9 +20,11 @@
package org.elasticsearch.common.lucene.search;
import com.google.common.collect.Lists;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.OrDocIdSet;
import org.elasticsearch.common.lucene.docset.OrDocSet;
@ -46,14 +48,16 @@ public class OrFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
if (filters.size() == 1) {
return filters.get(0).getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
return filters.get(0).getDocIdSet(context, null);
}
List sets = Lists.newArrayListWithExpectedSize(filters.size());
boolean allAreDocSet = true;
for (Filter filter : filters) {
DocIdSet set = filter.getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
DocIdSet set = filter.getDocIdSet(context, null);
if (set == null) { // none matching for this filter, continue
continue;
}

View File

@ -19,13 +19,11 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
@ -45,26 +43,25 @@ public class TermFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
FixedBitSet result = null;
TermDocs td = reader.termDocs();
try {
td.seek(term);
// batch read, in Lucene 4.0 its no longer needed
int[] docs = new int[Lucene.BATCH_ENUM_DOCS];
int[] freqs = new int[Lucene.BATCH_ENUM_DOCS];
int number = td.read(docs, freqs);
if (number > 0) {
result = new FixedBitSet(reader.maxDoc());
while (number > 0) {
for (int i = 0; i < number; i++) {
result.set(docs[i]);
}
number = td.read(docs, freqs);
}
}
} finally {
td.close();
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
Terms terms = context.reader().terms(term.field());
if (terms == null) {
return null;
}
TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(term.bytes(), false)) {
return null;
}
DocsEnum docsEnum = termsEnum.docs(acceptDocs, null);
int docId = docsEnum.nextDoc();
if (docId == DocsEnum.NO_MORE_DOCS) {
return null;
}
final FixedBitSet result = new FixedBitSet(context.reader().maxDoc());
for (; docId < DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
result.set(docId);
}
return result;
}
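
The rewritten TermFilter walks the per-segment Terms/TermsEnum/DocsEnum chain instead of the old TermDocs API. A small sketch of the same traversal, used here to count a term's documents in one segment:

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.IOException;

public class TermDocsSketch {
    static int countDocs(AtomicReaderContext context, Term term) throws IOException {
        Terms terms = context.reader().terms(term.field());
        if (terms == null) {
            return 0; // field does not exist in this segment
        }
        TermsEnum termsEnum = terms.iterator(null);
        if (!termsEnum.seekExact(term.bytes(), false)) {
            return 0; // term does not exist in this segment
        }
        DocsEnum docs = termsEnum.docs(context.reader().getLiveDocs(), null);
        int count = 0;
        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
        }
        return count;
    }
}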

View File

@ -19,11 +19,12 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.DocSets;
@ -41,9 +42,9 @@ public class XBooleanFilter extends Filter {
ArrayList<Filter> notFilters = null;
ArrayList<Filter> mustFilters = null;
private DocIdSet getDISI(ArrayList<Filter> filters, int index, IndexReader reader)
private DocIdSet getDISI(ArrayList<Filter> filters, int index, AtomicReaderContext context, Bits acceptedDocs)
throws IOException {
DocIdSet docIdSet = filters.get(index).getDocIdSet(reader);
DocIdSet docIdSet = filters.get(index).getDocIdSet(context, acceptedDocs);
if (docIdSet == DocIdSet.EMPTY_DOCIDSET || docIdSet == DocSet.EMPTY_DOC_SET) {
return null;
}
@ -67,23 +68,26 @@ public class XBooleanFilter extends Filter {
* of the filters that have been added.
*/
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException {
FixedBitSet res = null;
if (mustFilters == null && notFilters == null && shouldFilters != null && shouldFilters.size() == 1) {
return shouldFilters.get(0).getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
return shouldFilters.get(0).getDocIdSet(context, null);
}
if (shouldFilters == null && notFilters == null && mustFilters != null && mustFilters.size() == 1) {
return mustFilters.get(0).getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
return mustFilters.get(0).getDocIdSet(context, null);
}
if (shouldFilters != null) {
for (int i = 0; i < shouldFilters.size(); i++) {
final DocIdSet disi = getDISI(shouldFilters, i, reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
final DocIdSet disi = getDISI(shouldFilters, i, context, null);
if (disi == null) continue;
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res = new FixedBitSet(context.reader().maxDoc());
}
DocSets.or(res, disi);
}
@ -98,10 +102,11 @@ public class XBooleanFilter extends Filter {
if (notFilters != null) {
for (int i = 0; i < notFilters.size(); i++) {
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
res = new FixedBitSet(context.reader().maxDoc());
res.set(0, context.reader().maxDoc()); // NOTE: may set bits on deleted docs
}
final DocIdSet disi = getDISI(notFilters, i, reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
final DocIdSet disi = getDISI(notFilters, i, context, null);
if (disi != null) {
DocSets.andNot(res, disi);
}
@ -110,12 +115,13 @@ public class XBooleanFilter extends Filter {
if (mustFilters != null) {
for (int i = 0; i < mustFilters.size(); i++) {
final DocIdSet disi = getDISI(mustFilters, i, reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
final DocIdSet disi = getDISI(mustFilters, i, context, null);
if (disi == null) {
return null;
}
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res = new FixedBitSet(context.reader().maxDoc());
DocSets.or(res, disi);
} else {
DocSets.and(res, disi);
@ -219,10 +225,10 @@ public class XBooleanFilter extends Filter {
private void appendFilters(ArrayList<Filter> filters, String occurString, StringBuilder buffer) {
if (filters != null) {
for (int i = 0; i < filters.size(); i++) {
for (Filter filter : filters) {
buffer.append(' ');
buffer.append(occurString);
buffer.append(filters.get(i).toString());
buffer.append(filter.toString());
}
}
}

View File

@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Explanation;
/**
@ -39,7 +39,7 @@ public class BoostScoreFunction implements ScoreFunction {
}
@Override
public void setNextReader(IndexReader reader) {
public void setNextReader(AtomicReaderContext context) {
// nothing to do here...
}

View File

@ -19,9 +19,11 @@
package org.elasticsearch.common.lucene.search.function;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.DocSets;
@ -73,13 +75,11 @@ public class FiltersFunctionScoreQuery extends Query {
final FilterFunction[] filterFunctions;
final ScoreMode scoreMode;
final float maxBoost;
DocSet[] docSets;
public FiltersFunctionScoreQuery(Query subQuery, ScoreMode scoreMode, FilterFunction[] filterFunctions, float maxBoost) {
this.subQuery = subQuery;
this.scoreMode = scoreMode;
this.filterFunctions = filterFunctions;
this.docSets = new DocSet[filterFunctions.length];
this.maxBoost = maxBoost;
}
@ -106,72 +106,69 @@ public class FiltersFunctionScoreQuery extends Query {
}
@Override
public Weight createWeight(Searcher searcher) throws IOException {
return new CustomBoostFactorWeight(searcher);
public Weight createWeight(IndexSearcher searcher) throws IOException {
Weight subQueryWeight = subQuery.createWeight(searcher);
return new CustomBoostFactorWeight(subQueryWeight, filterFunctions.length);
}
class CustomBoostFactorWeight extends Weight {
Searcher searcher;
Weight subQueryWeight;
public CustomBoostFactorWeight(Searcher searcher) throws IOException {
this.searcher = searcher;
this.subQueryWeight = subQuery.weight(searcher);
final Weight subQueryWeight;
final DocSet[] docSets;
public CustomBoostFactorWeight(Weight subQueryWeight, int filterFunctionLength) throws IOException {
this.subQueryWeight = subQueryWeight;
this.docSets = new DocSet[filterFunctionLength];
}
public Query getQuery() {
return FiltersFunctionScoreQuery.this;
}
public float getValue() {
return getBoost();
}
@Override
public float sumOfSquaredWeights() throws IOException {
float sum = subQueryWeight.sumOfSquaredWeights();
public float getValueForNormalization() throws IOException {
float sum = subQueryWeight.getValueForNormalization();
sum *= getBoost() * getBoost();
return sum;
}
@Override
public void normalize(float norm) {
norm *= getBoost();
subQueryWeight.normalize(norm);
public void normalize(float norm, float topLevelBoost) {
subQueryWeight.normalize(norm, topLevelBoost * getBoost());
}
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
Scorer subQueryScorer = subQueryWeight.scorer(reader, scoreDocsInOrder, false);
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
if (subQueryScorer == null) {
return null;
}
for (int i = 0; i < filterFunctions.length; i++) {
FilterFunction filterFunction = filterFunctions[i];
filterFunction.function.setNextReader(reader);
docSets[i] = DocSets.convert(reader, filterFunction.filter.getDocIdSet(reader));
filterFunction.function.setNextReader(context);
docSets[i] = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context, acceptDocs));
}
return new CustomBoostFactorScorer(getSimilarity(searcher), this, subQueryScorer, scoreMode, filterFunctions, maxBoost, docSets);
return new CustomBoostFactorScorer(this, subQueryScorer, scoreMode, filterFunctions, maxBoost, docSets);
}
@Override
public Explanation explain(IndexReader reader, int doc) throws IOException {
Explanation subQueryExpl = subQueryWeight.explain(reader, doc);
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Explanation subQueryExpl = subQueryWeight.explain(context, doc);
if (!subQueryExpl.isMatch()) {
return subQueryExpl;
}
if (scoreMode == ScoreMode.First) {
for (FilterFunction filterFunction : filterFunctions) {
DocSet docSet = DocSets.convert(reader, filterFunction.filter.getDocIdSet(reader));
DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context, context.reader().getLiveDocs()));
if (docSet.get(doc)) {
filterFunction.function.setNextReader(reader);
filterFunction.function.setNextReader(context);
Explanation functionExplanation = filterFunction.function.explainFactor(doc);
float sc = getValue() * subQueryExpl.getValue() * functionExplanation.getValue();
float sc = getBoost() * subQueryExpl.getValue() * functionExplanation.getValue();
Explanation filterExplanation = new ComplexExplanation(true, sc, "custom score, product of:");
filterExplanation.addDetail(new Explanation(1.0f, "match filter: " + filterFunction.filter.toString()));
filterExplanation.addDetail(functionExplanation);
filterExplanation.addDetail(new Explanation(getValue(), "queryBoost"));
filterExplanation.addDetail(new Explanation(getBoost(), "queryBoost"));
// top level score = subquery.score * filter.score (this already has the query boost)
float topLevelScore = subQueryExpl.getValue() * sc;
@ -189,9 +186,9 @@ public class FiltersFunctionScoreQuery extends Query {
float min = Float.POSITIVE_INFINITY;
ArrayList<Explanation> filtersExplanations = new ArrayList<Explanation>();
for (FilterFunction filterFunction : filterFunctions) {
DocSet docSet = DocSets.convert(reader, filterFunction.filter.getDocIdSet(reader));
DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context, context.reader().getLiveDocs()));
if (docSet.get(doc)) {
filterFunction.function.setNextReader(reader);
filterFunction.function.setNextReader(context);
Explanation functionExplanation = filterFunction.function.explainFactor(doc);
float factor = functionExplanation.getValue();
count++;
@ -202,7 +199,7 @@ public class FiltersFunctionScoreQuery extends Query {
Explanation res = new ComplexExplanation(true, factor, "custom score, product of:");
res.addDetail(new Explanation(1.0f, "match filter: " + filterFunction.filter.toString()));
res.addDetail(functionExplanation);
res.addDetail(new Explanation(getValue(), "queryBoost"));
res.addDetail(new Explanation(getBoost(), "queryBoost"));
filtersExplanations.add(res);
}
}
@ -229,7 +226,7 @@ public class FiltersFunctionScoreQuery extends Query {
if (factor > maxBoost) {
factor = maxBoost;
}
float sc = factor * subQueryExpl.getValue() * getValue();
float sc = factor * subQueryExpl.getValue() * getBoost();
Explanation res = new ComplexExplanation(true, sc, "custom score, score mode [" + scoreMode.toString().toLowerCase() + "]");
res.addDetail(subQueryExpl);
for (Explanation explanation : filtersExplanations) {
@ -239,27 +236,28 @@ public class FiltersFunctionScoreQuery extends Query {
}
}
float sc = getValue() * subQueryExpl.getValue();
float sc = getBoost() * subQueryExpl.getValue();
Explanation res = new ComplexExplanation(true, sc, "custom score, no filter match, product of:");
res.addDetail(subQueryExpl);
res.addDetail(new Explanation(getValue(), "queryBoost"));
res.addDetail(new Explanation(getBoost(), "queryBoost"));
return res;
}
}
static class CustomBoostFactorScorer extends Scorer {
private final float subQueryWeight;
private final float subQueryBoost;
private final Scorer scorer;
private final FilterFunction[] filterFunctions;
private final ScoreMode scoreMode;
private final float maxBoost;
private final DocSet[] docSets;
private CustomBoostFactorScorer(Similarity similarity, CustomBoostFactorWeight w, Scorer scorer,
ScoreMode scoreMode, FilterFunction[] filterFunctions, float maxBoost, DocSet[] docSets) throws IOException {
super(similarity);
this.subQueryWeight = w.getValue();
private CustomBoostFactorScorer(CustomBoostFactorWeight w, Scorer scorer, ScoreMode scoreMode,
FilterFunction[] filterFunctions, float maxBoost, DocSet[] docSets) throws IOException {
super(w);
this.subQueryBoost = w.getQuery().getBoost();
this.scorer = scorer;
this.scoreMode = scoreMode;
this.filterFunctions = filterFunctions;
@ -339,7 +337,12 @@ public class FiltersFunctionScoreQuery extends Query {
factor = maxBoost;
}
float score = scorer.score();
return subQueryWeight * score * factor;
return subQueryBoost * score * factor;
}
@Override
public float freq() throws IOException {
return scorer.freq();
}
}
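The Weight rework above follows the Lucene 4.0 contract: sumOfSquaredWeights() becomes getValueForNormalization(), normalize() gains a topLevelBoost parameter that carries the query boost downward, and scorer() takes a per-segment AtomicReaderContext plus acceptDocs. A stripped-down delegating Weight, sketched only to isolate that flow (class and field names are illustrative, not from this commit):
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
// Illustrative Weight that wraps another Weight and folds its own query boost
// into the normalization chain, mirroring CustomBoostFactorWeight above.
class DelegatingWeight extends Weight {
    private final Query query;      // the wrapping query (supplies getBoost())
    private final Weight delegate;  // weight of the wrapped sub-query
    DelegatingWeight(Query query, Weight delegate) {
        this.query = query;
        this.delegate = delegate;
    }
    @Override
    public Query getQuery() {
        return query;
    }
    @Override
    public float getValueForNormalization() throws IOException {
        // the boost is squared into the normalization value, as in Lucene's own queries
        return delegate.getValueForNormalization() * query.getBoost() * query.getBoost();
    }
    @Override
    public void normalize(float norm, float topLevelBoost) {
        // the boost travels down via topLevelBoost instead of being multiplied into norm
        delegate.normalize(norm, topLevelBoost * query.getBoost());
    }
    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
                         boolean topScorer, Bits acceptDocs) throws IOException {
        return delegate.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
    }
    @Override
    public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
        return delegate.explain(context, doc);
    }
}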

View File

@ -19,9 +19,11 @@
package org.elasticsearch.common.lucene.search.function;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
@ -63,76 +65,72 @@ public class FunctionScoreQuery extends Query {
}
@Override
public Weight createWeight(Searcher searcher) throws IOException {
return new CustomBoostFactorWeight(searcher);
public Weight createWeight(IndexSearcher searcher) throws IOException {
Weight subQueryWeight = subQuery.createWeight(searcher);
return new CustomBoostFactorWeight(subQueryWeight);
}
class CustomBoostFactorWeight extends Weight {
Searcher searcher;
Weight subQueryWeight;
public CustomBoostFactorWeight(Searcher searcher) throws IOException {
this.searcher = searcher;
this.subQueryWeight = subQuery.weight(searcher);
final Weight subQueryWeight;
public CustomBoostFactorWeight(Weight subQueryWeight) throws IOException {
this.subQueryWeight = subQueryWeight;
}
public Query getQuery() {
return FunctionScoreQuery.this;
}
public float getValue() {
return getBoost();
}
@Override
public float sumOfSquaredWeights() throws IOException {
float sum = subQueryWeight.sumOfSquaredWeights();
public float getValueForNormalization() throws IOException {
float sum = subQueryWeight.getValueForNormalization();
sum *= getBoost() * getBoost();
return sum;
}
@Override
public void normalize(float norm) {
norm *= getBoost();
subQueryWeight.normalize(norm);
public void normalize(float norm, float topLevelBoost) {
subQueryWeight.normalize(norm, topLevelBoost * getBoost());
}
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
Scorer subQueryScorer = subQueryWeight.scorer(reader, scoreDocsInOrder, false);
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
if (subQueryScorer == null) {
return null;
}
function.setNextReader(reader);
return new CustomBoostFactorScorer(getSimilarity(searcher), this, subQueryScorer, function);
function.setNextReader(context);
return new CustomBoostFactorScorer(this, subQueryScorer, function);
}
@Override
public Explanation explain(IndexReader reader, int doc) throws IOException {
Explanation subQueryExpl = subQueryWeight.explain(reader, doc);
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Explanation subQueryExpl = subQueryWeight.explain(context, doc);
if (!subQueryExpl.isMatch()) {
return subQueryExpl;
}
function.setNextReader(reader);
function.setNextReader(context);
Explanation functionExplanation = function.explainScore(doc, subQueryExpl);
float sc = getValue() * functionExplanation.getValue();
float sc = getBoost() * functionExplanation.getValue();
Explanation res = new ComplexExplanation(true, sc, "custom score, product of:");
res.addDetail(functionExplanation);
res.addDetail(new Explanation(getValue(), "queryBoost"));
res.addDetail(new Explanation(getBoost(), "queryBoost"));
return res;
}
}
static class CustomBoostFactorScorer extends Scorer {
private final float subQueryWeight;
private final float subQueryBoost;
private final Scorer scorer;
private final ScoreFunction function;
private CustomBoostFactorScorer(Similarity similarity, CustomBoostFactorWeight w, Scorer scorer, ScoreFunction function) throws IOException {
super(similarity);
this.subQueryWeight = w.getValue();
private CustomBoostFactorScorer(CustomBoostFactorWeight w, Scorer scorer, ScoreFunction function) throws IOException {
super(w);
this.subQueryBoost = w.getQuery().getBoost();
this.scorer = scorer;
this.function = function;
}
@ -154,7 +152,12 @@ public class FunctionScoreQuery extends Query {
@Override
public float score() throws IOException {
return subQueryWeight * function.score(scorer.docID(), scorer.score());
return subQueryBoost * function.score(scorer.docID(), scorer.score());
}
@Override
public float freq() throws IOException {
return scorer.freq();
}
}

View File

@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Explanation;
/**
@ -27,7 +27,7 @@ import org.apache.lucene.search.Explanation;
*/
public interface ScoreFunction {
void setNextReader(IndexReader reader);
void setNextReader(AtomicReaderContext context);
float score(int docId, float subQueryScore);

View File

@ -5,7 +5,6 @@ import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.geo.GeoShapeConstants;
@ -56,12 +55,13 @@ public abstract class SpatialStrategy {
* @param shape Shape to convert into its indexable format
* @return Field for indexing the Shape
*/
public Fieldable createField(Shape shape) {
public Field createField(Shape shape) {
int detailLevel = prefixTree.getLevelForDistance(
calcDistanceFromErrPct(shape, distanceErrorPct, GeoShapeConstants.SPATIAL_CONTEXT));
List<Node> nodes = prefixTree.getNodes(shape, detailLevel, true);
NodeTokenStream tokenStream = nodeTokenStream.get();
tokenStream.setNodes(nodes);
// LUCENE 4 UPGRADE: We should pass in the FieldType and use it here
return new Field(fieldName.indexName(), tokenStream);
}
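The upgrade note above asks for an explicit FieldType; a hedged sketch of what that could look like in Lucene 4.0 (the flag values and the helper name are assumptions, not what this commit ships):
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
// Illustrative helper: build the geo-shape field with an explicit FieldType.
final class GeoShapeFieldFactory {
    // Assumed flags: indexed, tokenized, not stored, norms omitted.
    private static final FieldType GEO_SHAPE_TYPE = new FieldType();
    static {
        GEO_SHAPE_TYPE.setIndexed(true);
        GEO_SHAPE_TYPE.setTokenized(true);
        GEO_SHAPE_TYPE.setStored(false);
        GEO_SHAPE_TYPE.setOmitNorms(true);
        GEO_SHAPE_TYPE.freeze();
    }
    static Field create(String indexName, TokenStream tokenStream) {
        return new Field(indexName, tokenStream, GEO_SHAPE_TYPE);
    }
}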

View File

@ -1,185 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.store;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.elasticsearch.index.store.support.ForceSyncDirectory;
import java.io.IOException;
import java.util.*;
/**
* A Directory instance that switches files between
* two other Directory instances.
* <p/>
* <p>Files with the specified extensions are placed in the
* primary directory; others are placed in the secondary
* directory.
*
*
*/
public class SwitchDirectory extends Directory implements ForceSyncDirectory {
private final Directory secondaryDir;
private final Directory primaryDir;
private final ImmutableSet<String> primaryExtensions;
private boolean doClose;
public SwitchDirectory(Set<String> primaryExtensions, Directory primaryDir, Directory secondaryDir, boolean doClose) {
this.primaryExtensions = ImmutableSet.copyOf(primaryExtensions);
this.primaryDir = primaryDir;
this.secondaryDir = secondaryDir;
this.doClose = doClose;
this.lockFactory = primaryDir.getLockFactory();
}
public ImmutableSet<String> primaryExtensions() {
return primaryExtensions;
}
/**
* Return the primary directory
*/
public Directory primaryDir() {
return primaryDir;
}
/**
* Return the secondary directory
*/
public Directory secondaryDir() {
return secondaryDir;
}
@Override
public void close() throws IOException {
if (doClose) {
try {
secondaryDir.close();
} finally {
primaryDir.close();
}
doClose = false;
}
}
@Override
public String[] listAll() throws IOException {
Set<String> files = new HashSet<String>();
for (String f : primaryDir.listAll()) {
files.add(f);
}
for (String f : secondaryDir.listAll()) {
files.add(f);
}
return files.toArray(new String[files.size()]);
}
/**
* Utility method to return a file's extension.
*/
public static String getExtension(String name) {
int i = name.lastIndexOf('.');
if (i == -1) {
return "";
}
return name.substring(i + 1, name.length());
}
private Directory getDirectory(String name) {
String ext = getExtension(name);
if (primaryExtensions.contains(ext)) {
return primaryDir;
} else {
return secondaryDir;
}
}
@Override
public boolean fileExists(String name) throws IOException {
return getDirectory(name).fileExists(name);
}
@Override
public long fileModified(String name) throws IOException {
return getDirectory(name).fileModified(name);
}
@Override
public void touchFile(String name) throws IOException {
getDirectory(name).touchFile(name);
}
@Override
public void deleteFile(String name) throws IOException {
getDirectory(name).deleteFile(name);
}
@Override
public long fileLength(String name) throws IOException {
return getDirectory(name).fileLength(name);
}
@Override
public IndexOutput createOutput(String name) throws IOException {
return getDirectory(name).createOutput(name);
}
@Override
public void sync(Collection<String> names) throws IOException {
List<String> primaryNames = new ArrayList<String>();
List<String> secondaryNames = new ArrayList<String>();
for (String name : names)
if (primaryExtensions.contains(getExtension(name)))
primaryNames.add(name);
else
secondaryNames.add(name);
primaryDir.sync(primaryNames);
secondaryDir.sync(secondaryNames);
}
@Override
public void sync(String name) throws IOException {
getDirectory(name).sync(name);
}
@Override
public void forceSync(String name) throws IOException {
Directory dir = getDirectory(name);
if (dir instanceof ForceSyncDirectory) {
((ForceSyncDirectory) dir).forceSync(name);
} else {
dir.sync(name);
}
}
@Override
public IndexInput openInput(String name) throws IOException {
return getDirectory(name).openInput(name);
}
}
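For reference, the removed SwitchDirectory was built directly from the constructor shown above; a usage sketch against the pre-upgrade tree (the extensions and path are made up, and this no longer compiles once this commit lands):
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.lucene.store.SwitchDirectory;
public class SwitchDirectoryExample {
    public static Directory build() throws IOException {
        // Files with these extensions go to the RAM directory, everything else to disk.
        Set<String> primaryExtensions = new HashSet<String>(Arrays.asList("tii", "tis"));
        Directory primary = new RAMDirectory();
        Directory secondary = FSDirectory.open(new File("/tmp/es-index"));
        return new SwitchDirectory(primaryExtensions, primary, secondary, true);
    }
}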

View File

@ -19,14 +19,19 @@
package org.elasticsearch.common.lucene.uid;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.io.Reader;
@ -34,57 +39,49 @@ import java.io.Reader;
/**
*
*/
public class UidField extends AbstractField {
// TODO: LUCENE 4 UPGRADE: Store version as doc values instead of as a payload.
public class UidField extends Field {
public static class DocIdAndVersion {
public final int docId;
public final int docStart;
public final long version;
public final IndexReader reader;
public final AtomicReaderContext reader;
public DocIdAndVersion(int docId, long version, IndexReader reader, int docStart) {
public DocIdAndVersion(int docId, long version, AtomicReaderContext reader) {
this.docId = docId;
this.version = version;
this.reader = reader;
this.docStart = docStart;
}
}
// this works fine for nested docs since they don't carry the payload that holds the version,
// so we iterate until we find the doc that does
public static DocIdAndVersion loadDocIdAndVersion(IndexReader subReader, int docStart, Term term) {
// LUCENE 4 UPGRADE: We can get rid of the do while loop, since there is only one _uid value (live docs are taken into account)
public static DocIdAndVersion loadDocIdAndVersion(AtomicReaderContext context, Term term) {
int docId = Lucene.NO_DOC;
TermPositions uid = null;
try {
uid = subReader.termPositions(term);
if (!uid.next()) {
DocsAndPositionsEnum uid = context.reader().termPositionsEnum(term);
if (uid == null || uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return null; // no doc
}
// Note, only the uid of master docs carries the version payload, so we can
// use that info to skip the others
do {
docId = uid.doc();
docId = uid.docID();
uid.nextPosition();
if (!uid.isPayloadAvailable()) {
if (uid.getPayload() == null) {
continue;
}
if (uid.getPayloadLength() < 8) {
if (uid.getPayload().length < 8) {
continue;
}
byte[] payload = uid.getPayload(new byte[8], 0);
return new DocIdAndVersion(docId, Numbers.bytesToLong(payload), subReader, docStart);
} while (uid.next());
return new DocIdAndVersion(docId, -2, subReader, docStart);
byte[] payload = new byte[uid.getPayload().length];
System.arraycopy(uid.getPayload().bytes, uid.getPayload().offset, payload, 0, uid.getPayload().length);
return new DocIdAndVersion(docId, Numbers.bytesToLong(payload), context);
} while (uid.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
return new DocIdAndVersion(docId, -2, context);
} catch (Exception e) {
return new DocIdAndVersion(docId, -2, subReader, docStart);
} finally {
if (uid != null) {
try {
uid.close();
} catch (IOException e) {
// nothing to do here...
}
}
return new DocIdAndVersion(docId, -2, context);
}
}
@ -92,37 +89,30 @@ public class UidField extends AbstractField {
* Load the version for the uid from the reader, returning -1 if no doc exists, or -2 if
* no version is available (for backward comp.)
*/
public static long loadVersion(IndexReader reader, Term term) {
TermPositions uid = null;
// LUCENE 4 UPGRADE: We can get rid of the do while loop, since there is only one _uid value (live docs are taken into account)
public static long loadVersion(AtomicReaderContext context, Term term) {
try {
uid = reader.termPositions(term);
if (!uid.next()) {
DocsAndPositionsEnum uid = context.reader().termPositionsEnum(term);
if (uid == null || uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return -1;
}
// Note, only the uid of master docs carries the version payload, so we can
// use that info to skip the others
do {
uid.nextPosition();
if (!uid.isPayloadAvailable()) {
if (uid.getPayload() == null) {
continue;
}
if (uid.getPayloadLength() < 8) {
if (uid.getPayload().length < 8) {
continue;
}
byte[] payload = uid.getPayload(new byte[8], 0);
byte[] payload = new byte[uid.getPayload().length];
System.arraycopy(uid.getPayload().bytes, uid.getPayload().offset, payload, 0, uid.getPayload().length);
return Numbers.bytesToLong(payload);
} while (uid.next());
} while (uid.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
return -2;
} catch (Exception e) {
return -2;
} finally {
if (uid != null) {
try {
uid.close();
} catch (IOException e) {
// nothing to do here...
}
}
}
}
@ -130,26 +120,13 @@ public class UidField extends AbstractField {
private long version;
private final UidPayloadTokenStream tokenStream;
public UidField(String name, String uid, long version) {
super(name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
super(name, UidFieldMapper.Defaults.UID_FIELD_TYPE);
this.uid = uid;
this.version = version;
this.indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
this.tokenStream = new UidPayloadTokenStream(this);
}
@Override
public void setIndexOptions(FieldInfo.IndexOptions indexOptions) {
// never allow to set this, since we want payload!
}
@Override
public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) {
// never allow to set this, since we want payload!
}
public String uid() {
return this.uid;
}
@ -177,7 +154,7 @@ public class UidField extends AbstractField {
}
@Override
public TokenStream tokenStreamValue() {
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
return tokenStream;
}
@ -206,7 +183,7 @@ public class UidField extends AbstractField {
}
termAtt.setLength(0);
termAtt.append(field.uid);
payloadAttribute.setPayload(new Payload(Numbers.longToBytes(field.version())));
payloadAttribute.setPayload(new BytesRef(Numbers.longToBytes(field.version())));
added = true;
return true;
}
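The UidField changes above reflect how payloads work in Lucene 4.0: they are written as a BytesRef through PayloadAttribute and read back from a DocsAndPositionsEnum, copying the bytes out with offset and length respected. A minimal read-side sketch (class and method names are illustrative):
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
public class PayloadReadExample {
    // Returns the first payload found for the term, or null if none exists.
    public static byte[] firstPayload(AtomicReader reader, Term term) throws IOException {
        DocsAndPositionsEnum postings = reader.termPositionsEnum(term);
        if (postings == null || postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
            return null;
        }
        postings.nextPosition();
        BytesRef payload = postings.getPayload(); // may be null if no payload was indexed
        if (payload == null) {
            return null;
        }
        byte[] copy = new byte[payload.length];
        System.arraycopy(payload.bytes, payload.offset, copy, 0, payload.length);
        return copy;
    }
}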

View File

@ -61,4 +61,19 @@ public class BytesText implements Text {
public String toString() {
return string();
}
}
@Override
public int hashCode() {
return bytes().hashCode();
}
@Override
public boolean equals(Object obj) {
return bytes().equals(((Text) obj).bytes());
}
@Override
public int compareTo(Text text) {
return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes(), text.bytes());
}
}

View File

@ -87,4 +87,19 @@ public class StringAndBytesText implements Text {
public String toString() {
return string();
}
@Override
public int hashCode() {
return bytes().hashCode();
}
@Override
public boolean equals(Object obj) {
return bytes().equals(((Text) obj).bytes());
}
@Override
public int compareTo(Text text) {
return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes(), text.bytes());
}
}

View File

@ -71,4 +71,21 @@ public class StringText implements Text {
public String toString() {
return string();
}
@Override
public int hashCode() {
// we use bytes here so we can be consistent with other text implementations
return bytes().hashCode();
}
@Override
public boolean equals(Object obj) {
// we use bytes here so we can be consistent with other text implementations
return bytes().equals(((Text) obj).bytes());
}
@Override
public int compareTo(Text text) {
return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes(), text.bytes());
}
}

View File

@ -26,7 +26,7 @@ import org.elasticsearch.common.bytes.BytesReference;
* so we can represent it in a more optimized manner in memory as well as serializing it over the
* network as well as converting it to json format.
*/
public interface Text {
public interface Text extends Comparable<Text> {
/**
* Are bytes available without the need to be converted into bytes when calling {@link #bytes()}.

View File

@ -0,0 +1,58 @@
package org.elasticsearch.common.text;
import org.elasticsearch.common.bytes.BytesReference;
import java.util.Comparator;
// LUCENE 4 UPGRADE: Is this the right way of comparing the BytesReference values inside Text instances?
// Copied from Lucene's BytesRef comparator
public class UTF8SortedAsUnicodeComparator implements Comparator<BytesReference> {
public final static Comparator<BytesReference> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
// Only singleton
private UTF8SortedAsUnicodeComparator() {
}
public int compare(BytesReference a, BytesReference b) {
if (a.hasArray() && b.hasArray()) {
final byte[] aBytes = a.array();
int aUpto = a.arrayOffset();
final byte[] bBytes = b.array();
int bUpto = b.arrayOffset();
final int aStop = aUpto + Math.min(a.length(), b.length());
while (aUpto < aStop) {
int aByte = aBytes[aUpto++] & 0xff;
int bByte = bBytes[bUpto++] & 0xff;
int diff = aByte - bByte;
if (diff != 0) {
return diff;
}
}
// One is a prefix of the other, or, they are equal:
return a.length() - b.length();
} else {
final byte[] aBytes = a.toBytes();
int aUpto = 0;
final byte[] bBytes = b.toBytes();
int bUpto = 0;
final int aStop = aUpto + Math.min(a.length(), b.length());
while (aUpto < aStop) {
int aByte = aBytes[aUpto++] & 0xff;
int bByte = bBytes[bUpto++] & 0xff;
int diff = aByte - bByte;
if (diff != 0) {
return diff;
}
}
// One is a prefix of the other, or, they are equal:
return a.length() - b.length();
}
}
}
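A small usage sketch of the comparator together with the new Comparable behaviour on Text (values are illustrative, and it assumes StringText is constructed from its string value as elsewhere in this tree):
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.text.UTF8SortedAsUnicodeComparator;
public class TextCompareExample {
    public static void main(String[] args) {
        Text a = new StringText("apple");
        Text b = new StringText("banana");
        // Text instances now order by their UTF-8 bytes...
        System.out.println(a.compareTo(b) < 0); // true
        // ...which matches the order the shared comparator produces on the raw bytes.
        int cmp = UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(a.bytes(), b.bytes());
        System.out.println(cmp < 0); // true
    }
}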

View File

@ -20,6 +20,7 @@
package org.elasticsearch.common.xcontent;
import com.google.common.base.Charsets;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
@ -526,6 +527,12 @@ public final class XContentBuilder implements BytesStream {
return this;
}
public XContentBuilder field(XContentBuilderString name, BytesRef value) throws IOException {
field(name);
generator.writeUTF8String(value.bytes, value.offset, value.length);
return this;
}
public XContentBuilder field(String name, Text value) throws IOException {
field(name);
if (value.hasBytes() && value.bytes().hasArray()) {

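A hedged usage sketch of the BytesRef overload added above (builder setup, field name, and value are illustrative):
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.common.xcontent.XContentFactory;
public class BytesRefFieldExample {
    public static String render() throws IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        // The new overload writes the BytesRef content as a UTF-8 string value.
        builder.field(new XContentBuilderString("uid"), new BytesRef("type#1"));
        builder.endObject();
        return builder.string();
    }
}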
View File

@ -19,6 +19,8 @@
package org.elasticsearch.common.xcontent;
import org.apache.lucene.util.BytesRef;
import java.io.Closeable;
import java.io.IOException;
import java.util.Map;
@ -129,6 +131,10 @@ public interface XContentParser extends Closeable {
String textOrNull() throws IOException;
BytesRef bytesOrNull() throws IOException;
BytesRef bytes() throws IOException;
boolean hasTextCharacters();
char[] textCharacters() throws IOException;

Some files were not shown because too many files have changed in this diff.