Upgrade to Lucene 4.4
This commit is contained in:
parent
92a7030558
commit
2e9851138e
2
pom.xml
2
pom.xml
|
@ -30,7 +30,7 @@
|
|||
</parent>
|
||||
|
||||
<properties>
|
||||
<lucene.version>4.3.1</lucene.version>
|
||||
<lucene.version>4.4.0</lucene.version>
|
||||
</properties>
|
||||
|
||||
<repositories>
|
||||
|
|
|
@ -1,214 +0,0 @@
|
|||
package org.apache.lucene.analysis.ngram;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.util.XCharacterUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Tokenizes the given token into n-grams of given size(s).
|
||||
* <p>
|
||||
* This {@link TokenFilter} creates n-grams from the beginning edge or ending edge of an input token.
|
||||
* <p><a name="version"/>As of Lucene 4.4, this filter does not support
|
||||
* {@link Side#BACK} (you can use {@link ReverseStringFilter} up-front and
|
||||
* afterward to get the same behavior), handles supplementary characters
|
||||
* correctly and does not update offsets anymore.
|
||||
*/
|
||||
public final class XEdgeNGramTokenFilter extends TokenFilter {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
public static final Side DEFAULT_SIDE = Side.FRONT;
|
||||
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
|
||||
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
|
||||
|
||||
/** Specifies which side of the input the n-gram should be generated from */
|
||||
public static enum Side {
|
||||
|
||||
/** Get the n-gram from the front of the input */
|
||||
FRONT {
|
||||
@Override
|
||||
public String getLabel() { return "front"; }
|
||||
},
|
||||
|
||||
/** Get the n-gram from the end of the input */
|
||||
@Deprecated
|
||||
BACK {
|
||||
@Override
|
||||
public String getLabel() { return "back"; }
|
||||
};
|
||||
|
||||
public abstract String getLabel();
|
||||
|
||||
// Get the appropriate Side from a string
|
||||
public static Side getSide(String sideName) {
|
||||
if (FRONT.getLabel().equals(sideName)) {
|
||||
return FRONT;
|
||||
}
|
||||
if (BACK.getLabel().equals(sideName)) {
|
||||
return BACK;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private final XCharacterUtils charUtils;
|
||||
private final int minGram;
|
||||
private final int maxGram;
|
||||
private Side side;
|
||||
private char[] curTermBuffer;
|
||||
private int curTermLength;
|
||||
private int curCodePointCount;
|
||||
private int curGramSize;
|
||||
private int tokStart;
|
||||
private int tokEnd;
|
||||
private int savePosIncr;
|
||||
private int savePosLen;
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
|
||||
|
||||
/**
|
||||
* Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param input {@link TokenStream} holding the input to be tokenized
|
||||
* @param side the {@link Side} from which to chop off an n-gram
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
@Deprecated
|
||||
public XEdgeNGramTokenFilter(Version version, TokenStream input, Side side, int minGram, int maxGram) {
|
||||
super(input);
|
||||
|
||||
if (version == null) {
|
||||
throw new IllegalArgumentException("version must not be null");
|
||||
}
|
||||
|
||||
if (side == Side.BACK) {
|
||||
throw new IllegalArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
|
||||
}
|
||||
|
||||
if (side == null) {
|
||||
throw new IllegalArgumentException("sideLabel must be either front or back");
|
||||
}
|
||||
|
||||
if (minGram < 1) {
|
||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
||||
}
|
||||
|
||||
if (minGram > maxGram) {
|
||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
||||
}
|
||||
|
||||
this.charUtils = XCharacterUtils.getInstance(version);
|
||||
this.minGram = minGram;
|
||||
this.maxGram = maxGram;
|
||||
this.side = side;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param input {@link TokenStream} holding the input to be tokenized
|
||||
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
@Deprecated
|
||||
public XEdgeNGramTokenFilter(Version version, TokenStream input, String sideLabel, int minGram, int maxGram) {
|
||||
this(version, input, Side.getSide(sideLabel), minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param input {@link TokenStream} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XEdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
|
||||
this(version, input, Side.FRONT, minGram, maxGram);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
while (true) {
|
||||
if (curTermBuffer == null) {
|
||||
if (!input.incrementToken()) {
|
||||
return false;
|
||||
} else {
|
||||
curTermBuffer = termAtt.buffer().clone();
|
||||
curTermLength = termAtt.length();
|
||||
curCodePointCount = charUtils.codePointCount(termAtt);
|
||||
curGramSize = minGram;
|
||||
tokStart = offsetAtt.startOffset();
|
||||
tokEnd = offsetAtt.endOffset();
|
||||
savePosIncr += posIncrAtt.getPositionIncrement();
|
||||
savePosLen = posLenAtt.getPositionLength();
|
||||
}
|
||||
}
|
||||
if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit
|
||||
if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
|
||||
// grab gramSize chars from front or back
|
||||
final int start = side == Side.FRONT ? 0 : charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
|
||||
final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
|
||||
clearAttributes();
|
||||
offsetAtt.setOffset(tokStart, tokEnd);
|
||||
// first ngram gets increment, others don't
|
||||
if (curGramSize == minGram) {
|
||||
posIncrAtt.setPositionIncrement(savePosIncr);
|
||||
savePosIncr = 0;
|
||||
} else {
|
||||
posIncrAtt.setPositionIncrement(0);
|
||||
}
|
||||
posLenAtt.setPositionLength(savePosLen);
|
||||
termAtt.copyBuffer(curTermBuffer, start, end - start);
|
||||
curGramSize++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
curTermBuffer = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
curTermBuffer = null;
|
||||
savePosIncr = 0;
|
||||
}
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
package org.apache.lucene.analysis.ngram;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Tokenizes the input from an edge into n-grams of given size(s).
|
||||
* <p>
|
||||
* This {@link Tokenizer} creates n-grams from the beginning edge or ending edge of an input token.
|
||||
* <p><a name="version" /> As of Lucene 4.4, this tokenizer<ul>
|
||||
* <li>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage
|
||||
* <li>doesn't trim the input,
|
||||
* <li>sets position increments equal to 1 instead of 1 for the first token and 0 for all other ones
|
||||
* <li>doesn't support backward n-grams anymore.
|
||||
* <li>supports {@link #isTokenChar(int) pre-tokenization},
|
||||
* <li>correctly handles supplementary characters.
|
||||
* </ul>
|
||||
* <p>Although <b style="color:red">highly</b> discouraged, it is still possible
|
||||
* to use the old behavior through {@link XLucene43EdgeNGramTokenizer}.
|
||||
*/
|
||||
public class XEdgeNGramTokenizer extends XNGramTokenizer {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
|
||||
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
|
||||
|
||||
/**
|
||||
* Creates XEdgeXNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XEdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
|
||||
super(version, input, minGram, maxGram, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates XEdgeXNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XEdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
||||
super(version, factory, input, minGram, maxGram, true);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,281 +0,0 @@
|
|||
package org.apache.lucene.analysis.ngram;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Old version of {@link EdgeNGramTokenizer} which doesn't correctly handle
|
||||
* supplementary characters.
|
||||
*/
|
||||
@Deprecated
|
||||
public final class XLucene43EdgeNGramTokenizer extends Tokenizer {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
public static final Side DEFAULT_SIDE = Side.FRONT;
|
||||
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
|
||||
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
/** Specifies which side of the input the n-gram should be generated from */
|
||||
public static enum Side {
|
||||
|
||||
/** Get the n-gram from the front of the input */
|
||||
FRONT {
|
||||
@Override
|
||||
public String getLabel() { return "front"; }
|
||||
},
|
||||
|
||||
/** Get the n-gram from the end of the input */
|
||||
BACK {
|
||||
@Override
|
||||
public String getLabel() { return "back"; }
|
||||
};
|
||||
|
||||
public abstract String getLabel();
|
||||
|
||||
// Get the appropriate Side from a string
|
||||
public static Side getSide(String sideName) {
|
||||
if (FRONT.getLabel().equals(sideName)) {
|
||||
return FRONT;
|
||||
}
|
||||
if (BACK.getLabel().equals(sideName)) {
|
||||
return BACK;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private int minGram;
|
||||
private int maxGram;
|
||||
private int gramSize;
|
||||
private Side side;
|
||||
private boolean started;
|
||||
private int inLen; // length of the input AFTER trim()
|
||||
private int charsRead; // length of the input
|
||||
private String inStr;
|
||||
|
||||
|
||||
/**
|
||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param side the {@link Side} from which to chop off an n-gram
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
@Deprecated
|
||||
public XLucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram) {
|
||||
super(input);
|
||||
init(version, side, minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param side the {@link Side} from which to chop off an n-gram
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
@Deprecated
|
||||
public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram) {
|
||||
super(factory, input);
|
||||
init(version, side, minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
@Deprecated
|
||||
public XLucene43EdgeNGramTokenizer(Version version, Reader input, String sideLabel, int minGram, int maxGram) {
|
||||
this(version, input, Side.getSide(sideLabel), minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
@Deprecated
|
||||
public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, String sideLabel, int minGram, int maxGram) {
|
||||
this(version, factory, input, Side.getSide(sideLabel), minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XLucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
|
||||
this(version, input, Side.FRONT, minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||
*
|
||||
* @param version the <a href="#version">Lucene match version</a>
|
||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
||||
this(version, factory, input, Side.FRONT, minGram, maxGram);
|
||||
}
|
||||
|
||||
private void init(Version version, Side side, int minGram, int maxGram) {
|
||||
if (version == null) {
|
||||
throw new IllegalArgumentException("version must not be null");
|
||||
}
|
||||
|
||||
if (side == null) {
|
||||
throw new IllegalArgumentException("sideLabel must be either front or back");
|
||||
}
|
||||
|
||||
if (minGram < 1) {
|
||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
||||
}
|
||||
|
||||
if (minGram > maxGram) {
|
||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
||||
}
|
||||
|
||||
maxGram = Math.min(maxGram, 1024);
|
||||
|
||||
this.minGram = minGram;
|
||||
this.maxGram = maxGram;
|
||||
this.side = side;
|
||||
}
|
||||
|
||||
/** Returns the next token in the stream, or null at EOS. */
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
// if we are just starting, read the whole input
|
||||
if (!started) {
|
||||
started = true;
|
||||
gramSize = minGram;
|
||||
final int limit = side == Side.FRONT ? maxGram : 1024;
|
||||
char[] chars = new char[Math.min(1024, limit)];
|
||||
charsRead = 0;
|
||||
// TODO: refactor to a shared readFully somewhere:
|
||||
boolean exhausted = false;
|
||||
while (charsRead < limit) {
|
||||
final int inc = input.read(chars, charsRead, chars.length-charsRead);
|
||||
if (inc == -1) {
|
||||
exhausted = true;
|
||||
break;
|
||||
}
|
||||
charsRead += inc;
|
||||
if (charsRead == chars.length && charsRead < limit) {
|
||||
chars = ArrayUtil.grow(chars);
|
||||
}
|
||||
}
|
||||
|
||||
inStr = new String(chars, 0, charsRead);
|
||||
inStr = inStr.trim();
|
||||
|
||||
if (!exhausted) {
|
||||
// Read extra throwaway chars so that on end() we
|
||||
// report the correct offset:
|
||||
char[] throwaway = new char[1024];
|
||||
while(true) {
|
||||
final int inc = input.read(throwaway, 0, throwaway.length);
|
||||
if (inc == -1) {
|
||||
break;
|
||||
}
|
||||
charsRead += inc;
|
||||
}
|
||||
}
|
||||
|
||||
inLen = inStr.length();
|
||||
if (inLen == 0) {
|
||||
return false;
|
||||
}
|
||||
posIncrAtt.setPositionIncrement(1);
|
||||
} else {
|
||||
posIncrAtt.setPositionIncrement(0);
|
||||
}
|
||||
|
||||
// if the remaining input is too short, we can't generate any n-grams
|
||||
if (gramSize > inLen) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// if we have hit the end of our n-gram size range, quit
|
||||
if (gramSize > maxGram || gramSize > inLen) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// grab gramSize chars from front or back
|
||||
int start = side == Side.FRONT ? 0 : inLen - gramSize;
|
||||
int end = start + gramSize;
|
||||
termAtt.setEmpty().append(inStr, start, end);
|
||||
offsetAtt.setOffset(correctOffset(start), correctOffset(end));
|
||||
gramSize++;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() {
|
||||
// set final offset
|
||||
final int finalOffset = correctOffset(charsRead);
|
||||
this.offsetAtt.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
started = false;
|
||||
}
|
||||
}
|
|
@ -1,164 +0,0 @@
|
|||
package org.apache.lucene.analysis.ngram;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
||||
/**
|
||||
* Old broken version of {@link NGramTokenizer}.
|
||||
*/
|
||||
@Deprecated
|
||||
public final class XLucene43NGramTokenizer extends Tokenizer {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
|
||||
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
|
||||
|
||||
private int minGram, maxGram;
|
||||
private int gramSize;
|
||||
private int pos;
|
||||
private int inLen; // length of the input AFTER trim()
|
||||
private int charsRead; // length of the input
|
||||
private String inStr;
|
||||
private boolean started;
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
/**
|
||||
* Creates NGramTokenizer with given min and max n-grams.
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XLucene43NGramTokenizer(Reader input, int minGram, int maxGram) {
|
||||
super(input);
|
||||
init(minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates NGramTokenizer with given min and max n-grams.
|
||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XLucene43NGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
||||
super(factory, input);
|
||||
init(minGram, maxGram);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates NGramTokenizer with default min and max n-grams.
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
*/
|
||||
public XLucene43NGramTokenizer(Reader input) {
|
||||
this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
|
||||
}
|
||||
|
||||
private void init(int minGram, int maxGram) {
|
||||
if (minGram < 1) {
|
||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
||||
}
|
||||
if (minGram > maxGram) {
|
||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
||||
}
|
||||
this.minGram = minGram;
|
||||
this.maxGram = maxGram;
|
||||
}
|
||||
|
||||
/** Returns the next token in the stream, or null at EOS. */
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
if (!started) {
|
||||
started = true;
|
||||
gramSize = minGram;
|
||||
char[] chars = new char[1024];
|
||||
charsRead = 0;
|
||||
// TODO: refactor to a shared readFully somewhere:
|
||||
while (charsRead < chars.length) {
|
||||
int inc = input.read(chars, charsRead, chars.length-charsRead);
|
||||
if (inc == -1) {
|
||||
break;
|
||||
}
|
||||
charsRead += inc;
|
||||
}
|
||||
inStr = new String(chars, 0, charsRead).trim(); // remove any trailing empty strings
|
||||
|
||||
if (charsRead == chars.length) {
|
||||
// Read extra throwaway chars so that on end() we
|
||||
// report the correct offset:
|
||||
char[] throwaway = new char[1024];
|
||||
while(true) {
|
||||
final int inc = input.read(throwaway, 0, throwaway.length);
|
||||
if (inc == -1) {
|
||||
break;
|
||||
}
|
||||
charsRead += inc;
|
||||
}
|
||||
}
|
||||
|
||||
inLen = inStr.length();
|
||||
if (inLen == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos+gramSize > inLen) { // if we hit the end of the string
|
||||
pos = 0; // reset to beginning of string
|
||||
gramSize++; // increase n-gram size
|
||||
if (gramSize > maxGram) // we are done
|
||||
return false;
|
||||
if (pos+gramSize > inLen)
|
||||
return false;
|
||||
}
|
||||
|
||||
int oldPos = pos;
|
||||
pos++;
|
||||
termAtt.setEmpty().append(inStr, oldPos, oldPos+gramSize);
|
||||
offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize));
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() {
|
||||
// set final offset
|
||||
final int finalOffset = correctOffset(charsRead);
|
||||
this.offsetAtt.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
started = false;
|
||||
pos = 0;
|
||||
}
|
||||
}
|
|
@ -1,158 +0,0 @@
|
|||
package org.apache.lucene.analysis.ngram;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.util.XCharacterUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Tokenizes the input into n-grams of the given size(s).
|
||||
* <a name="version"/>
|
||||
* <p>You must specify the required {@link Version} compatibility when
|
||||
* creating a {@link XNGramTokenFilter}. As of Lucene 4.4, this token filters:<ul>
|
||||
* <li>handles supplementary characters correctly,</li>
|
||||
* <li>emits all n-grams for the same token at the same position,</li>
|
||||
* <li>does not modify offsets,</li>
|
||||
* <li>sorts n-grams by their offset in the original token first, then
|
||||
* increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
|
||||
* "c").</li></ul>
|
||||
* <p>You can make this filter use the old behavior by providing a version <
|
||||
* {@link Version#LUCENE_44} in the constructor but this is not recommended as
|
||||
* it will lead to broken {@link TokenStream}s that will cause highlighting
|
||||
* bugs.
|
||||
* <p>If you were using this {@link TokenFilter} to perform partial highlighting,
|
||||
* this won't work anymore since this filter doesn't update offsets. You should
|
||||
* modify your analysis chain to use {@link NGramTokenizer}, and potentially
|
||||
* override {@link NGramTokenizer#isTokenChar(int)} to perform pre-tokenization.
|
||||
*/
|
||||
public final class XNGramTokenFilter extends TokenFilter {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
|
||||
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
|
||||
|
||||
private final int minGram, maxGram;
|
||||
|
||||
private char[] curTermBuffer;
|
||||
private int curTermLength;
|
||||
private int curCodePointCount;
|
||||
private int curGramSize;
|
||||
private int curPos;
|
||||
private int curPosInc, curPosLen;
|
||||
private int tokStart;
|
||||
private int tokEnd;
|
||||
|
||||
private final XCharacterUtils charUtils;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAtt;
|
||||
private final PositionLengthAttribute posLenAtt;
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
/**
|
||||
* Creates XNGramTokenFilter with given min and max n-grams.
|
||||
* @param version Lucene version to enable correct position increments.
|
||||
* See <a href="#version">above</a> for details.
|
||||
* @param input {@link TokenStream} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
|
||||
super(new LengthFilter(true, input, minGram, Integer.MAX_VALUE));
|
||||
this.charUtils = XCharacterUtils.getInstance(version);
|
||||
if (minGram < 1) {
|
||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
||||
}
|
||||
if (minGram > maxGram) {
|
||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
||||
}
|
||||
this.minGram = minGram;
|
||||
this.maxGram = maxGram;
|
||||
posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
posLenAtt = addAttribute(PositionLengthAttribute.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates XNGramTokenFilter with default min and max n-grams.
|
||||
* @param version Lucene version to enable correct position increments.
|
||||
* See <a href="#version">above</a> for details.
|
||||
* @param input {@link TokenStream} holding the input to be tokenized
|
||||
*/
|
||||
public XNGramTokenFilter(Version version, TokenStream input) {
|
||||
this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
|
||||
}
|
||||
|
||||
/** Returns the next token in the stream, or null at EOS. */
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
while (true) {
|
||||
if (curTermBuffer == null) {
|
||||
if (!input.incrementToken()) {
|
||||
return false;
|
||||
} else {
|
||||
curTermBuffer = termAtt.buffer().clone();
|
||||
curTermLength = termAtt.length();
|
||||
curCodePointCount = charUtils.codePointCount(termAtt);
|
||||
curGramSize = minGram;
|
||||
curPos = 0;
|
||||
curPosInc = posIncAtt.getPositionIncrement();
|
||||
curPosLen = posLenAtt.getPositionLength();
|
||||
tokStart = offsetAtt.startOffset();
|
||||
tokEnd = offsetAtt.endOffset();
|
||||
}
|
||||
}
|
||||
if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) {
|
||||
++curPos;
|
||||
curGramSize = minGram;
|
||||
}
|
||||
if ((curPos + curGramSize) <= curCodePointCount) {
|
||||
clearAttributes();
|
||||
final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
|
||||
final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
|
||||
termAtt.copyBuffer(curTermBuffer, start, end - start);
|
||||
posIncAtt.setPositionIncrement(curPosInc);
|
||||
curPosInc = 0;
|
||||
posLenAtt.setPositionLength(curPosLen);
|
||||
offsetAtt.setOffset(tokStart, tokEnd);
|
||||
curGramSize++;
|
||||
return true;
|
||||
}
|
||||
curTermBuffer = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
curTermBuffer = null;
|
||||
}
|
||||
}
|
|
@ -1,248 +0,0 @@
|
|||
package org.apache.lucene.analysis.ngram;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.util.XCharacterUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Tokenizes the input into n-grams of the given size(s).
|
||||
* <p>On the contrary to {@link NGramTokenFilter}, this class sets offsets so
|
||||
* that characters between startOffset and endOffset in the original stream are
|
||||
* the same as the term chars.
|
||||
* <p>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
|
||||
* <table>
|
||||
* <tr><th>Term</th><td>ab</td><td>abc</td><td>bc</td><td>bcd</td><td>cd</td><td>cde</td><td>de</td></tr>
|
||||
* <tr><th>Position increment</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
|
||||
* <tr><th>Position length</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
|
||||
* <tr><th>Offsets</th><td>[0,2[</td><td>[0,3[</td><td>[1,3[</td><td>[1,4[</td><td>[2,4[</td><td>[2,5[</td><td>[3,5[</td></tr>
|
||||
* </table>
|
||||
* <a name="version"/>
|
||||
* <p>This tokenizer changed a lot in Lucene 4.4 in order to:<ul>
|
||||
* <li>tokenize in a streaming fashion to support streams which are larger
|
||||
* than 1024 chars (limit of the previous version),
|
||||
* <li>count grams based on unicode code points instead of java chars (and
|
||||
* never split in the middle of surrogate pairs),
|
||||
* <li>give the ability to {@link #isTokenChar(int) pre-tokenize} the stream
|
||||
* before computing n-grams.</ul>
|
||||
* <p>Additionally, this class doesn't trim trailing whitespaces and emits
|
||||
* tokens in a different order, tokens are now emitted by increasing start
|
||||
* offsets while they used to be emitted by increasing lengths (which prevented
|
||||
* from supporting large input streams).
|
||||
* <p>Although <b style="color:red">highly</b> discouraged, it is still possible
|
||||
* to use the old behavior through {@link Lucene43NGramTokenizer}.
|
||||
*/
|
||||
// non-final to allow for overriding isTokenChar, but all other methods should be final
|
||||
public class XNGramTokenizer extends Tokenizer {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
|
||||
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
|
||||
|
||||
private XCharacterUtils charUtils;
|
||||
private XCharacterUtils.CharacterBuffer charBuffer;
|
||||
private int[] buffer; // like charBuffer, but converted to code points
|
||||
private int bufferStart, bufferEnd; // remaining slice in buffer
|
||||
private int offset;
|
||||
private int gramSize;
|
||||
private int minGram, maxGram;
|
||||
private boolean exhausted;
|
||||
private int lastCheckedChar; // last offset in the buffer that we checked
|
||||
private int lastNonTokenChar; // last offset that we found to not be a token char
|
||||
private boolean edgesOnly; // leading edges n-grams only
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
XNGramTokenizer(Version version, Reader input, int minGram, int maxGram, boolean edgesOnly) {
|
||||
super(input);
|
||||
init(version, minGram, maxGram, edgesOnly);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates NGramTokenizer with given min and max n-grams.
|
||||
* @param version the lucene compatibility <a href="#version">version</a>
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
|
||||
this(version, input, minGram, maxGram, false);
|
||||
}
|
||||
|
||||
XNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram, boolean edgesOnly) {
|
||||
super(factory, input);
|
||||
init(version, minGram, maxGram, edgesOnly);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates NGramTokenizer with given min and max n-grams.
|
||||
* @param version the lucene compatibility <a href="#version">version</a>
|
||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
* @param minGram the smallest n-gram to generate
|
||||
* @param maxGram the largest n-gram to generate
|
||||
*/
|
||||
public XNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
||||
this(version, factory, input, minGram, maxGram, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates NGramTokenizer with default min and max n-grams.
|
||||
* @param version the lucene compatibility <a href="#version">version</a>
|
||||
* @param input {@link Reader} holding the input to be tokenized
|
||||
*/
|
||||
public XNGramTokenizer(Version version, Reader input) {
|
||||
this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
|
||||
}
|
||||
|
||||
private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
|
||||
if (!version.onOrAfter(Version.LUCENE_43)) {
|
||||
throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
|
||||
}
|
||||
charUtils = version.onOrAfter(Version.LUCENE_43)
|
||||
? XCharacterUtils.getInstance(version)
|
||||
: XCharacterUtils.getJava4Instance();
|
||||
if (minGram < 1) {
|
||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
||||
}
|
||||
if (minGram > maxGram) {
|
||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
||||
}
|
||||
this.minGram = minGram;
|
||||
this.maxGram = maxGram;
|
||||
this.edgesOnly = edgesOnly;
|
||||
charBuffer = XCharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
|
||||
buffer = new int[charBuffer.getBuffer().length];
|
||||
// Make the term att large enough
|
||||
termAtt.resizeBuffer(2 * maxGram);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
|
||||
// termination of this loop is guaranteed by the fact that every iteration
|
||||
// either advances the buffer (calls consumes()) or increases gramSize
|
||||
while (true) {
|
||||
// compact
|
||||
if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted) {
|
||||
System.arraycopy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
|
||||
bufferEnd -= bufferStart;
|
||||
lastCheckedChar -= bufferStart;
|
||||
lastNonTokenChar -= bufferStart;
|
||||
bufferStart = 0;
|
||||
|
||||
// fill in remaining space
|
||||
exhausted = !charUtils.fill(charBuffer, input, buffer.length - bufferEnd);
|
||||
// convert to code points
|
||||
bufferEnd += charUtils.toCodePoints(charBuffer.getBuffer(), 0, charBuffer.getLength(), buffer, bufferEnd);
|
||||
}
|
||||
|
||||
// should we go to the next offset?
|
||||
if (gramSize > maxGram || (bufferStart + gramSize) > bufferEnd) {
|
||||
if (bufferStart + 1 + minGram > bufferEnd) {
|
||||
assert exhausted;
|
||||
return false;
|
||||
}
|
||||
consume();
|
||||
gramSize = minGram;
|
||||
}
|
||||
|
||||
updateLastNonTokenChar();
|
||||
|
||||
// retry if the token to be emitted was going to not only contain token chars
|
||||
final boolean termContainsNonTokenChar = lastNonTokenChar >= bufferStart && lastNonTokenChar < (bufferStart + gramSize);
|
||||
final boolean isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
|
||||
if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar) {
|
||||
consume();
|
||||
gramSize = minGram;
|
||||
continue;
|
||||
}
|
||||
|
||||
final int length = charUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
|
||||
termAtt.setLength(length);
|
||||
posIncAtt.setPositionIncrement(1);
|
||||
posLenAtt.setPositionLength(1);
|
||||
offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + length));
|
||||
++gramSize;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private void updateLastNonTokenChar() {
|
||||
final int termEnd = bufferStart + gramSize - 1;
|
||||
if (termEnd > lastCheckedChar) {
|
||||
for (int i = termEnd; i > lastCheckedChar; --i) {
|
||||
if (!isTokenChar(buffer[i])) {
|
||||
lastNonTokenChar = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
lastCheckedChar = termEnd;
|
||||
}
|
||||
}
|
||||
|
||||
/** Consume one code point. */
|
||||
private void consume() {
|
||||
offset += Character.charCount(buffer[bufferStart++]);
|
||||
}
|
||||
|
||||
/** Only collect characters which satisfy this condition. */
|
||||
protected boolean isTokenChar(int chr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void end() {
|
||||
assert bufferStart <= bufferEnd;
|
||||
int endOffset = offset;
|
||||
for (int i = bufferStart; i < bufferEnd; ++i) {
|
||||
endOffset += Character.charCount(buffer[i]);
|
||||
}
|
||||
endOffset = correctOffset(endOffset);
|
||||
offsetAtt.setOffset(endOffset, endOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void reset() throws IOException {
|
||||
super.reset();
|
||||
bufferStart = bufferEnd = buffer.length;
|
||||
lastNonTokenChar = lastCheckedChar = bufferStart - 1;
|
||||
offset = 0;
|
||||
gramSize = minGram;
|
||||
exhausted = false;
|
||||
charBuffer.reset();
|
||||
}
|
||||
}
|
|
@ -20,7 +20,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.vectorhighlight.XFieldTermStack.TermInfo;
|
||||
import org.apache.lucene.util.SorterTemplate;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
@ -364,34 +364,24 @@ public class XFieldQuery {
|
|||
PhraseQuery pq = (PhraseQuery)query;
|
||||
final Term[] terms = pq.getTerms();
|
||||
final int[] positions = pq.getPositions();
|
||||
new SorterTemplate() {
|
||||
new InPlaceMergeSorter() {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
Term tmpTerm = terms[i];
|
||||
terms[i] = terms[j];
|
||||
terms[j] = tmpTerm;
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
Term tmpTerm = terms[i];
|
||||
terms[i] = terms[j];
|
||||
terms[j] = tmpTerm;
|
||||
int tmpPos = positions[i];
|
||||
positions[i] = positions[j];
|
||||
positions[j] = tmpPos;
|
||||
}
|
||||
|
||||
int tmpPos = positions[i];
|
||||
positions[i] = positions[j];
|
||||
positions[j] = tmpPos;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return positions[i] - positions[j];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}.mergeSort(0, terms.length - 1);
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return positions[i] - positions[j];
|
||||
}
|
||||
}.sort(0, terms.length);
|
||||
|
||||
addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
|
||||
}
|
||||
|
|
|
@ -1,171 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.util.packed.XPackedInts.checkBlockSize;
|
||||
import static org.apache.lucene.util.packed.XPackedInts.numBlocks;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Base implementation for {@link XPagedMutable} and {@link PagedGrowableWriter}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
abstract class XAbstractPagedMutable<T extends XAbstractPagedMutable<T>> {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
static final int MIN_BLOCK_SIZE = 1 << 6;
|
||||
static final int MAX_BLOCK_SIZE = 1 << 30;
|
||||
|
||||
final long size;
|
||||
final int pageShift;
|
||||
final int pageMask;
|
||||
final PackedInts.Mutable[] subMutables;
|
||||
final int bitsPerValue;
|
||||
|
||||
XAbstractPagedMutable(int bitsPerValue, long size, int pageSize) {
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
this.size = size;
|
||||
pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
|
||||
pageMask = pageSize - 1;
|
||||
final int numPages = numBlocks(size, pageSize);
|
||||
subMutables = new PackedInts.Mutable[numPages];
|
||||
}
|
||||
|
||||
protected final void fillPages() {
|
||||
final int numPages = numBlocks(size, pageSize());
|
||||
for (int i = 0; i < numPages; ++i) {
|
||||
// do not allocate for more entries than necessary on the last page
|
||||
final int valueCount = i == numPages - 1 ? lastPageSize(size) : pageSize();
|
||||
subMutables[i] = newMutable(valueCount, bitsPerValue);
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract PackedInts.Mutable newMutable(int valueCount, int bitsPerValue);
|
||||
|
||||
final int lastPageSize(long size) {
|
||||
final int sz = indexInPage(size);
|
||||
return sz == 0 ? pageSize() : sz;
|
||||
}
|
||||
|
||||
final int pageSize() {
|
||||
return pageMask + 1;
|
||||
}
|
||||
|
||||
/** The number of values. */
|
||||
public final long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
final int pageIndex(long index) {
|
||||
return (int) (index >>> pageShift);
|
||||
}
|
||||
|
||||
final int indexInPage(long index) {
|
||||
return (int) index & pageMask;
|
||||
}
|
||||
|
||||
/** Get value at <code>index</code>. */
|
||||
public final long get(long index) {
|
||||
assert index >= 0 && index < size;
|
||||
final int pageIndex = pageIndex(index);
|
||||
final int indexInPage = indexInPage(index);
|
||||
return subMutables[pageIndex].get(indexInPage);
|
||||
}
|
||||
|
||||
/** Set value at <code>index</code>. */
|
||||
public final void set(long index, long value) {
|
||||
assert index >= 0 && index < size;
|
||||
final int pageIndex = pageIndex(index);
|
||||
final int indexInPage = indexInPage(index);
|
||||
subMutables[pageIndex].set(indexInPage, value);
|
||||
}
|
||||
|
||||
protected long baseRamBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
||||
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
|
||||
+ RamUsageEstimator.NUM_BYTES_LONG
|
||||
+ 3 * RamUsageEstimator.NUM_BYTES_INT;
|
||||
}
|
||||
|
||||
/** Return the number of bytes used by this object. */
|
||||
public long ramBytesUsed() {
|
||||
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
|
||||
bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length);
|
||||
for (PackedInts.Mutable gw : subMutables) {
|
||||
bytesUsed += gw.ramBytesUsed();
|
||||
}
|
||||
return bytesUsed;
|
||||
}
|
||||
|
||||
protected abstract T newUnfilledCopy(long newSize);
|
||||
|
||||
/** Create a new copy of size <code>newSize</code> based on the content of
|
||||
* this buffer. This method is much more efficient than creating a new
|
||||
* instance and copying values one by one. */
|
||||
public final T resize(long newSize) {
|
||||
final T copy = newUnfilledCopy(newSize);
|
||||
final int numCommonPages = Math.min(copy.subMutables.length, subMutables.length);
|
||||
final long[] copyBuffer = new long[1024];
|
||||
for (int i = 0; i < copy.subMutables.length; ++i) {
|
||||
final int valueCount = i == copy.subMutables.length - 1 ? lastPageSize(newSize) : pageSize();
|
||||
final int bpv = i < numCommonPages ? subMutables[i].getBitsPerValue() : this.bitsPerValue;
|
||||
copy.subMutables[i] = newMutable(valueCount, bpv);
|
||||
if (i < numCommonPages) {
|
||||
final int copyLength = Math.min(valueCount, subMutables[i].size());
|
||||
XPackedInts.copy(subMutables[i], 0, copy.subMutables[i], 0, copyLength, copyBuffer);
|
||||
}
|
||||
}
|
||||
return copy;
|
||||
}
|
||||
|
||||
/** Similar to {@link ArrayUtil#grow(long[], int)}. */
|
||||
public final T grow(long minSize) {
|
||||
assert minSize >= 0;
|
||||
if (minSize <= size()) {
|
||||
@SuppressWarnings("unchecked")
|
||||
final T result = (T) this;
|
||||
return result;
|
||||
}
|
||||
long extra = minSize >>> 3;
|
||||
if (extra < 3) {
|
||||
extra = 3;
|
||||
}
|
||||
final long newSize = minSize + extra;
|
||||
return resize(newSize);
|
||||
}
|
||||
|
||||
/** Similar to {@link ArrayUtil#grow(long[])}. */
|
||||
public final T grow() {
|
||||
return grow(size() + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String toString() {
|
||||
return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
|
||||
}
|
||||
|
||||
}
|
|
@ -1,162 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Implements {@link XPackedInts.Mutable}, but grows the
|
||||
* bit count of the underlying packed ints on-demand.
|
||||
* <p>Beware that this class will accept to set negative values but in order
|
||||
* to do this, it will grow the number of bits per value to 64.
|
||||
*
|
||||
* <p>@lucene.internal</p>
|
||||
*/
|
||||
public class XGrowableWriter implements PackedInts.Mutable {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
private long currentMask;
|
||||
private PackedInts.Mutable current;
|
||||
private final float acceptableOverheadRatio;
|
||||
|
||||
/**
|
||||
* @param startBitsPerValue the initial number of bits per value, may grow depending on the data
|
||||
* @param valueCount the number of values
|
||||
* @param acceptableOverheadRatio an acceptable overhead ratio
|
||||
*/
|
||||
public XGrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
|
||||
currentMask = mask(current.getBitsPerValue());
|
||||
}
|
||||
|
||||
private static long mask(int bitsPerValue) {
|
||||
return bitsPerValue == 64 ? ~0L : PackedInts.maxValue(bitsPerValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
return current.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return current.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBitsPerValue() {
|
||||
return current.getBitsPerValue();
|
||||
}
|
||||
|
||||
public PackedInts.Mutable getMutable() {
|
||||
return current;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return current.getArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return current.hasArray();
|
||||
}
|
||||
|
||||
private void ensureCapacity(long value) {
|
||||
if ((value & currentMask) == value) {
|
||||
return;
|
||||
}
|
||||
final int bitsRequired = value < 0 ? 64 : PackedInts.bitsRequired(value);
|
||||
assert bitsRequired > current.getBitsPerValue();
|
||||
final int valueCount = size();
|
||||
PackedInts.Mutable next = PackedInts.getMutable(valueCount, bitsRequired, acceptableOverheadRatio);
|
||||
PackedInts.copy(current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||
current = next;
|
||||
currentMask = mask(current.getBitsPerValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(int index, long value) {
|
||||
ensureCapacity(value);
|
||||
current.set(index, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
current.clear();
|
||||
}
|
||||
|
||||
public XGrowableWriter resize(int newSize) {
|
||||
XGrowableWriter next = new XGrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
|
||||
final int limit = Math.min(size(), newSize);
|
||||
PackedInts.copy(current, 0, next, 0, limit, PackedInts.DEFAULT_BUFFER_SIZE);
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
return current.get(index, arr, off, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
long max = 0;
|
||||
for (int i = off, end = off + len; i < end; ++i) {
|
||||
// bitwise or is nice because either all values are positive and the
|
||||
// or-ed result will require as many bits per value as the max of the
|
||||
// values, or one of them is negative and the result will be negative,
|
||||
// forcing GrowableWriter to use 64 bits per value
|
||||
max |= arr[i];
|
||||
}
|
||||
ensureCapacity(max);
|
||||
return current.set(index, arr, off, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
ensureCapacity(val);
|
||||
current.fill(fromIndex, toIndex, val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(
|
||||
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
||||
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
|
||||
+ RamUsageEstimator.NUM_BYTES_LONG
|
||||
+ RamUsageEstimator.NUM_BYTES_FLOAT)
|
||||
+ current.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(DataOutput out) throws IOException {
|
||||
current.save(out);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.util.packed.PackedInts.Mutable;
|
||||
import org.apache.lucene.util.packed.PackedInts.Reader;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
/**
|
||||
* Simplistic compression for array of unsigned long values.
|
||||
* Each value is >= 0 and <= a specified maximum value. The
|
||||
* values are stored as packed ints, with each value
|
||||
* consuming a fixed number of bits.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class XPackedInts {
|
||||
|
||||
// Reminder guard: when assertions are enabled, loading this class fails as soon
// as Lucene.VERSION is no longer LUCENE_43, signalling that this copy can go.
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
/**
 * Same as {@link PackedInts#copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer.
 * Copies {@code len} values from {@code src} (starting at {@code srcPos}) to
 * {@code dest} (starting at {@code destPos}), staging them through {@code buf}.
 *
 * @param src     reader to copy values from
 * @param srcPos  index of the first value to read from {@code src}
 * @param dest    mutable to copy values into
 * @param destPos index of the first value to write in {@code dest}
 * @param len     number of values to copy
 * @param buf     scratch buffer; must have a non-zero length (asserted)
 */
|
||||
static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
|
||||
assert buf.length > 0;
|
||||
// number of values currently sitting in buf[0..remaining) that are not written yet
int remaining = 0;
|
||||
while (len > 0) {
|
||||
// append as many source values as fit behind the still-pending ones
final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
|
||||
assert read > 0;
|
||||
srcPos += read;
|
||||
len -= read;
|
||||
remaining += read;
|
||||
final int written = dest.set(destPos, buf, 0, remaining);
|
||||
assert written > 0;
|
||||
destPos += written;
|
||||
if (written < remaining) {
|
||||
// partial write: move the unwritten tail to the front of the buffer
System.arraycopy(buf, written, buf, 0, remaining - written);
|
||||
}
|
||||
remaining -= written;
|
||||
}
|
||||
// the source is exhausted; flush whatever is still pending in the buffer
while (remaining > 0) {
|
||||
final int written = dest.set(destPos, buf, 0, remaining);
|
||||
destPos += written;
|
||||
remaining -= written;
|
||||
System.arraycopy(buf, written, buf, 0, remaining);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Check that the block size is a power of 2, in the right bounds, and return
 * its log in base 2.
 *
 * @param blockSize    the block size to validate
 * @param minBlockSize the smallest accepted block size (inclusive)
 * @param maxBlockSize the largest accepted block size (inclusive)
 * @return the number of trailing zero bits of {@code blockSize}
 * @throws IllegalArgumentException if {@code blockSize} is outside
 *         {@code [minBlockSize, maxBlockSize]} or is not a power of two
 */
|
||||
static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
|
||||
if (blockSize < minBlockSize || blockSize > maxBlockSize) {
|
||||
throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
|
||||
}
|
||||
// x & (x - 1) clears the lowest set bit; a non-zero result means more than one bit was set
if ((blockSize & (blockSize - 1)) != 0) {
|
||||
throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
|
||||
}
|
||||
return Integer.numberOfTrailingZeros(blockSize);
|
||||
}
|
||||
|
||||
/**
 * Return the number of blocks required to store <code>size</code> values on
 * <code>blockSize</code>.
 *
 * @param size      the number of values to store
 * @param blockSize the number of values per block
 * @return {@code size / blockSize}, plus one when there is a remainder
 * @throws IllegalArgumentException if the block count, once narrowed to an
 *         {@code int}, no longer covers {@code size} values
 */
|
||||
static int numBlocks(long size, int blockSize) {
|
||||
final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
|
||||
// detects the case where the int cast above lost information
if ((long) numBlocks * blockSize < size) {
|
||||
throw new IllegalArgumentException("size is too large for this block size");
|
||||
}
|
||||
return numBlocks;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,79 +0,0 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.PackedInts.Mutable;
|
||||
|
||||
/**
|
||||
* A paged variant of {@link XGrowableWriter}. This class slices data into fixed-size blocks
|
||||
* which have independent numbers of bits per value and grow on-demand.
|
||||
* <p>You should use this class instead of {@link AppendingLongBuffer} only when
|
||||
* you need random write-access. Otherwise this class will likely be slower and
|
||||
* less memory-efficient.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class XPagedGrowableWriter extends XAbstractPagedMutable<XPagedGrowableWriter> {
|
||||
|
||||
// Reminder guard: when assertions are enabled, loading this class fails as soon
// as Lucene.VERSION is no longer LUCENE_43, signalling that this copy can go.
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
// overhead ratio handed to every XGrowableWriter page created by newMutable
final float acceptableOverheadRatio;
|
||||
|
||||
/**
|
||||
* Create a new {@link XPagedGrowableWriter} instance.
|
||||
*
|
||||
* @param size the number of values to store.
|
||||
* @param pageSize the number of values per page
|
||||
* @param startBitsPerValue the initial number of bits per value
|
||||
* @param acceptableOverheadRatio an acceptable overhead ratio
|
||||
*/
|
||||
public XPagedGrowableWriter(long size, int pageSize,
|
||||
int startBitsPerValue, float acceptableOverheadRatio) {
|
||||
this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true);
|
||||
}
|
||||
|
||||
/**
 * Package-private constructor that lets the caller decide whether the pages
 * are filled right away. The public constructor passes {@code true};
 * {@link #newUnfilledCopy(long)} passes {@code false}.
 *
 * @param fillPages when {@code true}, {@code fillPages()} is called before returning
 */
XPagedGrowableWriter(long size, int pageSize,int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) {
|
||||
super(startBitsPerValue, size, pageSize);
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
if (fillPages) {
|
||||
fillPages();
|
||||
}
|
||||
}
|
||||
|
||||
/** Creates the storage for one page: an {@link XGrowableWriter} using this writer's overhead ratio. */
@Override
|
||||
protected Mutable newMutable(int valueCount, int bitsPerValue) {
|
||||
return new XGrowableWriter(bitsPerValue, valueCount, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
/**
 * Creates a writer for {@code newSize} values with the same page size, bits
 * per value and overhead ratio as this one, without filling its pages.
 */
@Override
|
||||
protected XPagedGrowableWriter newUnfilledCopy(long newSize) {
|
||||
return new XPagedGrowableWriter(newSize, pageSize(), bitsPerValue, acceptableOverheadRatio, false);
|
||||
}
|
||||
|
||||
/** Adds the size of the {@code float} overhead-ratio field to the base estimate of the superclass. */
@Override
|
||||
protected long baseRamBytesUsed() {
|
||||
return super.baseRamBytesUsed() + RamUsageEstimator.NUM_BYTES_FLOAT;
|
||||
}
|
||||
|
||||
}
|
|
@ -128,7 +128,7 @@ public class Version implements Serializable {
|
|||
public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_43);
|
||||
|
||||
public static final int V_1_0_0_Beta1_ID = /*00*/1000001;
|
||||
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, true, org.apache.lucene.util.Version.LUCENE_43);
|
||||
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, true, org.apache.lucene.util.Version.LUCENE_44);
|
||||
|
||||
public static final Version CURRENT = V_1_0_0_Beta1;
|
||||
|
||||
|
|
|
@ -473,7 +473,7 @@ public class MetaDataCreateIndexService extends AbstractComponent {
|
|||
}
|
||||
}
|
||||
|
||||
CollectionUtil.quickSort(templates, new Comparator<IndexTemplateMetaData>() {
|
||||
CollectionUtil.timSort(templates, new Comparator<IndexTemplateMetaData>() {
|
||||
@Override
|
||||
public int compare(IndexTemplateMetaData o1, IndexTemplateMetaData o2) {
|
||||
return o2.order() - o1.order();
|
||||
|
|
|
@ -19,8 +19,11 @@
|
|||
|
||||
package org.elasticsearch.cluster.routing.allocation.allocator;
|
||||
|
||||
import org.apache.lucene.util.IntroSorter;
|
||||
|
||||
import org.apache.lucene.util.IntroSorter;
|
||||
|
||||
import com.google.common.base.Predicate;
|
||||
import org.apache.lucene.util.SorterTemplate;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.cluster.metadata.MetaData;
|
||||
import org.elasticsearch.cluster.routing.MutableShardRouting;
|
||||
|
@ -404,7 +407,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
*/
|
||||
weights[lowIdx] = sorter.weight(Operation.BALANCE, modelNodes[lowIdx]);
|
||||
weights[highIdx] = sorter.weight(Operation.BALANCE, modelNodes[highIdx]);
|
||||
sorter.quickSort(0, weights.length - 1);
|
||||
sorter.sort(0, weights.length);
|
||||
lowIdx = 0;
|
||||
highIdx = weights.length - 1;
|
||||
changed = true;
|
||||
|
@ -451,7 +454,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
sorter.reset(operation, indices[i]);
|
||||
deltas[i] = sorter.delta();
|
||||
}
|
||||
new SorterTemplate() {
|
||||
new IntroSorter() {
|
||||
float pivotWeight;
|
||||
|
||||
@Override
|
||||
|
@ -478,7 +481,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
protected int comparePivot(int j) {
|
||||
return Float.compare(deltas[j], pivotWeight);
|
||||
}
|
||||
}.quickSort(0, deltas.length - 1);
|
||||
}.sort(0, deltas.length);
|
||||
|
||||
return indices;
|
||||
}
|
||||
|
@ -956,7 +959,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
}
|
||||
}
|
||||
|
||||
static final class NodeSorter extends SorterTemplate {
|
||||
static final class NodeSorter extends IntroSorter {
|
||||
|
||||
final ModelNode[] modelNodes;
|
||||
/* the nodes weights with respect to the current weight function / index */
|
||||
|
@ -982,7 +985,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
for (int i = 0; i < weights.length; i++) {
|
||||
weights[i] = weight(operation, modelNodes[i]);
|
||||
}
|
||||
quickSort(0, modelNodes.length - 1);
|
||||
sort(0, modelNodes.length);
|
||||
}
|
||||
|
||||
public float weight(Operation operation, ModelNode node) {
|
||||
|
|
|
@ -35,14 +35,13 @@ import org.elasticsearch.index.analysis.NamedAnalyzer;
|
|||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class Lucene {
|
||||
|
||||
public static final Version VERSION = Version.LUCENE_43;
|
||||
public static final Version VERSION = Version.LUCENE_44;
|
||||
public static final Version ANALYZER_VERSION = VERSION;
|
||||
public static final Version QUERYPARSER_VERSION = VERSION;
|
||||
|
||||
|
@ -57,6 +56,9 @@ public class Lucene {
|
|||
if (version == null) {
|
||||
return defaultVersion;
|
||||
}
|
||||
if ("4.4".equals(version)) {
|
||||
return VERSION.LUCENE_44;
|
||||
}
|
||||
if ("4.3".equals(version)) {
|
||||
return Version.LUCENE_43;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package org.elasticsearch.common.lucene.all;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
|
@ -27,7 +29,6 @@ import org.apache.lucene.index.IndexReaderContext;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||
import org.apache.lucene.search.spans.SpanScorer;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
|
@ -74,7 +75,7 @@ public class AllTermQuery extends SpanTermQuery {
|
|||
if (this.stats == null) {
|
||||
return null;
|
||||
}
|
||||
SloppySimScorer sloppySimScorer = similarity.sloppySimScorer(stats, context);
|
||||
SimScorer sloppySimScorer = similarity.simScorer(stats, context);
|
||||
return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer);
|
||||
}
|
||||
|
||||
|
@ -83,7 +84,7 @@ public class AllTermQuery extends SpanTermQuery {
|
|||
protected float payloadScore;
|
||||
protected int payloadsSeen;
|
||||
|
||||
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
|
||||
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
positions = spans.getPostings();
|
||||
}
|
||||
|
@ -158,7 +159,7 @@ public class AllTermQuery extends SpanTermQuery {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
SimScorer docScorer = similarity.simScorer(stats, context);
|
||||
ComplexExplanation inner = new ComplexExplanation();
|
||||
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||
|
|
|
@ -45,7 +45,7 @@ public class DocIdSets {
|
|||
* Is it an empty {@link DocIdSet}?
|
||||
*/
|
||||
public static boolean isEmpty(@Nullable DocIdSet set) {
|
||||
return set == null || set == DocIdSet.EMPTY_DOCIDSET;
|
||||
return set == null || set == EMPTY_DOCIDSET;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -70,19 +70,19 @@ public class DocIdSets {
|
|||
* <p/>
|
||||
* Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
|
||||
* might be expensive even if its cacheable (i.e. not going back to the reader to execute). We effectively
|
||||
* always either return {@link DocIdSet#EMPTY_DOCIDSET} or {@link FixedBitSet}.
|
||||
* always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
|
||||
*/
|
||||
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
|
||||
if (set == null || set == DocIdSet.EMPTY_DOCIDSET) {
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
if (set == null || set == EMPTY_DOCIDSET) {
|
||||
return EMPTY_DOCIDSET;
|
||||
}
|
||||
DocIdSetIterator it = set.iterator();
|
||||
if (it == null) {
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
return EMPTY_DOCIDSET;
|
||||
}
|
||||
int doc = it.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
return EMPTY_DOCIDSET;
|
||||
}
|
||||
if (set instanceof FixedBitSet) {
|
||||
return set;
|
||||
|
@ -94,6 +94,26 @@ public class DocIdSets {
|
|||
} while (doc != DocIdSetIterator.NO_MORE_DOCS);
|
||||
return fixedBitSet;
|
||||
}
|
||||
|
||||
/**
 * An empty {@code DocIdSet} instance: its iterator is
 * {@link DocIdSetIterator#empty()}, it reports itself as cacheable and it
 * exposes no random-access {@link Bits} ({@code bits()} returns {@code null}).
 */
|
||||
protected static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() {
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return DocIdSetIterator.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// we explicitly provide no random access, as this filter is 100% sparse and iterator exits faster
|
||||
@Override
|
||||
public Bits bits() {
|
||||
return null;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Gets a set to bits.
|
||||
|
|
|
@ -110,7 +110,7 @@ public class ElectMasterService extends AbstractComponent {
|
|||
it.remove();
|
||||
}
|
||||
}
|
||||
CollectionUtil.quickSort(possibleNodes, nodeComparator);
|
||||
CollectionUtil.introSort(possibleNodes, nodeComparator);
|
||||
return possibleNodes;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ngram.*;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
|
||||
|
@ -47,24 +49,24 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
super(index, indexSettings, name, settings);
|
||||
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||
if (version.onOrAfter(Version.LUCENE_43)) {
|
||||
TokenStream result = tokenStream;
|
||||
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
|
||||
if (side == Side.BACK) {
|
||||
result = new ReverseStringFilter(version, result);
|
||||
}
|
||||
result = new XEdgeNGramTokenFilter(version, result, minGram, maxGram);
|
||||
result = new EdgeNGramTokenFilter(version, result, minGram, maxGram);
|
||||
if (side == Side.BACK) {
|
||||
result = new ReverseStringFilter(version, result);
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
return new EdgeNGramTokenFilter(tokenStream, side, minGram, maxGram);
|
||||
}
|
||||
return new EdgeNGramTokenFilter(version, tokenStream, side, minGram, maxGram);
|
||||
}
|
||||
}
|
|
@ -19,12 +19,13 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
|
||||
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.XEdgeNGramTokenizer;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
|
@ -45,7 +46,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
|||
|
||||
private final int maxGram;
|
||||
|
||||
private final EdgeNGramTokenizer.Side side;
|
||||
private final Lucene43EdgeNGramTokenizer.Side side;
|
||||
|
||||
private final CharMatcher matcher;
|
||||
|
||||
|
@ -54,22 +55,23 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
|||
super(index, indexSettings, name, settings);
|
||||
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||
this.side = EdgeNGramTokenizer.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||
this.side = Lucene43EdgeNGramTokenizer.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tokenizer create(Reader reader) {
|
||||
if (version.onOrAfter(Version.LUCENE_43)) {
|
||||
if (side == EdgeNGramTokenizer.Side.BACK) {
|
||||
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||
if (side == Lucene43EdgeNGramTokenizer.Side.BACK) {
|
||||
throw new ElasticSearchIllegalArgumentException("side=BACK is not supported anymore. Please fix your analysis chain or use"
|
||||
+ " an older compatibility version (<=4.2) but beware that it might cause highlighting bugs.");
|
||||
}
|
||||
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
||||
if (matcher == null) {
|
||||
return new XEdgeNGramTokenizer(version, reader, minGram, maxGram);
|
||||
return new EdgeNGramTokenizer(version, reader, minGram, maxGram);
|
||||
} else {
|
||||
return new XEdgeNGramTokenizer(version, reader, minGram, maxGram) {
|
||||
return new EdgeNGramTokenizer(version, reader, minGram, maxGram) {
|
||||
@Override
|
||||
protected boolean isTokenChar(int chr) {
|
||||
return matcher.isTokenChar(chr);
|
||||
|
@ -77,7 +79,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
|||
};
|
||||
}
|
||||
} else {
|
||||
return new EdgeNGramTokenizer(reader, side, minGram, maxGram);
|
||||
return new Lucene43EdgeNGramTokenizer(version, reader, side, minGram, maxGram);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -17,6 +17,8 @@ package org.elasticsearch.index.analysis;
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -60,8 +62,8 @@ import org.elasticsearch.indices.analysis.IndicesAnalysisService;
|
|||
*/
|
||||
@AnalysisSettingsRequired
|
||||
public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
|
||||
private Boolean enablePositionIncrements;
|
||||
private CharArraySet keepWords;
|
||||
private final CharArraySet keepWords;
|
||||
private final boolean enablePositionIncrements;
|
||||
private static final String KEEP_WORDS_KEY = "keep_words";
|
||||
private static final String KEEP_WORDS_PATH_KEY = KEEP_WORDS_KEY + "_path";
|
||||
private static final String KEEP_WORDS_CASE_KEY = KEEP_WORDS_KEY + "_case"; // for javadoc
|
||||
|
@ -80,14 +82,22 @@ public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
|
|||
throw new ElasticSearchIllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
|
||||
+ KEEP_WORDS_PATH_KEY + "` to be configured");
|
||||
}
|
||||
this.enablePositionIncrements = settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
|
||||
if (version.onOrAfter(Version.LUCENE_44) && settings.get(ENABLE_POS_INC_KEY) != null) {
|
||||
throw new ElasticSearchIllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
|
||||
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
|
||||
}
|
||||
enablePositionIncrements = version.onOrAfter(Version.LUCENE_44) ? true : settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
|
||||
|
||||
this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY, version);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new KeepWordFilter(enablePositionIncrements, tokenStream, keepWords);
|
||||
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||
return new KeepWordFilter(version, tokenStream, keepWords);
|
||||
}
|
||||
return new KeepWordFilter(version, enablePositionIncrements, tokenStream, keepWords);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
|
@ -35,18 +39,25 @@ public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final int min;
|
||||
private final int max;
|
||||
private final boolean enablePositionIncrements;
|
||||
private static final String ENABLE_POS_INC_KEY = "enable_position_increments";
|
||||
|
||||
@Inject
|
||||
public LengthTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
min = settings.getAsInt("min", 0);
|
||||
max = settings.getAsInt("max", Integer.MAX_VALUE);
|
||||
enablePositionIncrements = settings.getAsBoolean("enabled_position_increments", true);
|
||||
if (version.onOrAfter(Version.LUCENE_44) && settings.get(ENABLE_POS_INC_KEY) != null) {
|
||||
throw new ElasticSearchIllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
|
||||
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
|
||||
}
|
||||
enablePositionIncrements = version.onOrAfter(Version.LUCENE_44) ? true : settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new LengthFilter(enablePositionIncrements, tokenStream, min, max);
|
||||
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||
return new LengthFilter(version, tokenStream, min, max);
|
||||
}
|
||||
return new LengthFilter(version, enablePositionIncrements, tokenStream, min, max);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.index.analysis;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||
import org.apache.lucene.analysis.ngram.XNGramTokenFilter;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
|
@ -49,10 +48,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
if (this.version.onOrAfter(Version.LUCENE_43)) {
|
||||
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
||||
return new XNGramTokenFilter(version, tokenStream, minGram, maxGram);
|
||||
}
|
||||
return new NGramTokenFilter(tokenStream, minGram, maxGram);
|
||||
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||
return new NGramTokenFilter(version, tokenStream, minGram, maxGram);
|
||||
}
|
||||
}
|
|
@ -19,10 +19,11 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.ngram.Lucene43NGramTokenizer;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.XNGramTokenizer;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
|
@ -98,20 +99,21 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
|
|||
|
||||
@Override
|
||||
public Tokenizer create(Reader reader) {
|
||||
if (this.version.onOrAfter(Version.LUCENE_43)) {
|
||||
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
||||
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||
if (matcher == null) {
|
||||
return new XNGramTokenizer(version, reader, minGram, maxGram);
|
||||
return new NGramTokenizer(version, reader, minGram, maxGram);
|
||||
} else {
|
||||
return new XNGramTokenizer(version, reader, minGram, maxGram) {
|
||||
return new NGramTokenizer(version, reader, minGram, maxGram) {
|
||||
@Override
|
||||
protected boolean isTokenChar(int chr) {
|
||||
return matcher.isTokenChar(chr);
|
||||
}
|
||||
};
|
||||
}
|
||||
} else {
|
||||
return new Lucene43NGramTokenizer(reader, minGram, maxGram);
|
||||
}
|
||||
return new NGramTokenizer(reader, minGram, maxGram);
|
||||
}
|
||||
|
||||
}
|
|
@ -19,6 +19,9 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
|
@ -34,15 +37,23 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private final boolean updateOffsets;
|
||||
private static final String UPDATE_OFFSETS_KEY = "update_offsets";
|
||||
|
||||
@Inject
|
||||
public TrimTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
if (version.onOrAfter(Version.LUCENE_44) && settings.get(UPDATE_OFFSETS_KEY) != null) {
|
||||
throw new ElasticSearchIllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain or use"
|
||||
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
|
||||
}
|
||||
this.updateOffsets = settings.getAsBoolean("update_offsets", false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new TrimFilter(tokenStream, updateOffsets);
|
||||
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||
return new TrimFilter(version, tokenStream);
|
||||
}
|
||||
return new TrimFilter(version, tokenStream, updateOffsets);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -186,7 +186,7 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
|
|||
// note, we don't wrap the return value with a BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs) because
|
||||
// we rely on our custom XFilteredQuery to do the wrapping if needed, so we don't have the wrap each
|
||||
// filter on its own
|
||||
return cacheValue == DocIdSet.EMPTY_DOCIDSET ? null : cacheValue;
|
||||
return DocIdSets.isEmpty(cacheValue) ? null : cacheValue;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
|
@ -66,7 +66,10 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
|
|||
* Called by Lucene. Same as {@link #onCommit(java.util.List)}.
|
||||
*/
|
||||
public void onInit(List<? extends IndexCommit> commits) throws IOException {
|
||||
onCommit(commits);
|
||||
if (!commits.isEmpty()) { // this might be empty if we create a new index.
|
||||
// Lucene 4.4 changed this behavior: onInit is now called even when the commits list is empty.
|
||||
onCommit(commits);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -74,6 +77,7 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
|
|||
* and delegates to the wrapped deletion policy.
|
||||
*/
|
||||
public void onCommit(List<? extends IndexCommit> commits) throws IOException {
|
||||
assert !commits.isEmpty() : "Commits must not be empty";
|
||||
synchronized (mutex) {
|
||||
List<SnapshotIndexCommit> snapshotCommits = wrapCommits(commits);
|
||||
primary.onCommit(snapshotCommits);
|
||||
|
@ -94,7 +98,8 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
|
|||
}
|
||||
this.commits = newCommits;
|
||||
// the last commit that is not deleted
|
||||
this.lastCommit = newCommits.get(newCommits.size() - 1);
|
||||
this.lastCommit = newCommits.get(newCommits.size() - 1);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1273,7 +1273,6 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
|
|||
config.setReaderTermsIndexDivisor(termIndexDivisor);
|
||||
config.setMaxThreadStates(indexConcurrency);
|
||||
config.setCodec(codecService.codec(codecName));
|
||||
|
||||
indexWriter = new IndexWriter(store.directory(), config);
|
||||
} catch (IOException e) {
|
||||
safeClose(indexWriter);
|
||||
|
|
|
@ -19,10 +19,6 @@
|
|||
|
||||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.packed.AppendingLongBuffer;
|
||||
|
@ -34,11 +30,8 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
|
|||
/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */
|
||||
public class MultiOrdinals implements Ordinals {
|
||||
|
||||
// hard-coded in Lucene 4.3 but will be exposed in Lucene 4.4
|
||||
static {
|
||||
assert Lucene.VERSION == Version.LUCENE_43;
|
||||
}
|
||||
private static final int OFFSETS_PAGE_SIZE = 1024;
|
||||
private static final int OFFSET_INIT_PAGE_COUNT = 16;
|
||||
|
||||
/** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. */
|
||||
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
|
||||
|
@ -62,7 +55,7 @@ public class MultiOrdinals implements Ordinals {
|
|||
multiValued = builder.getNumMultiValuesDocs() > 0;
|
||||
numOrds = builder.getNumOrds();
|
||||
endOffsets = new MonotonicAppendingLongBuffer();
|
||||
ords = new AppendingLongBuffer();
|
||||
ords = new AppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE);
|
||||
long lastEndOffset = 0;
|
||||
for (int i = 0; i < builder.maxDoc(); ++i) {
|
||||
final LongsRef docOrds = builder.docOrds(i);
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.XPagedGrowableWriter;
|
||||
import org.apache.lucene.util.packed.PagedGrowableWriter;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
import java.io.Closeable;
|
||||
|
@ -117,13 +117,13 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
}
|
||||
|
||||
// Current position
|
||||
private XPagedGrowableWriter positions;
|
||||
private PagedGrowableWriter positions;
|
||||
// First level (0) of ordinals and pointers to the next level
|
||||
private final GrowableWriter firstOrdinals;
|
||||
private XPagedGrowableWriter firstNextLevelSlices;
|
||||
private PagedGrowableWriter firstNextLevelSlices;
|
||||
// Ordinals and pointers for other levels, starting at 1
|
||||
private final XPagedGrowableWriter[] ordinals;
|
||||
private final XPagedGrowableWriter[] nextLevelSlices;
|
||||
private final PagedGrowableWriter[] ordinals;
|
||||
private final PagedGrowableWriter[] nextLevelSlices;
|
||||
private final int[] sizes;
|
||||
|
||||
private final int startBitsPerValue;
|
||||
|
@ -132,11 +132,11 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) {
|
||||
this.startBitsPerValue = startBitsPerValue;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
positions = new XPagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
||||
positions = new PagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
||||
firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio);
|
||||
// over-allocate so that we never have to worry about the array sizes; 24 entries allow storing several million ordinals per doc...
|
||||
ordinals = new XPagedGrowableWriter[24];
|
||||
nextLevelSlices = new XPagedGrowableWriter[24];
|
||||
ordinals = new PagedGrowableWriter[24];
|
||||
nextLevelSlices = new PagedGrowableWriter[24];
|
||||
sizes = new int[24];
|
||||
Arrays.fill(sizes, 1); // reserve the 1st slice on every level
|
||||
}
|
||||
|
@ -146,7 +146,7 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
final long newSlice = sizes[level]++;
|
||||
// Lazily allocate ordinals
|
||||
if (ordinals[level] == null) {
|
||||
ordinals[level] = new XPagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
||||
ordinals[level] = new PagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
||||
} else {
|
||||
ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level));
|
||||
if (nextLevelSlices[level] != null) {
|
||||
|
@ -167,7 +167,7 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
} else {
|
||||
final long newSlice = newSlice(1);
|
||||
if (firstNextLevelSlices == null) {
|
||||
firstNextLevelSlices = new XPagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
|
||||
firstNextLevelSlices = new PagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
|
||||
}
|
||||
firstNextLevelSlices.set(docID, newSlice);
|
||||
final long offset = startOffset(1, newSlice);
|
||||
|
@ -183,7 +183,7 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
// reached the end of the slice, allocate a new one on the next level
|
||||
final long newSlice = newSlice(level + 1);
|
||||
if (nextLevelSlices[level] == null) {
|
||||
nextLevelSlices[level] = new XPagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
|
||||
nextLevelSlices[level] = new PagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
|
||||
}
|
||||
nextLevelSlices[level].set(sliceID(level, offset), newSlice);
|
||||
++level;
|
||||
|
|
|
@ -60,7 +60,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
|
|||
if (terms == null) {
|
||||
return FSTBytesAtomicFieldData.empty(reader.maxDoc());
|
||||
}
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
|
||||
final IntsRef scratch = new IntsRef();
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ public class CommitPoints implements Iterable<CommitPoint> {
|
|||
private final ImmutableList<CommitPoint> commitPoints;
|
||||
|
||||
public CommitPoints(List<CommitPoint> commitPoints) {
|
||||
CollectionUtil.quickSort(commitPoints, new Comparator<CommitPoint>() {
|
||||
CollectionUtil.introSort(commitPoints, new Comparator<CommitPoint>() {
|
||||
@Override
|
||||
public int compare(CommitPoint o1, CommitPoint o2) {
|
||||
return (o2.version() < o1.version() ? -1 : (o2.version() == o1.version() ? 0 : 1));
|
||||
|
|
|
@ -27,13 +27,11 @@ public abstract class AbstractMergePolicyProvider<MP extends MergePolicy> extend
|
|||
|
||||
public static final String INDEX_COMPOUND_FORMAT = "index.compound_format";
|
||||
|
||||
protected volatile boolean compoundFormat;
|
||||
protected volatile double noCFSRatio;
|
||||
|
||||
protected AbstractMergePolicyProvider(Store store) {
|
||||
super(store.shardId(), store.indexSettings());
|
||||
this.noCFSRatio = parseNoCFSRatio(indexSettings.get(INDEX_COMPOUND_FORMAT, Boolean.toString(store.suggestUseCompoundFile())));
|
||||
this.compoundFormat = noCFSRatio != 0.0;
|
||||
}
|
||||
|
||||
public static double parseNoCFSRatio(String noCFSRatio) {
|
||||
|
|
|
@ -83,7 +83,6 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
|
|||
mergePolicy.setMergeFactor(mergeFactor);
|
||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||
mergePolicy.setUseCompoundFile(compoundFormat);
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
|
||||
policies.add(mergePolicy);
|
||||
|
@ -140,14 +139,11 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
|
|||
}
|
||||
|
||||
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogByteSizeMergePolicyProvider.this.noCFSRatio)));
|
||||
final boolean compoundFormat = noCFSRatio != 0.0;
|
||||
if (noCFSRatio != LogByteSizeMergePolicyProvider.this.noCFSRatio) {
|
||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogByteSizeMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||
LogByteSizeMergePolicyProvider.this.compoundFormat = compoundFormat;
|
||||
LogByteSizeMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||
for (CustomLogByteSizeMergePolicy policy : policies) {
|
||||
policy.setNoCFSRatio(noCFSRatio);
|
||||
policy.setUseCompoundFile(compoundFormat);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -86,7 +86,6 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
|
|||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||
mergePolicy.setMergeFactor(mergeFactor);
|
||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||
mergePolicy.setUseCompoundFile(compoundFormat);
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
policies.add(mergePolicy);
|
||||
return mergePolicy;
|
||||
|
@ -130,11 +129,9 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
|
|||
final boolean compoundFormat = noCFSRatio != 0.0;
|
||||
if (noCFSRatio != LogDocMergePolicyProvider.this.noCFSRatio) {
|
||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogDocMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||
LogDocMergePolicyProvider.this.compoundFormat = compoundFormat;
|
||||
LogDocMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||
for (CustomLogDocMergePolicy policy : policies) {
|
||||
policy.setNoCFSRatio(noCFSRatio);
|
||||
policy.setUseCompoundFile(compoundFormat);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,7 +99,6 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
|
|||
} else {
|
||||
mergePolicy = new CustomTieredMergePolicyProvider(this);
|
||||
}
|
||||
mergePolicy.setUseCompoundFile(compoundFormat);
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
|
||||
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
|
||||
|
@ -191,14 +190,11 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
|
|||
}
|
||||
|
||||
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
|
||||
final boolean compoundFormat = noCFSRatio != 0.0;
|
||||
if (noCFSRatio != TieredMergePolicyProvider.this.noCFSRatio) {
|
||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(TieredMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||
TieredMergePolicyProvider.this.compoundFormat = compoundFormat;
|
||||
TieredMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setNoCFSRatio(noCFSRatio);
|
||||
policy.setUseCompoundFile(compoundFormat);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -59,8 +59,7 @@ public class ConcurrentMergeSchedulerProvider extends MergeSchedulerProvider {
|
|||
@Override
|
||||
public MergeScheduler newMergeScheduler() {
|
||||
CustomConcurrentMergeScheduler concurrentMergeScheduler = new CustomConcurrentMergeScheduler(logger, shardId, this);
|
||||
concurrentMergeScheduler.setMaxMergeCount(maxMergeCount);
|
||||
concurrentMergeScheduler.setMaxThreadCount(maxThreadCount);
|
||||
concurrentMergeScheduler.setMaxMergesAndThreads(maxMergeCount, maxThreadCount);
|
||||
schedulers.add(concurrentMergeScheduler);
|
||||
return concurrentMergeScheduler;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||
import org.elasticsearch.common.bytes.HashedBytesArray;
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
import org.elasticsearch.common.lucene.search.ApplyAcceptedDocsFilter;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
import org.elasticsearch.common.lucene.search.TermFilter;
|
||||
|
@ -217,7 +218,7 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
|
|||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
DocIdSet parentsSet = parentFilter.getDocIdSet(context, acceptDocs);
|
||||
if (parentsSet == null || parentsSet == DocIdSet.EMPTY_DOCIDSET || remaining == 0) {
|
||||
if (DocIdSets.isEmpty(parentsSet) || remaining == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package org.elasticsearch.index.search.child;
|
||||
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
|
||||
import gnu.trove.map.hash.TObjectFloatHashMap;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -212,7 +214,7 @@ public class ParentQuery extends Query implements SearchContext.Rewrite {
|
|||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, acceptDocs);
|
||||
if (childrenDocSet == null || childrenDocSet == DocIdSet.EMPTY_DOCIDSET) {
|
||||
if (DocIdSets.isEmpty(childrenDocSet)) {
|
||||
return null;
|
||||
}
|
||||
IdReaderTypeCache idTypeCache = searchContext.idCache().reader(context.reader()).type(parentType);
|
||||
|
|
|
@ -82,7 +82,6 @@ import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
|||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
@ -91,7 +90,6 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
||||
import org.elasticsearch.index.analysis.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -268,7 +266,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public Tokenizer create(Reader reader) {
|
||||
return new NGramTokenizer(reader);
|
||||
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -280,7 +278,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public Tokenizer create(Reader reader) {
|
||||
return new NGramTokenizer(reader);
|
||||
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -292,7 +290,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public Tokenizer create(Reader reader) {
|
||||
return new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.DEFAULT_SIDE, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -304,7 +302,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public Tokenizer create(Reader reader) {
|
||||
return new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.DEFAULT_SIDE, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -357,7 +355,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new TrimFilter(tokenStream, false);
|
||||
return new TrimFilter(Lucene.ANALYZER_VERSION, tokenStream);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -393,7 +391,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new LengthFilter(true, tokenStream, 0, Integer.MAX_VALUE);
|
||||
return new LengthFilter(Lucene.ANALYZER_VERSION, tokenStream, 0, Integer.MAX_VALUE);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -477,7 +475,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new NGramTokenFilter(tokenStream);
|
||||
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -489,7 +487,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new NGramTokenFilter(tokenStream);
|
||||
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -501,7 +499,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_SIDE, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
}
|
||||
}));
|
||||
|
||||
|
@ -513,7 +511,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_SIDE, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
}
|
||||
}));
|
||||
|
||||
|
|
|
@ -131,7 +131,7 @@ public class HotThreads {
|
|||
// sort by delta CPU time on thread.
|
||||
List<MyThreadInfo> hotties = new ArrayList<MyThreadInfo>(threadInfos.values());
|
||||
// skip that for now
|
||||
CollectionUtil.quickSort(hotties, new Comparator<MyThreadInfo>() {
|
||||
CollectionUtil.introSort(hotties, new Comparator<MyThreadInfo>() {
|
||||
public int compare(MyThreadInfo o1, MyThreadInfo o2) {
|
||||
if ("cpu".equals(type)) {
|
||||
return (int) (o2.cpuTime - o1.cpuTime);
|
||||
|
|
|
@ -142,7 +142,7 @@ public class PlainHighlighter implements Highlighter {
|
|||
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
|
||||
}
|
||||
if (field.scoreOrdered()) {
|
||||
CollectionUtil.quickSort(fragsList, new Comparator<TextFragment>() {
|
||||
CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
|
||||
public int compare(TextFragment o1, TextFragment o2) {
|
||||
return Math.round(o2.getScore() - o1.getScore());
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ public final class FragmentBuilderHelper {
|
|||
* the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather
|
||||
* than in this hack... aka. "we are are working on in!" */
|
||||
final List<SubInfo> subInfos = fragInfo.getSubInfos();
|
||||
CollectionUtil.quickSort(subInfos, new Comparator<SubInfo>() {
|
||||
CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() {
|
||||
@Override
|
||||
public int compare(SubInfo o1, SubInfo o2) {
|
||||
int startOffset = o1.getTermsOffsets().get(0).getStartOffset();
|
||||
|
|
|
@ -17,22 +17,11 @@ package org.elasticsearch.search.rescore;
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.ComplexExplanation;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.SorterTemplate;
|
||||
import org.apache.lucene.util.IntroSorter;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
||||
|
@ -40,6 +29,10 @@ import org.elasticsearch.index.query.ParsedQuery;
|
|||
import org.elasticsearch.search.internal.ContextIndexSearcher;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
|
||||
final class QueryRescorer implements Rescorer {
|
||||
|
||||
public static final Rescorer INSTANCE = new QueryRescorer();
|
||||
|
@ -163,10 +156,10 @@ final class QueryRescorer implements Rescorer {
|
|||
private TopDocs merge(TopDocs primary, TopDocs secondary, QueryRescoreContext context) {
|
||||
DocIdSorter sorter = new DocIdSorter();
|
||||
sorter.array = primary.scoreDocs;
|
||||
sorter.mergeSort(0, sorter.array.length-1);
|
||||
sorter.sort(0, sorter.array.length);
|
||||
ScoreDoc[] primaryDocs = sorter.array;
|
||||
sorter.array = secondary.scoreDocs;
|
||||
sorter.mergeSort(0, sorter.array.length-1);
|
||||
sorter.sort(0, sorter.array.length);
|
||||
ScoreDoc[] secondaryDocs = sorter.array;
|
||||
int j = 0;
|
||||
float primaryWeight = context.queryWeight();
|
||||
|
@ -180,12 +173,12 @@ final class QueryRescorer implements Rescorer {
|
|||
}
|
||||
ScoreSorter scoreSorter = new ScoreSorter();
|
||||
scoreSorter.array = primaryDocs;
|
||||
scoreSorter.mergeSort(0, primaryDocs.length-1);
|
||||
scoreSorter.sort(0, primaryDocs.length);
|
||||
primary.setMaxScore(primaryDocs[0].score);
|
||||
return primary;
|
||||
}
|
||||
|
||||
private static final class DocIdSorter extends SorterTemplate {
|
||||
private static final class DocIdSorter extends IntroSorter {
|
||||
private ScoreDoc[] array;
|
||||
private ScoreDoc pivot;
|
||||
@Override
|
||||
|
@ -222,7 +215,7 @@ final class QueryRescorer implements Rescorer {
|
|||
return -1;
|
||||
}
|
||||
|
||||
private static final class ScoreSorter extends SorterTemplate {
|
||||
private static final class ScoreSorter extends IntroSorter {
|
||||
private ScoreDoc[] array;
|
||||
private ScoreDoc pivot;
|
||||
@Override
|
||||
|
|
|
@ -5,15 +5,15 @@
|
|||
"my_keep_filter":{
|
||||
"type":"keep",
|
||||
"keep_words" : ["Hello", "worlD"],
|
||||
"enable_position_increments" : true,
|
||||
"keep_words_case" : true
|
||||
},
|
||||
"my_case_sensitive_keep_filter":{
|
||||
"type":"keep",
|
||||
"keep_words" : ["Hello", "worlD"],
|
||||
"enable_position_increments" : false
|
||||
"enable_position_increments" : false,
|
||||
"version" : "4.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,73 +50,40 @@ public class MergePolicySettingsTest {
|
|||
public void testCompoundFileSettings() throws IOException {
|
||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||
|
||||
assertThat(new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
|
||||
}
|
||||
|
@ -150,57 +117,45 @@ public class MergePolicySettingsTest {
|
|||
{
|
||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||
TieredMergePolicyProvider mp = new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
|
||||
service.refreshSettings(build(1.0));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
|
||||
service.refreshSettings(build(0.1));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
||||
|
||||
service.refreshSettings(build(0.0));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
}
|
||||
|
||||
{
|
||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||
LogByteSizeMergePolicyProvider mp = new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
|
||||
service.refreshSettings(build(1.0));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
|
||||
service.refreshSettings(build(0.1));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
||||
|
||||
service.refreshSettings(build(0.0));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
}
|
||||
|
||||
{
|
||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||
LogDocMergePolicyProvider mp = new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
|
||||
service.refreshSettings(build(1.0));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||
|
||||
service.refreshSettings(build(0.1));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
||||
|
||||
service.refreshSettings(build(0.0));
|
||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue