[Lucene] Upgrade to Lucene 4.10

Closes #7584
Robert Muir 2014-09-05 12:21:08 -04:00
parent 5df9c048fe
commit 223dab8921
129 changed files with 999 additions and 6692 deletions

View File

@ -31,7 +31,7 @@
</parent>
<properties>
<lucene.version>4.9.0</lucene.version>
<lucene.version>4.10.0</lucene.version>
<tests.jvms>auto</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>

View File

@ -1,70 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis;
import java.io.Reader;
/**
* A simple analyzer wrapper that doesn't allow wrapping of components or the reader. By disallowing
* this, the thread-local resources are delegated to the wrapped analyzer and are not also
* allocated on this analyzer.
*
* This solves the problem of the per-field analyzer wrapper, which maintains thread-local
* per-field token stream components even though it could safely delegate them; these data
* structures can become expensive memory-wise.
*/
public abstract class SimpleAnalyzerWrapper extends AnalyzerWrapper {
static {
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5803)";
}
public SimpleAnalyzerWrapper() {
super(new DelegatingReuseStrategy());
((DelegatingReuseStrategy) getReuseStrategy()).wrapper = this;
}
@Override
protected final TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return super.wrapComponents(fieldName, components);
}
@Override
protected final Reader wrapReader(String fieldName, Reader reader) {
return super.wrapReader(fieldName, reader);
}
private static class DelegatingReuseStrategy extends ReuseStrategy {
AnalyzerWrapper wrapper;
@Override
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
return wrappedAnalyzer.getReuseStrategy().getReusableComponents(wrappedAnalyzer, fieldName);
}
@Override
public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
wrappedAnalyzer.getReuseStrategy().setReusableComponents(wrappedAnalyzer, fieldName, components);
}
}
}
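
For context, a minimal hypothetical sketch (not part of this commit) of how the removed wrapper was meant to be subclassed: only getWrappedAnalyzer is implemented, and the DelegatingReuseStrategy above routes component reuse to whichever analyzer is returned per field. The class and field names below are illustrative only.

// Hedged sketch, not part of this commit: a hypothetical per-field wrapper
// built on SimpleAnalyzerWrapper.
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;

public class PerFieldWrapper extends SimpleAnalyzerWrapper {
    private final Analyzer defaultAnalyzer;
    private final Map<String, Analyzer> perField;

    public PerFieldWrapper(Analyzer defaultAnalyzer, Map<String, Analyzer> perField) {
        this.defaultAnalyzer = defaultAnalyzer;
        this.perField = perField;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        // Reuse of token stream components is delegated to the analyzer returned here.
        Analyzer analyzer = perField.get(fieldName);
        return analyzer != null ? analyzer : defaultAnalyzer;
    }
}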

View File

@ -1,128 +0,0 @@
package org.apache.lucene.expressions;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.SortField;
/**
* Simple class that binds expression variable names to {@link SortField}s
* or other {@link Expression}s.
* <p>
* Example usage:
* <pre class="prettyprint">
* XSimpleBindings bindings = new XSimpleBindings();
* // document's text relevance score
* bindings.add(new SortField("_score", SortField.Type.SCORE));
* // integer NumericDocValues field (or from FieldCache)
* bindings.add(new SortField("popularity", SortField.Type.INT));
* // another expression
* bindings.add("recency", myRecencyExpression);
*
* // create a sort field in reverse order
* Sort sort = new Sort(expr.getSortField(bindings, true));
* </pre>
*
* @lucene.experimental
*/
public final class XSimpleBindings extends Bindings {
static {
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5806)";
}
final Map<String,Object> map = new HashMap<>();
/** Creates a new empty Bindings */
public XSimpleBindings() {}
/**
* Adds a SortField to the bindings.
* <p>
* This can be used to reference a DocValuesField, a field from
* FieldCache, the document's score, etc.
*/
public void add(SortField sortField) {
map.put(sortField.getField(), sortField);
}
/**
* Bind a {@link ValueSource} directly to the given name.
*/
public void add(String name, ValueSource source) { map.put(name, source); }
/**
* Adds an Expression to the bindings.
* <p>
* This can be used to reference expressions from other expressions.
*/
public void add(String name, Expression expression) {
map.put(name, expression);
}
@Override
public ValueSource getValueSource(String name) {
Object o = map.get(name);
if (o == null) {
throw new IllegalArgumentException("Invalid reference '" + name + "'");
} else if (o instanceof Expression) {
return ((Expression)o).getValueSource(this);
} else if (o instanceof ValueSource) {
return ((ValueSource)o);
}
SortField field = (SortField) o;
switch(field.getType()) {
case INT:
return new IntFieldSource(field.getField());
case LONG:
return new LongFieldSource(field.getField());
case FLOAT:
return new FloatFieldSource(field.getField());
case DOUBLE:
return new DoubleFieldSource(field.getField());
case SCORE:
return getScoreValueSource();
default:
throw new UnsupportedOperationException();
}
}
/**
* Traverses the graph of bindings, checking there are no cycles or missing references
* @throws IllegalArgumentException if the bindings is inconsistent
*/
public void validate() {
for (Object o : map.values()) {
if (o instanceof Expression) {
Expression expr = (Expression) o;
try {
expr.getValueSource(this);
} catch (StackOverflowError e) {
throw new IllegalArgumentException("Recursion Error: Cycle detected originating in (" + expr.sourceText + ")");
}
}
}
}
}
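
For orientation, a minimal sketch (not part of this commit) of how the removed XSimpleBindings fork was wired together with the expression compiler: bind the score and a numeric field, compile an expression, validate the bindings, and build a Sort. The expression text and field names are illustrative assumptions.

// Hedged sketch, not part of this commit.
import java.text.ParseException;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.XSimpleBindings;
import org.apache.lucene.expressions.js.XJavascriptCompiler;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

class XSimpleBindingsExample {
    static Sort popularitySort() throws ParseException {
        XSimpleBindings bindings = new XSimpleBindings();
        bindings.add(new SortField("_score", SortField.Type.SCORE));   // relevance score
        bindings.add(new SortField("popularity", SortField.Type.INT)); // numeric doc values field
        Expression expr = XJavascriptCompiler.compile("(_score + ln(popularity)) / 2");
        bindings.validate(); // fails fast on cycles or missing references
        return new Sort(expr.getSortField(bindings, true)); // reverse order
    }
}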

View File

@ -1,614 +0,0 @@
package org.apache.lucene.expressions.js;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.Tree;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.util.IOUtils;
import org.objectweb.asm.ClassWriter;
import org.objectweb.asm.Label;
import org.objectweb.asm.Opcodes;
import org.objectweb.asm.Type;
import org.objectweb.asm.commons.GeneratorAdapter;
/**
* An expression compiler for javascript expressions.
* <p>
* Example:
* <pre class="prettyprint">
* Expression foo = XJavascriptCompiler.compile("((0.3*popularity)/10.0)+(0.7*score)");
* </pre>
* <p>
* See the {@link org.apache.lucene.expressions.js package documentation} for
* the supported syntax and default functions.
* <p>
* You can compile with an alternate set of functions via {@link #compile(String, Map, ClassLoader)}.
* For example:
* <pre class="prettyprint">
* Map&lt;String,Method&gt; functions = new HashMap&lt;&gt;();
* // add all the default functions
* functions.putAll(XJavascriptCompiler.DEFAULT_FUNCTIONS);
* // add cbrt()
* functions.put("cbrt", Math.class.getMethod("cbrt", double.class));
* // call compile with customized function map
* Expression foo = XJavascriptCompiler.compile("cbrt(score)+ln(popularity)",
* functions,
* getClass().getClassLoader());
* </pre>
*
* @lucene.experimental
*/
public class XJavascriptCompiler {
static {
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5806)";
}
static final class Loader extends ClassLoader {
Loader(ClassLoader parent) {
super(parent);
}
public Class<? extends Expression> define(String className, byte[] bytecode) {
return defineClass(className, bytecode, 0, bytecode.length).asSubclass(Expression.class);
}
}
private static final int CLASSFILE_VERSION = Opcodes.V1_7;
// We use the same class name for all generated classes as they all have their own class loader.
// The source code is displayed as "source file name" in stack trace.
private static final String COMPILED_EXPRESSION_CLASS = XJavascriptCompiler.class.getName() + "$CompiledExpression";
private static final String COMPILED_EXPRESSION_INTERNAL = COMPILED_EXPRESSION_CLASS.replace('.', '/');
private static final Type EXPRESSION_TYPE = Type.getType(Expression.class);
private static final Type FUNCTION_VALUES_TYPE = Type.getType(FunctionValues.class);
private static final org.objectweb.asm.commons.Method
EXPRESSION_CTOR = getMethod("void <init>(String, String[])"),
EVALUATE_METHOD = getMethod("double evaluate(int, " + FunctionValues.class.getName() + "[])"),
DOUBLE_VAL_METHOD = getMethod("double doubleVal(int)");
// to work around import clash:
private static org.objectweb.asm.commons.Method getMethod(String method) {
return org.objectweb.asm.commons.Method.getMethod(method);
}
// This maximum length is theoretically 65535 bytes, but as it's CESU-8 encoded we don't know how large it is in bytes, so be safe
// rcmuir: "If your ranking function is that large you need to check yourself into a mental institution!"
private static final int MAX_SOURCE_LENGTH = 16384;
private final String sourceText;
private final Map<String, Integer> externalsMap = new LinkedHashMap<>();
private final ClassWriter classWriter = new ClassWriter(ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS);
private GeneratorAdapter gen;
private final Map<String,Method> functions;
/**
* Compiles the given expression.
*
* @param sourceText The expression to compile
* @return A new compiled expression
* @throws ParseException on failure to compile
*/
public static Expression compile(String sourceText) throws ParseException {
return new XJavascriptCompiler(sourceText).compileExpression(XJavascriptCompiler.class.getClassLoader());
}
/**
* Compiles the given expression with the supplied custom functions.
* <p>
* Functions must be {@code public static}, return {@code double} and
* can take from zero to 256 {@code double} parameters.
*
* @param sourceText The expression to compile
* @param functions map of String names to functions
* @param parent a {@code ClassLoader} that should be used as the parent of the loaded class.
* It must contain all classes referred to by the given {@code functions}.
* @return A new compiled expression
* @throws ParseException on failure to compile
*/
public static Expression compile(String sourceText, Map<String,Method> functions, ClassLoader parent) throws ParseException {
if (parent == null) {
throw new NullPointerException("A parent ClassLoader must be given.");
}
for (Method m : functions.values()) {
checkFunction(m, parent);
}
return new XJavascriptCompiler(sourceText, functions).compileExpression(parent);
}
/**
* This method is unused; it is just here to make sure that the function signatures don't change.
* If this method fails to compile, you also have to change the byte code generator to correctly
* use the FunctionValues class.
*/
@SuppressWarnings({"unused", "null"})
private static void unusedTestCompile() {
FunctionValues f = null;
double ret = f.doubleVal(2);
}
/**
* Constructs a compiler for expressions.
* @param sourceText The expression to compile
*/
private XJavascriptCompiler(String sourceText) {
this(sourceText, DEFAULT_FUNCTIONS);
}
/**
* Constructs a compiler for expressions with specific set of functions
* @param sourceText The expression to compile
*/
private XJavascriptCompiler(String sourceText, Map<String,Method> functions) {
if (sourceText == null) {
throw new NullPointerException();
}
this.sourceText = sourceText;
this.functions = functions;
}
/**
* Compiles the given expression with the specified parent classloader
*
* @return A new compiled expression
* @throws ParseException on failure to compile
*/
private Expression compileExpression(ClassLoader parent) throws ParseException {
try {
Tree antlrTree = getAntlrComputedExpressionTree();
beginCompile();
recursiveCompile(antlrTree, Type.DOUBLE_TYPE);
endCompile();
Class<? extends Expression> evaluatorClass = new Loader(parent)
.define(COMPILED_EXPRESSION_CLASS, classWriter.toByteArray());
Constructor<? extends Expression> constructor = evaluatorClass.getConstructor(String.class, String[].class);
return constructor.newInstance(sourceText, externalsMap.keySet().toArray(new String[externalsMap.size()]));
} catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException exception) {
throw new IllegalStateException("An internal error occurred attempting to compile the expression (" + sourceText + ").", exception);
}
}
private void beginCompile() {
classWriter.visit(CLASSFILE_VERSION,
Opcodes.ACC_PUBLIC | Opcodes.ACC_SUPER | Opcodes.ACC_FINAL | Opcodes.ACC_SYNTHETIC,
COMPILED_EXPRESSION_INTERNAL,
null, EXPRESSION_TYPE.getInternalName(), null);
String clippedSourceText = (sourceText.length() <= MAX_SOURCE_LENGTH) ?
sourceText : (sourceText.substring(0, MAX_SOURCE_LENGTH - 3) + "...");
classWriter.visitSource(clippedSourceText, null);
GeneratorAdapter constructor = new GeneratorAdapter(Opcodes.ACC_PUBLIC | Opcodes.ACC_SYNTHETIC,
EXPRESSION_CTOR, null, null, classWriter);
constructor.loadThis();
constructor.loadArgs();
constructor.invokeConstructor(EXPRESSION_TYPE, EXPRESSION_CTOR);
constructor.returnValue();
constructor.endMethod();
gen = new GeneratorAdapter(Opcodes.ACC_PUBLIC | Opcodes.ACC_SYNTHETIC,
EVALUATE_METHOD, null, null, classWriter);
}
private void recursiveCompile(Tree current, Type expected) {
int type = current.getType();
String text = current.getText();
switch (type) {
case XJavascriptParser.AT_CALL:
Tree identifier = current.getChild(0);
String call = identifier.getText();
int arguments = current.getChildCount() - 1;
Method method = functions.get(call);
if (method == null) {
throw new IllegalArgumentException("Unrecognized method call (" + call + ").");
}
int arity = method.getParameterTypes().length;
if (arguments != arity) {
throw new IllegalArgumentException("Expected (" + arity + ") arguments for method call (" +
call + "), but found (" + arguments + ").");
}
for (int argument = 1; argument <= arguments; ++argument) {
recursiveCompile(current.getChild(argument), Type.DOUBLE_TYPE);
}
gen.invokeStatic(Type.getType(method.getDeclaringClass()),
org.objectweb.asm.commons.Method.getMethod(method));
gen.cast(Type.DOUBLE_TYPE, expected);
break;
case XJavascriptParser.VARIABLE:
int index;
// normalize quotes
text = normalizeQuotes(text);
if (externalsMap.containsKey(text)) {
index = externalsMap.get(text);
} else {
index = externalsMap.size();
externalsMap.put(text, index);
}
gen.loadArg(1);
gen.push(index);
gen.arrayLoad(FUNCTION_VALUES_TYPE);
gen.loadArg(0);
gen.invokeVirtual(FUNCTION_VALUES_TYPE, DOUBLE_VAL_METHOD);
gen.cast(Type.DOUBLE_TYPE, expected);
break;
case XJavascriptParser.HEX:
pushLong(expected, Long.parseLong(text.substring(2), 16));
break;
case XJavascriptParser.OCTAL:
pushLong(expected, Long.parseLong(text.substring(1), 8));
break;
case XJavascriptParser.DECIMAL:
gen.push(Double.parseDouble(text));
gen.cast(Type.DOUBLE_TYPE, expected);
break;
case XJavascriptParser.AT_NEGATE:
recursiveCompile(current.getChild(0), Type.DOUBLE_TYPE);
gen.visitInsn(Opcodes.DNEG);
gen.cast(Type.DOUBLE_TYPE, expected);
break;
case XJavascriptParser.AT_ADD:
pushArith(Opcodes.DADD, current, expected);
break;
case XJavascriptParser.AT_SUBTRACT:
pushArith(Opcodes.DSUB, current, expected);
break;
case XJavascriptParser.AT_MULTIPLY:
pushArith(Opcodes.DMUL, current, expected);
break;
case XJavascriptParser.AT_DIVIDE:
pushArith(Opcodes.DDIV, current, expected);
break;
case XJavascriptParser.AT_MODULO:
pushArith(Opcodes.DREM, current, expected);
break;
case XJavascriptParser.AT_BIT_SHL:
pushShift(Opcodes.LSHL, current, expected);
break;
case XJavascriptParser.AT_BIT_SHR:
pushShift(Opcodes.LSHR, current, expected);
break;
case XJavascriptParser.AT_BIT_SHU:
pushShift(Opcodes.LUSHR, current, expected);
break;
case XJavascriptParser.AT_BIT_AND:
pushBitwise(Opcodes.LAND, current, expected);
break;
case XJavascriptParser.AT_BIT_OR:
pushBitwise(Opcodes.LOR, current, expected);
break;
case XJavascriptParser.AT_BIT_XOR:
pushBitwise(Opcodes.LXOR, current, expected);
break;
case XJavascriptParser.AT_BIT_NOT:
recursiveCompile(current.getChild(0), Type.LONG_TYPE);
gen.push(-1L);
gen.visitInsn(Opcodes.LXOR);
gen.cast(Type.LONG_TYPE, expected);
break;
case XJavascriptParser.AT_COMP_EQ:
pushCond(GeneratorAdapter.EQ, current, expected);
break;
case XJavascriptParser.AT_COMP_NEQ:
pushCond(GeneratorAdapter.NE, current, expected);
break;
case XJavascriptParser.AT_COMP_LT:
pushCond(GeneratorAdapter.LT, current, expected);
break;
case XJavascriptParser.AT_COMP_GT:
pushCond(GeneratorAdapter.GT, current, expected);
break;
case XJavascriptParser.AT_COMP_LTE:
pushCond(GeneratorAdapter.LE, current, expected);
break;
case XJavascriptParser.AT_COMP_GTE:
pushCond(GeneratorAdapter.GE, current, expected);
break;
case XJavascriptParser.AT_BOOL_NOT:
Label labelNotTrue = new Label();
Label labelNotReturn = new Label();
recursiveCompile(current.getChild(0), Type.INT_TYPE);
gen.visitJumpInsn(Opcodes.IFEQ, labelNotTrue);
pushBoolean(expected, false);
gen.goTo(labelNotReturn);
gen.visitLabel(labelNotTrue);
pushBoolean(expected, true);
gen.visitLabel(labelNotReturn);
break;
case XJavascriptParser.AT_BOOL_AND:
Label andFalse = new Label();
Label andEnd = new Label();
recursiveCompile(current.getChild(0), Type.INT_TYPE);
gen.visitJumpInsn(Opcodes.IFEQ, andFalse);
recursiveCompile(current.getChild(1), Type.INT_TYPE);
gen.visitJumpInsn(Opcodes.IFEQ, andFalse);
pushBoolean(expected, true);
gen.goTo(andEnd);
gen.visitLabel(andFalse);
pushBoolean(expected, false);
gen.visitLabel(andEnd);
break;
case XJavascriptParser.AT_BOOL_OR:
Label orTrue = new Label();
Label orEnd = new Label();
recursiveCompile(current.getChild(0), Type.INT_TYPE);
gen.visitJumpInsn(Opcodes.IFNE, orTrue);
recursiveCompile(current.getChild(1), Type.INT_TYPE);
gen.visitJumpInsn(Opcodes.IFNE, orTrue);
pushBoolean(expected, false);
gen.goTo(orEnd);
gen.visitLabel(orTrue);
pushBoolean(expected, true);
gen.visitLabel(orEnd);
break;
case XJavascriptParser.AT_COND_QUE:
Label condFalse = new Label();
Label condEnd = new Label();
recursiveCompile(current.getChild(0), Type.INT_TYPE);
gen.visitJumpInsn(Opcodes.IFEQ, condFalse);
recursiveCompile(current.getChild(1), expected);
gen.goTo(condEnd);
gen.visitLabel(condFalse);
recursiveCompile(current.getChild(2), expected);
gen.visitLabel(condEnd);
break;
default:
throw new IllegalStateException("Unknown operation specified: (" + current.getText() + ").");
}
}
private void pushArith(int operator, Tree current, Type expected) {
pushBinaryOp(operator, current, expected, Type.DOUBLE_TYPE, Type.DOUBLE_TYPE, Type.DOUBLE_TYPE);
}
private void pushShift(int operator, Tree current, Type expected) {
pushBinaryOp(operator, current, expected, Type.LONG_TYPE, Type.INT_TYPE, Type.LONG_TYPE);
}
private void pushBitwise(int operator, Tree current, Type expected) {
pushBinaryOp(operator, current, expected, Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
}
private void pushBinaryOp(int operator, Tree current, Type expected, Type arg1, Type arg2, Type returnType) {
recursiveCompile(current.getChild(0), arg1);
recursiveCompile(current.getChild(1), arg2);
gen.visitInsn(operator);
gen.cast(returnType, expected);
}
private void pushCond(int operator, Tree current, Type expected) {
Label labelTrue = new Label();
Label labelReturn = new Label();
recursiveCompile(current.getChild(0), Type.DOUBLE_TYPE);
recursiveCompile(current.getChild(1), Type.DOUBLE_TYPE);
gen.ifCmp(Type.DOUBLE_TYPE, operator, labelTrue);
pushBoolean(expected, false);
gen.goTo(labelReturn);
gen.visitLabel(labelTrue);
pushBoolean(expected, true);
gen.visitLabel(labelReturn);
}
private void pushBoolean(Type expected, boolean truth) {
switch (expected.getSort()) {
case Type.INT:
gen.push(truth);
break;
case Type.LONG:
gen.push(truth ? 1L : 0L);
break;
case Type.DOUBLE:
gen.push(truth ? 1. : 0.);
break;
default:
throw new IllegalStateException("Invalid expected type: " + expected);
}
}
private void pushLong(Type expected, long i) {
switch (expected.getSort()) {
case Type.INT:
gen.push((int) i);
break;
case Type.LONG:
gen.push(i);
break;
case Type.DOUBLE:
gen.push((double) i);
break;
default:
throw new IllegalStateException("Invalid expected type: " + expected);
}
}
private void endCompile() {
gen.returnValue();
gen.endMethod();
classWriter.visitEnd();
}
private Tree getAntlrComputedExpressionTree() throws ParseException {
CharStream input = new ANTLRStringStream(sourceText);
XJavascriptLexer lexer = new XJavascriptLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
XJavascriptParser parser = new XJavascriptParser(tokens);
try {
return parser.expression().tree;
} catch (RecognitionException exception) {
throw new IllegalArgumentException(exception);
} catch (RuntimeException exception) {
if (exception.getCause() instanceof ParseException) {
throw (ParseException)exception.getCause();
}
throw exception;
}
}
private static String normalizeQuotes(String text) {
StringBuilder out = new StringBuilder(text.length());
boolean inDoubleQuotes = false;
for (int i = 0; i < text.length(); ++i) {
char c = text.charAt(i);
if (c == '\\') {
c = text.charAt(++i);
if (c == '\\') {
out.append('\\'); // re-escape the backslash
}
// no escape for double quote
} else if (c == '\'') {
if (inDoubleQuotes) {
// escape in output
out.append('\\');
} else {
int j = findSingleQuoteStringEnd(text, i);
out.append(text, i, j); // copy up to end quote (leave end for append below)
i = j;
}
} else if (c == '"') {
c = '\''; // change beginning/ending doubles to singles
inDoubleQuotes = !inDoubleQuotes;
}
out.append(c);
}
return out.toString();
}
private static int findSingleQuoteStringEnd(String text, int start) {
++start; // skip beginning
while (text.charAt(start) != '\'') {
if (text.charAt(start) == '\\') {
++start; // blindly consume escape value
}
++start;
}
return start;
}
/**
* The default set of functions available to expressions.
* <p>
* See the {@link org.apache.lucene.expressions.js package documentation}
* for a list.
*/
public static final Map<String,Method> DEFAULT_FUNCTIONS;
static {
Map<String,Method> map = new HashMap<>();
try {
final Properties props = new Properties();
try (Reader in = IOUtils.getDecodingReader(JavascriptCompiler.class,
JavascriptCompiler.class.getSimpleName() + ".properties", StandardCharsets.UTF_8)) {
props.load(in);
}
for (final String call : props.stringPropertyNames()) {
final String[] vals = props.getProperty(call).split(",");
if (vals.length != 3) {
throw new Error("Syntax error while reading Javascript functions from resource");
}
final Class<?> clazz = Class.forName(vals[0].trim());
final String methodName = vals[1].trim();
final int arity = Integer.parseInt(vals[2].trim());
@SuppressWarnings({"rawtypes", "unchecked"}) Class[] args = new Class[arity];
Arrays.fill(args, double.class);
Method method = clazz.getMethod(methodName, args);
checkFunction(method, JavascriptCompiler.class.getClassLoader());
map.put(call, method);
}
} catch (NoSuchMethodException | ClassNotFoundException | IOException e) {
throw new Error("Cannot resolve function", e);
}
DEFAULT_FUNCTIONS = Collections.unmodifiableMap(map);
}
private static void checkFunction(Method method, ClassLoader parent) {
// We can only call the function if the given parent class loader of our compiled class has access to the method:
final ClassLoader functionClassloader = method.getDeclaringClass().getClassLoader();
if (functionClassloader != null) { // it is a system class iff null!
boolean found = false;
while (parent != null) {
if (parent == functionClassloader) {
found = true;
break;
}
parent = parent.getParent();
}
if (!found) {
throw new IllegalArgumentException(method + " is not declared by a class which is accessible by the given parent ClassLoader.");
}
}
// do some checks if the signature is "compatible":
if (!Modifier.isStatic(method.getModifiers())) {
throw new IllegalArgumentException(method + " is not static.");
}
if (!Modifier.isPublic(method.getModifiers())) {
throw new IllegalArgumentException(method + " is not public.");
}
if (!Modifier.isPublic(method.getDeclaringClass().getModifiers())) {
throw new IllegalArgumentException(method.getDeclaringClass().getName() + " is not public.");
}
for (Class<?> clazz : method.getParameterTypes()) {
if (!clazz.equals(double.class)) {
throw new IllegalArgumentException(method + " must take only double parameters");
}
}
if (method.getReturnType() != double.class) {
throw new IllegalArgumentException(method + " does not return a double.");
}
}
}
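
For orientation, a minimal sketch (not part of this commit) of compiling with the default functions plus one custom function, as the class javadoc above describes; the function name cbrt and the expression text are illustrative assumptions.

// Hedged sketch, not part of this commit.
import java.lang.reflect.Method;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.js.XJavascriptCompiler;

class XJavascriptCompilerExample {
    static Expression withCbrt() throws ParseException, NoSuchMethodException {
        Map<String, Method> functions = new HashMap<>(XJavascriptCompiler.DEFAULT_FUNCTIONS);
        // Custom functions must be public static, take only double parameters and return double.
        functions.put("cbrt", Math.class.getMethod("cbrt", double.class));
        return XJavascriptCompiler.compile("cbrt(score) + ln(popularity)",
                functions, XJavascriptCompilerExample.class.getClassLoader());
    }
}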

View File

@ -1,106 +0,0 @@
package org.apache.lucene.expressions.js;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
/**
* A helper to parse the context of a variable name, which is the base variable, followed by the
* sequence of array (integer or string indexed) and member accesses.
*/
public class XVariableContext {
static {
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5806)";
}
public static enum Type {
MEMBER, // "dot" access
STR_INDEX, // brackets with a string
INT_INDEX // brackets with a positive integer
}
public final Type type;
public final String text;
public final int integer;
private XVariableContext(Type c, String s, int i) {
type = c;
text = s;
integer = i;
}
/**
* Parses a normalized javascript variable. All strings in the variable should be single quoted,
* and there should be no spaces (except possibly within strings).
*/
public static final XVariableContext[] parse(String variable) {
char[] text = variable.toCharArray();
List<XVariableContext> contexts = new ArrayList<>();
int i = addMember(text, 0, contexts); // base variable is a "member" of the global namespace
while (i < text.length) {
if (text[i] == '[') {
if (text[++i] == '\'') {
i = addStringIndex(text, i, contexts);
} else {
i = addIntIndex(text, i, contexts);
}
++i; // move past end bracket
} else { // text[i] == '.', ie object member
i = addMember(text, i + 1, contexts);
}
}
return contexts.toArray(new XVariableContext[contexts.size()]);
}
// i points to start of member name
private static int addMember(final char[] text, int i, List<XVariableContext> contexts) {
int j = i + 1;
while (j < text.length && text[j] != '[' && text[j] != '.') ++j; // find first array or member access
contexts.add(new XVariableContext(Type.MEMBER, new String(text, i, j - i), -1));
return j;
}
// i points to start of single quoted index
private static int addStringIndex(final char[] text, int i, List<XVariableContext> contexts) {
++i; // move past quote
int j = i;
while (text[j] != '\'') { // find end of single quoted string
if (text[j] == '\\') ++j; // skip over escapes
++j;
}
StringBuffer buf = new StringBuffer(j - i); // space for string, without end quote
while (i < j) { // copy string to buffer (without begin/end quotes)
if (text[i] == '\\') ++i; // unescape escapes
buf.append(text[i]);
++i;
}
contexts.add(new XVariableContext(Type.STR_INDEX, buf.toString(), -1));
return j + 1; // move past quote, return end bracket location
}
// i points to start of integer index
private static int addIntIndex(final char[] text, int i, List<XVariableContext> contexts) {
int j = i + 1;
while (text[j] != ']') ++j; // find end of array access
int index = Integer.parseInt(new String(text, i, j - i));
contexts.add(new XVariableContext(Type.INT_INDEX, null, index));
return j;
}
}
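
For illustration, a small sketch (not part of this commit) of how parse() splits a normalized variable into its member and index accesses; the variable string below is an illustrative assumption.

// Hedged sketch, not part of this commit.
import org.apache.lucene.expressions.js.XVariableContext;

class XVariableContextExample {
    public static void main(String[] args) {
        // For "doc['field name'].value[0]" parse(...) yields four contexts:
        //   MEMBER "doc", STR_INDEX "field name", MEMBER "value", INT_INDEX 0
        for (XVariableContext part : XVariableContext.parse("doc['field name'].value[0]")) {
            String value = part.type == XVariableContext.Type.INT_INDEX
                    ? Integer.toString(part.integer) : part.text;
            System.out.println(part.type + " " + value);
        }
    }
}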

View File

@ -1,306 +0,0 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
/** maps per-segment ordinals to/from global ordinal space */
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
// TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums)
public class XOrdinalMap implements Accountable {
static {
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5780, LUCENE-5782)";
}
private static class SegmentMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
/** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
private static int[] map(final long[] weights) {
final int[] newToOld = new int[weights.length];
for (int i = 0; i < weights.length; ++i) {
newToOld[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
final int tmp = newToOld[i];
newToOld[i] = newToOld[j];
newToOld[j] = tmp;
}
@Override
protected int compare(int i, int j) {
// j first since we actually want higher weights first
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
}
}.sort(0, weights.length);
return newToOld;
}
/** Inverse the map. */
private static int[] inverse(int[] map) {
final int[] inverse = new int[map.length];
for (int i = 0; i < map.length; ++i) {
inverse[map[i]] = i;
}
return inverse;
}
private final int[] newToOld, oldToNew;
SegmentMap(long[] weights) {
newToOld = map(weights);
oldToNew = inverse(newToOld);
assert Arrays.equals(newToOld, inverse(oldToNew));
}
int newToOld(int segment) {
return newToOld[segment];
}
int oldToNew(int segment) {
return oldToNew[segment];
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
}
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedDocValues} instance as a weight.
* @see #build(Object, TermsEnum[], long[], float)
*/
public static XOrdinalMap build(Object owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedSetDocValues} instance as a weight.
* @see #build(Object, TermsEnum[], long[], float)
*/
public static XOrdinalMap build(Object owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Creates an ordinal map that allows mapping ords to/from a merged
* space from <code>subs</code>.
* @param owner a cache key
* @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
* not be dense (e.g. can be FilteredTermsEnums).
* @param weights a weight for each sub. This is ideally correlated with
* the number of unique terms that each sub introduces compared
* to the other subs
* @throws IOException if an I/O error occurred.
*/
public static XOrdinalMap build(Object owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
if (subs.length != weights.length) {
throw new IllegalArgumentException("subs and weights must have the same length");
}
// enums are not sorted, so let's sort to save memory
final SegmentMap segmentMap = new SegmentMap(weights);
return new XOrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(XOrdinalMap.class);
// cache key of whoever asked for this awful thing
final Object owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
final MonotonicAppendingLongBuffer globalOrdDeltas;
// globalOrd -> first segment container
final AppendingPackedLongBuffer firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
final SegmentMap segmentMap;
// ram usage
final long ramBytesUsed;
XOrdinalMap(Object owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
this.segmentMap = segmentMap;
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long segmentOrds[] = new long[subs.length];
ReaderSlice slices[] = new ReaderSlice[subs.length];
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
for (int i = 0; i < slices.length; i++) {
slices[i] = new ReaderSlice(0, 0, i);
indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
}
MultiTermsEnum mte = new MultiTermsEnum(slices);
mte.reset(indexes);
long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
for (int i = 0; i < mte.getMatchCount(); i++) {
int segmentIndex = matches[i].index;
long segmentOrd = matches[i].terms.ord();
long delta = globalOrd - segmentOrd;
// We compute the least segment where the term occurs. In case the
// first segment contains most (or better all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd) {
ordDeltaBits[segmentIndex] |= delta;
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
assert firstSegmentIndex < segmentOrds.length;
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
firstSegments.freeze();
globalOrdDeltas.freeze();
for (int i = 0; i < ordDeltas.length; ++i) {
ordDeltas[i].freeze();
}
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed()
+ firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final MonotonicAppendingLongBuffer deltas = ordDeltas[i];
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert !it.hasNext();
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
/**
* Given a segment number, return a {@link LongValues} instance that maps
* segment ordinals to global ordinals.
*/
public LongValues getGlobalOrds(int segmentIndex) {
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
}
/**
* Given a global ordinal, returns the corresponding ordinal within the first segment that
* contains this term (the segment number itself is returned by {@link #getFirstSegmentNumber}).
*/
public long getFirstSegmentOrd(long globalOrd) {
return globalOrd - globalOrdDeltas.get(globalOrd);
}
/**
* Given a global ordinal, returns the index of the first
* segment that contains this term.
*/
public int getFirstSegmentNumber(long globalOrd) {
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
}
/**
* Returns the total number of unique terms in global ord space.
*/
public long getValueCount() {
return globalOrdDeltas.size();
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
}
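
For orientation, a minimal sketch (not part of this commit) of how the removed XOrdinalMap fork was typically built over the leaves of an open DirectoryReader and used to translate a segment-local ordinal into global ordinal space. It assumes the field has sorted-set doc values in every segment; the field name and reader are illustrative assumptions.

// Hedged sketch, not part of this commit.
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.XOrdinalMap;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;

class XOrdinalMapExample {
    static XOrdinalMap buildFor(DirectoryReader reader, String field) throws IOException {
        SortedSetDocValues[] perSegment = new SortedSetDocValues[reader.leaves().size()];
        for (int i = 0; i < perSegment.length; i++) {
            // assumes every segment has sorted-set doc values for this field
            perSegment[i] = reader.leaves().get(i).reader().getSortedSetDocValues(field);
        }
        XOrdinalMap map = XOrdinalMap.build(reader.getCoreCacheKey(), perSegment, PackedInts.DEFAULT);
        LongValues toGlobal = map.getGlobalOrds(0);          // segment 0 -> global ordinal space
        long globalOrd = toGlobal.get(0L);                   // global ord of segment 0's first term
        int firstSegment = map.getFirstSegmentNumber(globalOrd); // segment that introduced the term
        return map;
    }
}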

View File

@ -26,7 +26,9 @@ import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.*;
import org.apache.lucene.util.*;
import org.apache.lucene.util.automaton.*;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PairOutputs.Pair;
@ -254,67 +256,95 @@ public class XAnalyzingSuggester extends Lookup {
return fst == null ? 0 : fst.ramBytesUsed();
}
private static void copyDestTransitions(State from, State to, List<Transition> transitions) {
if (to.isAccept()) {
from.setAccept(true);
}
for(Transition t : to.getTransitions()) {
transitions.add(t);
}
}
// Replaces SEP with epsilon or remaps them if
// we were asked to preserve them:
private static void replaceSep(Automaton a, boolean preserveSep, int replaceSep) {
private Automaton replaceSep(Automaton a) {
State[] states = a.getNumberedStates();
Automaton result = new Automaton();
// Go in reverse topo sort so we know we only have to
// make one pass:
for(int stateNumber=states.length-1;stateNumber >=0;stateNumber--) {
final State state = states[stateNumber];
List<Transition> newTransitions = new ArrayList<>();
for(Transition t : state.getTransitions()) {
assert t.getMin() == t.getMax();
if (t.getMin() == TokenStreamToAutomaton.POS_SEP) {
if (preserveSep) {
// Remap to SEP_LABEL:
newTransitions.add(new Transition(replaceSep, t.getDest()));
// Copy all states over
int numStates = a.getNumStates();
for(int s=0;s<numStates;s++) {
result.createState();
result.setAccept(s, a.isAccept(s));
}
// Go in reverse topo sort so we know we only have to
// make one pass:
Transition t = new Transition();
int[] topoSortStates = topoSortStates(a);
for(int i=0;i<topoSortStates.length;i++) {
int state = topoSortStates[topoSortStates.length-1-i];
int count = a.initTransition(state, t);
for(int j=0;j<count;j++) {
a.getNextTransition(t);
if (t.min == TokenStreamToAutomaton.POS_SEP) {
assert t.max == TokenStreamToAutomaton.POS_SEP;
if (preserveSep) {
// Remap to SEP_LABEL:
result.addTransition(state, t.dest, SEP_LABEL);
} else {
result.addEpsilon(state, t.dest);
}
} else if (t.min == TokenStreamToAutomaton.HOLE) {
assert t.max == TokenStreamToAutomaton.HOLE;
// Just remove the hole: there will then be two
// SEP tokens next to each other, which will only
// match another hole at search time. Note that
// it will also match an empty-string token ... if
// that's somehow a problem we can always map HOLE
// to a dedicated byte (and escape it in the
// input).
result.addEpsilon(state, t.dest);
} else {
copyDestTransitions(state, t.getDest(), newTransitions);
a.setDeterministic(false);
result.addTransition(state, t.dest, t.min, t.max);
}
} else if (t.getMin() == TokenStreamToAutomaton.HOLE) {
// Just remove the hole: there will then be two
// SEP tokens next to each other, which will only
// match another hole at search time. Note that
// it will also match an empty-string token ... if
// that's somehow a problem we can always map HOLE
// to a dedicated byte (and escape it in the
// input).
copyDestTransitions(state, t.getDest(), newTransitions);
a.setDeterministic(false);
} else {
newTransitions.add(t);
}
}
state.setTransitions(newTransitions.toArray(new Transition[newTransitions.size()]));
}
result.finishState();
return result;
}
protected Automaton convertAutomaton(Automaton a) {
if (queryPrefix != null) {
a = Automaton.concatenate(Arrays.asList(queryPrefix, a));
BasicOperations.determinize(a);
a = Operations.concatenate(Arrays.asList(queryPrefix, a));
a = Operations.determinize(a);
}
return a;
}
private int[] topoSortStates(Automaton a) {
int[] states = new int[a.getNumStates()];
final Set<Integer> visited = new HashSet<>();
final LinkedList<Integer> worklist = new LinkedList<>();
worklist.add(0);
visited.add(0);
int upto = 0;
states[upto] = 0;
upto++;
Transition t = new Transition();
while (worklist.size() > 0) {
int s = worklist.removeFirst();
int count = a.initTransition(s, t);
for (int i=0;i<count;i++) {
a.getNextTransition(t);
if (!visited.contains(t.dest)) {
visited.add(t.dest);
worklist.add(t.dest);
states[upto++] = t.dest;
}
}
}
return states;
}
/** Just escapes the 0xff byte (which we still use for SEP). */
private static final class EscapingTokenStreamToAutomaton extends TokenStreamToAutomaton {
final BytesRef spare = new BytesRef();
final BytesRefBuilder spare = new BytesRefBuilder();
private char sepLabel;
public EscapingTokenStreamToAutomaton(char sepLabel) {
@ -327,21 +357,16 @@ public class XAnalyzingSuggester extends Lookup {
for(int i=0;i<in.length;i++) {
byte b = in.bytes[in.offset+i];
if (b == (byte) sepLabel) {
if (spare.bytes.length == upto) {
spare.grow(upto+2);
}
spare.bytes[upto++] = (byte) sepLabel;
spare.bytes[upto++] = b;
spare.grow(upto+2);
spare.setByteAt(upto++, (byte) sepLabel);
spare.setByteAt(upto++, b);
} else {
if (spare.bytes.length == upto) {
spare.grow(upto+1);
}
spare.bytes[upto++] = b;
spare.grow(upto+1);
spare.setByteAt(upto++, b);
}
}
spare.offset = 0;
spare.length = upto;
return spare;
spare.setLength(upto);
return spare.get();
}
}
@ -427,7 +452,7 @@ public class XAnalyzingSuggester extends Lookup {
OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
OfflineSorter.ByteSequencesReader reader = null;
BytesRef scratch = new BytesRef();
BytesRefBuilder scratch = new BytesRefBuilder();
TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
@ -448,10 +473,10 @@ public class XAnalyzingSuggester extends Lookup {
Util.toBytesRef(path, scratch);
// length of the analyzed text (FST input)
if (scratch.length > Short.MAX_VALUE-2) {
throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length + ")");
if (scratch.length() > Short.MAX_VALUE-2) {
throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length() + ")");
}
short analyzedLength = (short) scratch.length;
short analyzedLength = (short) scratch.length();
// compute the required length:
// analyzed sequence + weight (4) + surface + analyzedLength (short)
@ -476,7 +501,7 @@ public class XAnalyzingSuggester extends Lookup {
output.writeShort(analyzedLength);
output.writeBytes(scratch.bytes, scratch.offset, scratch.length);
output.writeBytes(scratch.bytes(), 0, scratch.length());
output.writeInt(encodeWeight(iterator.weight()));
@ -513,10 +538,10 @@ public class XAnalyzingSuggester extends Lookup {
Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
// Build FST:
BytesRef previousAnalyzed = null;
BytesRef analyzed = new BytesRef();
BytesRefBuilder previousAnalyzed = null;
BytesRefBuilder analyzed = new BytesRefBuilder();
BytesRef surface = new BytesRef();
IntsRef scratchInts = new IntsRef();
IntsRefBuilder scratchInts = new IntsRefBuilder();
ByteArrayDataInput input = new ByteArrayDataInput();
// Used to remove duplicate surface forms (but we
@ -527,28 +552,28 @@ public class XAnalyzingSuggester extends Lookup {
int dedup = 0;
while (reader.read(scratch)) {
input.reset(scratch.bytes, scratch.offset, scratch.length);
input.reset(scratch.bytes(), 0, scratch.length());
short analyzedLength = input.readShort();
analyzed.grow(analyzedLength+2);
input.readBytes(analyzed.bytes, 0, analyzedLength);
analyzed.length = analyzedLength;
input.readBytes(analyzed.bytes(), 0, analyzedLength);
analyzed.setLength(analyzedLength);
long cost = input.readInt();
surface.bytes = scratch.bytes;
surface.bytes = scratch.bytes();
if (hasPayloads) {
surface.length = input.readShort();
surface.offset = input.getPosition();
} else {
surface.offset = input.getPosition();
surface.length = scratch.length - surface.offset;
surface.length = scratch.length() - surface.offset;
}
if (previousAnalyzed == null) {
previousAnalyzed = new BytesRef();
previousAnalyzed = new BytesRefBuilder();
previousAnalyzed.copyBytes(analyzed);
seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
} else if (analyzed.equals(previousAnalyzed)) {
} else if (analyzed.get().equals(previousAnalyzed.get())) {
dedup++;
if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
// More than maxSurfaceFormsPerAnalyzedForm
@ -574,23 +599,22 @@ public class XAnalyzingSuggester extends Lookup {
// NOTE: must be byte 0 so we sort before whatever
// is next
analyzed.bytes[analyzed.offset+analyzed.length] = 0;
analyzed.bytes[analyzed.offset+analyzed.length+1] = (byte) dedup;
analyzed.length += 2;
analyzed.append((byte) 0);
analyzed.append((byte) dedup);
Util.toIntsRef(analyzed, scratchInts);
Util.toIntsRef(analyzed.get(), scratchInts);
//System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
if (!hasPayloads) {
builder.add(scratchInts, outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
} else {
int payloadOffset = input.getPosition() + surface.length;
int payloadLength = scratch.length - payloadOffset;
int payloadLength = scratch.length() - payloadOffset;
BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
br.bytes[surface.length] = (byte) payloadSep;
System.arraycopy(scratch.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
System.arraycopy(scratch.bytes(), payloadOffset, br.bytes, surface.length+1, payloadLength);
br.length = br.bytes.length;
builder.add(scratchInts, outputs.newPair(cost, br));
builder.add(scratchInts.get(), outputs.newPair(cost, br));
}
}
fst = builder.finish();
@ -647,7 +671,7 @@ public class XAnalyzingSuggester extends Lookup {
return true;
}
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRef spare) {
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
LookupResult result;
if (hasPayloads) {
int sepIndex = -1;
@ -658,16 +682,14 @@ public class XAnalyzingSuggester extends Lookup {
}
}
assert sepIndex != -1;
spare.grow(sepIndex);
final int payloadLen = output2.length - sepIndex - 1;
UnicodeUtil.UTF8toUTF16(output2.bytes, output2.offset, sepIndex, spare);
spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
BytesRef payload = new BytesRef(payloadLen);
System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
payload.length = payloadLen;
result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
} else {
spare.grow(output2.length);
UnicodeUtil.UTF8toUTF16(output2, spare);
spare.copyUTF8Bytes(output2);
result = new LookupResult(spare.toString(), decodeWeight(output1));
}
@ -716,7 +738,7 @@ public class XAnalyzingSuggester extends Lookup {
Automaton lookupAutomaton = toLookupAutomaton(key);
final CharsRef spare = new CharsRef();
final CharsRefBuilder spare = new CharsRefBuilder();
//System.out.println(" now intersect exactFirst=" + exactFirst);
@ -888,20 +910,28 @@ public class XAnalyzingSuggester extends Lookup {
}
public final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
// Analyze surface form:
TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
return toFiniteStrings(ts2a, ts);
}
public final Set<IntsRef> toFiniteStrings(final TokenStreamToAutomaton ts2a, TokenStream ts) throws IOException {
// Analyze surface form:
TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
return toFiniteStrings(ts2a, ts);
}
public final Set<IntsRef> toFiniteStrings(final TokenStreamToAutomaton ts2a, final TokenStream ts) throws IOException {
Automaton automaton = null;
try {
// Create corresponding automaton: labels are bytes
// from each analyzed token, with byte 0 used as
// separator between tokens:
Automaton automaton = ts2a.toAutomaton(ts);
ts.close();
// Create corresponding automaton: labels are bytes
// from each analyzed token, with byte 0 used as
// separator between tokens:
automaton = ts2a.toAutomaton(ts);
} finally {
IOUtils.closeWhileHandlingException(ts);
}
replaceSep(automaton, preserveSep, sepLabel);
automaton = replaceSep(automaton);
automaton = convertAutomaton(automaton);
// TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
// assert SpecialOperations.isFinite(automaton);
// Get all paths from the automaton (there can be
// more than one path, eg if the analyzer created a
@ -910,27 +940,27 @@ public class XAnalyzingSuggester extends Lookup {
// TODO: we could walk & add simultaneously, so we
// don't have to alloc [possibly biggish]
// intermediate HashSet in RAM:
return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
return Operations.getFiniteStrings(automaton, maxGraphExpansions);
}
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
// Turn tokenstream into automaton:
TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
Automaton automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
ts.close();
// TODO: is there a Reader from a CharSequence?
// Turn tokenstream into automaton:
Automaton automaton = null;
TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
try {
automaton = getTokenStreamToAutomaton().toAutomaton(ts);
} finally {
IOUtils.closeWhileHandlingException(ts);
}
// TODO: we could use the end offset to "guess"
// whether the final token was a partial token; this
// would only be a heuristic ... but maybe an OK one.
// This way we could eg differentiate "net" from "net ",
// which we can't today...
automaton = replaceSep(automaton);
replaceSep(automaton, preserveSep, sepLabel);
// TODO: we can optimize this somewhat by determinizing
// while we convert
BasicOperations.determinize(automaton);
return automaton;
// TODO: we can optimize this somewhat by determinizing
// while we convert
automaton = Operations.determinize(automaton);
return automaton;
}
@ -967,10 +997,10 @@ public class XAnalyzingSuggester extends Lookup {
public static class XBuilder {
private Builder<Pair<Long, BytesRef>> builder;
private int maxSurfaceFormsPerAnalyzedForm;
private IntsRef scratchInts = new IntsRef();
private IntsRefBuilder scratchInts = new IntsRefBuilder();
private final PairOutputs<Long, BytesRef> outputs;
private boolean hasPayloads;
private BytesRef analyzed = new BytesRef();
private BytesRefBuilder analyzed = new BytesRefBuilder();
private final SurfaceFormAndPayload[] surfaceFormsAndPayload;
private int count;
private ObjectIntOpenHashMap<BytesRef> seenSurfaceForms = HppcMaps.Object.Integer.ensureNoNullKeys(256, 0.75f);
@ -986,8 +1016,8 @@ public class XAnalyzingSuggester extends Lookup {
}
public void startTerm(BytesRef analyzed) {
this.analyzed.copyBytes(analyzed);
this.analyzed.grow(analyzed.length+2);
this.analyzed.copyBytes(analyzed);
}
private final static class SurfaceFormAndPayload implements Comparable<SurfaceFormAndPayload> {
@ -1063,14 +1093,15 @@ public class XAnalyzingSuggester extends Lookup {
public void finishTerm(long defaultWeight) throws IOException {
ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
int deduplicator = 0;
analyzed.bytes[analyzed.offset + analyzed.length] = 0;
analyzed.length += 2;
analyzed.append((byte) 0);
analyzed.setLength(analyzed.length() + 1);
analyzed.grow(analyzed.length());
for (int i = 0; i < count; i++) {
analyzed.bytes[analyzed.offset + analyzed.length - 1 ] = (byte) deduplicator++;
Util.toIntsRef(analyzed, scratchInts);
analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++);
Util.toIntsRef(analyzed.get(), scratchInts);
SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i];
long cost = candiate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight;
builder.add(scratchInts, outputs.newPair(cost, candiate.payload));
builder.add(scratchInts.get(), outputs.newPair(cost, candiate.payload));
}
seenSurfaceForms.clear();
count = 0;
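For readers less familiar with the 4.10 utility classes, here is a minimal standalone sketch (not part of this commit) of the BytesRefBuilder pattern the rewritten XBuilder code relies on: the builder owns its backing array, so there is no offset bookkeeping, and a read-only BytesRef view comes from get(). Only lucene-core 4.10.0 on the classpath is assumed; the sample bytes and class name are made up.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class BytesRefBuilderSketch {
    public static void main(String[] args) {
        BytesRefBuilder analyzed = new BytesRefBuilder();
        analyzed.copyBytes(new BytesRef("foo"));              // copy the analyzed form
        analyzed.append((byte) 0);                            // end-of-form marker
        analyzed.append((byte) 0);                            // room for the dedup byte
        analyzed.setByteAt(analyzed.length() - 1, (byte) 1);  // bump the dedup counter
        BytesRef view = analyzed.get();                       // read-only view, offset is always 0
        System.out.println(view.length);                      // 5
    }
}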

View File

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.*;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
@ -204,7 +205,7 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
if (unicodeAware) {
// FLORIAN EDIT: get converted Automaton from superclass
Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
BasicOperations.determinize(utf8automaton);
utf8automaton = Operations.determinize(utf8automaton);
return utf8automaton;
} else {
return super.convertAutomaton(a);
@ -219,46 +220,40 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
}
Automaton toLevenshteinAutomata(Automaton automaton) {
final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
final Set<IntsRef> ref = Operations.getFiniteStrings(automaton, -1);
Automaton subs[] = new Automaton[ref.size()];
int upto = 0;
for (IntsRef path : ref) {
if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
subs[upto] = BasicAutomata.makeString(path.ints, path.offset, path.length);
upto++;
} else {
Automaton prefix = BasicAutomata.makeString(path.ints, path.offset, nonFuzzyPrefix);
int ints[] = new int[path.length-nonFuzzyPrefix];
System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length);
// TODO: maybe add alphaMin to LevenshteinAutomata,
// and pass 1 instead of 0? We probably don't want
// to allow the trailing dedup bytes to be
// edited... but then 0 byte is "in general" allowed
// on input (but not in UTF8).
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
Automaton levAutomaton = lev.toAutomaton(maxEdits);
Automaton combined = BasicOperations.concatenate(Arrays.asList(prefix, levAutomaton));
combined.setDeterministic(true); // its like the special case in concatenate itself, except we cloneExpanded already
subs[upto] = combined;
upto++;
}
if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
subs[upto] = Automata.makeString(path.ints, path.offset, path.length);
upto++;
} else {
int ints[] = new int[path.length-nonFuzzyPrefix];
System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length);
// TODO: maybe add alphaMin to LevenshteinAutomata,
// and pass 1 instead of 0? We probably don't want
// to allow the trailing dedup bytes to be
// edited... but then 0 byte is "in general" allowed
// on input (but not in UTF8).
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
subs[upto] = lev.toAutomaton(maxEdits, UnicodeUtil.newString(path.ints, path.offset, nonFuzzyPrefix));
upto++;
}
}
if (subs.length == 0) {
// automaton is empty, there are no accepted paths through it
return BasicAutomata.makeEmpty(); // matches nothing
// automaton is empty, there are no accepted paths through it
return Automata.makeEmpty(); // matches nothing
} else if (subs.length == 1) {
// no synonyms or anything: just a single path through the tokenstream
return subs[0];
// no synonyms or anything: just a single path through the tokenstream
return subs[0];
} else {
// multiple paths: this is really scary! is it slow?
// maybe we should not do this and throw UOE?
Automaton a = BasicOperations.union(Arrays.asList(subs));
// TODO: we could call toLevenshteinAutomata() before det?
// this only happens if you have multiple paths anyway (e.g. synonyms)
BasicOperations.determinize(a);
return a;
// multiple paths: this is really scary! is it slow?
// maybe we should not do this and throw UOE?
Automaton a = Operations.union(Arrays.asList(subs));
// TODO: we could call toLevenshteinAutomata() before det?
// this only happens if you have multiple paths anyway (e.g. synonyms)
return Operations.determinize(a);
}
}
}
}
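The suggester changes above all follow the same 4.10 automaton API shift: BasicAutomata, BasicOperations and SpecialOperations are folded into Automata and Operations, and each operation now returns an automaton instead of mutating its argument, which is why the code reassigns the result. A small sketch under that assumption; the class name and sample strings are made up, only lucene-core 4.10.0 is assumed.

import java.util.Arrays;

import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

public class AutomatonApiSketch {
    public static void main(String[] args) {
        Automaton a = Automata.makeString("net");
        Automaton b = Automata.makeString("network");
        // union/determinize hand back new automata; nothing is modified in place
        Automaton union = Operations.union(Arrays.asList(a, b));
        Automaton det = Operations.determinize(union);
        System.out.println(Operations.run(det, "network")); // true
        System.out.println(det.getNumStates());
    }
}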

View File

@ -199,14 +199,14 @@ public class Version implements Serializable {
public static final int V_1_3_3_ID = /*00*/1030399;
public static final Version V_1_3_3 = new Version(V_1_3_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_9);
public static final int V_1_4_0_ID = /*00*/1040099;
public static final Version V_1_4_0 = new Version(V_1_4_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_9);
public static final Version V_1_4_0 = new Version(V_1_4_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_10_0);
public static final int V_2_0_0_ID = /*00*/2000099;
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_9);
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_10_0);
public static final Version CURRENT = V_2_0_0;
static {
assert CURRENT.luceneVersion == Lucene.VERSION : "Version must be upgraded to [" + Lucene.VERSION + "] is still set to [" + CURRENT.luceneVersion + "]";
assert CURRENT.luceneVersion.equals(Lucene.VERSION) : "Version must be upgraded to [" + Lucene.VERSION + "] is still set to [" + CURRENT.luceneVersion + "]";
}
public static Version readVersion(StreamInput in) throws IOException {

View File

@ -22,8 +22,7 @@ package org.elasticsearch.action.search.type;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.action.search.SearchRequest;
@ -85,21 +84,19 @@ public abstract class TransportSearchHelper {
sb.append(entry.getKey()).append(':').append(entry.getValue()).append(';');
}
}
BytesRef bytesRef = new BytesRef();
UnicodeUtil.UTF16toUTF8(sb, 0, sb.length(), bytesRef);
BytesRef bytesRef = new BytesRef(sb);
return Base64.encodeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length, Base64.URL_SAFE);
}
public static ParsedScrollId parseScrollId(String scrollId) {
CharsRef spare = new CharsRef();
CharsRefBuilder spare = new CharsRefBuilder();
try {
byte[] decode = Base64.decode(scrollId, Base64.URL_SAFE);
UnicodeUtil.UTF8toUTF16(decode, 0, decode.length, spare);
spare.copyUTF8Bytes(decode, 0, decode.length);
} catch (Exception e) {
throw new ElasticsearchIllegalArgumentException("Failed to decode scrollId", e);
}
String[] elements = Strings.splitStringToArray(spare, ';');
String[] elements = Strings.splitStringToArray(spare.get(), ';');
if (elements.length < 2) {
throw new ElasticsearchIllegalArgumentException("Malformed scrollId [" + scrollId + "]");
}
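The scroll-id change is the UTF-8/UTF-16 conversion cleanup that recurs throughout this commit: new BytesRef(CharSequence) encodes on the way out, CharsRefBuilder.copyUTF8Bytes decodes on the way back. A round-trip sketch with a made-up scroll id value, assuming lucene-core 4.10.0:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

public class ScrollIdRoundTripSketch {
    public static void main(String[] args) {
        String scrollId = "queryThenFetch;2;node1:42;node2:43;"; // hypothetical value

        // encode: replaces UnicodeUtil.UTF16toUTF8 into a scratch BytesRef
        BytesRef utf8 = new BytesRef(scrollId);

        // decode: replaces UnicodeUtil.UTF8toUTF16 into a hand-managed CharsRef
        CharsRefBuilder spare = new CharsRefBuilder();
        spare.copyUTF8Bytes(utf8.bytes, utf8.offset, utf8.length);

        System.out.println(scrollId.equals(spare.get().toString())); // true
    }
}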

View File

@ -22,14 +22,12 @@ package org.elasticsearch.action.termvector;
import com.carrotsearch.hppc.ObjectLongOpenHashMap;
import com.carrotsearch.hppc.cursors.ObjectLongCursor;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.*;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
@ -227,8 +225,8 @@ public final class TermVectorFields extends Fields {
int[] positions = new int[1];
int[] startOffsets = new int[1];
int[] endOffsets = new int[1];
BytesRef[] payloads = new BytesRef[1];
final BytesRef spare = new BytesRef();
BytesRefBuilder[] payloads = new BytesRefBuilder[1];
final BytesRefBuilder spare = new BytesRefBuilder();
@Override
public BytesRef next() throws IOException {
@ -237,8 +235,8 @@ public final class TermVectorFields extends Fields {
int termVectorSize = perFieldTermVectorInput.readVInt();
spare.grow(termVectorSize);
// ...then the value.
perFieldTermVectorInput.readBytes(spare.bytes, 0, termVectorSize);
spare.length = termVectorSize;
perFieldTermVectorInput.readBytes(spare.bytes(), 0, termVectorSize);
spare.setLength(termVectorSize);
if (hasTermStatistic) {
docFreq = readPotentiallyNegativeVInt(perFieldTermVectorInput);
totalTermFrequency = readPotentiallyNegativeVLong(perFieldTermVectorInput);
@ -253,7 +251,7 @@ public final class TermVectorFields extends Fields {
// curentPosition etc. so that we can just iterate
// later
writeInfos(perFieldTermVectorInput);
return spare;
return spare.get();
} else {
return null;
@ -272,13 +270,11 @@ public final class TermVectorFields extends Fields {
if (hasPayloads) {
int payloadLength = input.readVInt();
if (payloads[i] == null) {
payloads[i] = new BytesRef(payloadLength);
} else {
payloads[i].grow(payloadLength);
payloads[i] = new BytesRefBuilder();
}
input.readBytes(payloads[i].bytes, 0, payloadLength);
payloads[i].length = payloadLength;
payloads[i].offset = 0;
payloads[i].grow(payloadLength);
input.readBytes(payloads[i].bytes(), 0, payloadLength);
payloads[i].setLength(payloadLength);
}
}
}
@ -293,9 +289,7 @@ public final class TermVectorFields extends Fields {
}
if (hasPayloads) {
if (payloads.length < freq) {
final BytesRef[] newArray = new BytesRef[ArrayUtil.oversize(freq, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(payloads, 0, newArray, 0, payloads.length);
payloads = newArray;
payloads = Arrays.copyOf(payloads, ArrayUtil.oversize(freq, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
}
}
}
@ -317,7 +311,7 @@ public final class TermVectorFields extends Fields {
@Override
public BytesRef term() throws IOException {
return spare;
return spare.get();
}
@Override
@ -406,10 +400,10 @@ public final class TermVectorFields extends Fields {
private int freq;
private int[] startOffsets;
private int[] positions;
private BytesRef[] payloads;
private BytesRefBuilder[] payloads;
private int[] endOffsets;
private DocsAndPositionsEnum reset(int[] positions, int[] startOffsets, int[] endOffsets, BytesRef[] payloads, int freq) {
private DocsAndPositionsEnum reset(int[] positions, int[] startOffsets, int[] endOffsets, BytesRefBuilder[] payloads, int freq) {
curPos = -1;
doc = -1;
this.hasPositions = positions != null;
@ -468,7 +462,13 @@ public final class TermVectorFields extends Fields {
@Override
public BytesRef getPayload() throws IOException {
assert curPos < freq && curPos >= 0;
return hasPayloads ? payloads[curPos] : null;
if (hasPayloads) {
final BytesRefBuilder payload = payloads[curPos];
if (payload != null) {
return payload.get();
}
}
return null;
}
@Override
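The payload handling above switches from hand-resized BytesRef instances to BytesRefBuilder scratch buffers. A small sketch of that read pattern with a made-up, length-prefixed byte array, assuming lucene-core 4.10.0:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class PayloadBufferSketch {
    public static void main(String[] args) throws Exception {
        byte[] wire = new byte[] {3, 'a', 'b', 'c'}; // VInt length, then the payload bytes
        ByteArrayDataInput in = new ByteArrayDataInput(wire);

        BytesRefBuilder payload = new BytesRefBuilder();
        int len = in.readVInt();
        payload.grow(len);                      // ensure the backing array is large enough
        in.readBytes(payload.bytes(), 0, len);  // read straight into the builder's array
        payload.setLength(len);

        BytesRef view = payload.get();          // no offset to track, it is always 0
        System.out.println(view.utf8ToString()); // abc
    }
}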

View File

@ -26,8 +26,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
@ -175,7 +174,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
return builder;
}
builder.startObject(FieldStrings.TERM_VECTORS);
final CharsRef spare = new CharsRef();
final CharsRefBuilder spare = new CharsRefBuilder();
Fields theFields = getFields();
Iterator<String> fieldIter = theFields.iterator();
while (fieldIter.hasNext()) {
@ -185,7 +184,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
return builder;
}
private void buildField(XContentBuilder builder, final CharsRef spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
String fieldName = fieldIter.next();
builder.startObject(fieldName);
Terms curTerms = theFields.terms(fieldName);
@ -200,10 +199,10 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
builder.endObject();
}
private void buildTerm(XContentBuilder builder, final CharsRef spare, Terms curTerms, TermsEnum termIter) throws IOException {
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter) throws IOException {
// start term, optimized writing
BytesRef term = termIter.next();
UnicodeUtil.UTF8toUTF16(term, spare);
spare.copyUTF8Bytes(term);
builder.startObject(spare.toString());
buildTermStatistics(builder, termIter);
// finally write the term vectors

View File

@ -21,10 +21,7 @@ package org.elasticsearch.common;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.util.CollectionUtils;
@ -1014,14 +1011,12 @@ public class Strings {
}
public static byte[] toUTF8Bytes(CharSequence charSequence) {
return toUTF8Bytes(charSequence, new BytesRef());
return toUTF8Bytes(charSequence, new BytesRefBuilder());
}
public static byte[] toUTF8Bytes(CharSequence charSequence, BytesRef spare) {
UnicodeUtil.UTF16toUTF8(charSequence, 0, charSequence.length(), spare);
final byte[] bytes = new byte[spare.length];
System.arraycopy(spare.bytes, spare.offset, bytes, 0, bytes.length);
return bytes;
public static byte[] toUTF8Bytes(CharSequence charSequence, BytesRefBuilder spare) {
spare.copyChars(charSequence);
return Arrays.copyOf(spare.bytes(), spare.length());
}
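The helper above is small enough to show end to end; this sketch just exercises the same shape (copyChars encodes UTF-8 into the builder, Arrays.copyOf trims the backing array) with a made-up input string, assuming lucene-core 4.10.0:

import java.util.Arrays;

import org.apache.lucene.util.BytesRefBuilder;

public class ToUtf8BytesSketch {
    static byte[] toUTF8Bytes(CharSequence charSequence, BytesRefBuilder spare) {
        spare.copyChars(charSequence);                        // UTF-16 -> UTF-8 into the builder
        return Arrays.copyOf(spare.bytes(), spare.length());  // trim to the encoded length
    }

    public static void main(String[] args) {
        byte[] bytes = toUTF8Bytes("héllo", new BytesRefBuilder());
        System.out.println(bytes.length); // 6: the accented character takes two UTF-8 bytes
    }
}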

View File

@ -21,7 +21,6 @@ package org.elasticsearch.common.bytes;
import com.google.common.base.Charsets;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.io.Channels;
import org.elasticsearch.common.io.stream.BytesStreamInput;
@ -43,8 +42,7 @@ public class BytesArray implements BytesReference {
private int length;
public BytesArray(String bytes) {
BytesRef bytesRef = new BytesRef();
UnicodeUtil.UTF16toUTF8(bytes, 0, bytes.length(), bytesRef);
BytesRef bytesRef = new BytesRef(bytes);
this.bytes = bytesRef.bytes;
this.offset = bytesRef.offset;
this.length = bytesRef.length;

View File

@ -20,8 +20,7 @@
package org.elasticsearch.common.bytes;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.io.Channels;
import org.elasticsearch.common.io.stream.StreamInput;
@ -274,8 +273,8 @@ public class PagedBytesReference implements BytesReference {
}
byte[] bytes = toBytes();
final CharsRef ref = new CharsRef(length);
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
final CharsRefBuilder ref = new CharsRefBuilder();
ref.copyUTF8Bytes(bytes, offset, length);
return ref.toString();
}

View File

@ -20,7 +20,7 @@
package org.elasticsearch.common.io.stream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
@ -254,15 +254,14 @@ public abstract class StreamInput extends InputStream {
return null;
}
private final CharsRef spare = new CharsRef();
private final CharsRefBuilder spare = new CharsRefBuilder();
public String readString() throws IOException {
final int charCount = readVInt();
spare.offset = 0;
spare.length = 0;
spare.clear();
spare.grow(charCount);
int c = 0;
while (spare.length < charCount) {
while (spare.length() < charCount) {
c = readByte() & 0xff;
switch (c >> 4) {
case 0:
@ -273,14 +272,14 @@ public abstract class StreamInput extends InputStream {
case 5:
case 6:
case 7:
spare.chars[spare.length++] = (char) c;
spare.append((char) c);
break;
case 12:
case 13:
spare.chars[spare.length++] = (char) ((c & 0x1F) << 6 | readByte() & 0x3F);
spare.append((char) ((c & 0x1F) << 6 | readByte() & 0x3F));
break;
case 14:
spare.chars[spare.length++] = (char) ((c & 0x0F) << 12 | (readByte() & 0x3F) << 6 | (readByte() & 0x3F) << 0);
spare.append((char) ((c & 0x0F) << 12 | (readByte() & 0x3F) << 6 | (readByte() & 0x3F) << 0));
break;
}
}

View File

@ -20,17 +20,15 @@
package org.elasticsearch.common.io.stream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.UTF8StreamWriter;
import org.elasticsearch.common.text.Text;
import org.joda.time.ReadableInstant;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.ref.SoftReference;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
@ -195,14 +193,14 @@ public abstract class StreamOutput extends OutputStream {
}
}
private final BytesRef spare = new BytesRef();
private final BytesRefBuilder spare = new BytesRefBuilder();
public void writeText(Text text) throws IOException {
if (!text.hasBytes()) {
final String string = text.string();
UnicodeUtil.UTF16toUTF8(string, 0, string.length(), spare);
writeInt(spare.length);
write(spare.bytes, spare.offset, spare.length);
spare.copyChars(string);
writeInt(spare.length());
write(spare.bytes(), 0, spare.length());
} else {
BytesReference bytes = text.bytes();
writeInt(bytes.length());

View File

@ -20,6 +20,7 @@
package org.elasticsearch.common.lucene;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
/**
*/
@ -52,7 +53,7 @@ public class BytesRefs {
return new BytesRef(value.toString());
}
public static BytesRef toBytesRef(Object value, BytesRef spare) {
public static BytesRef toBytesRef(Object value, BytesRefBuilder spare) {
if (value == null) {
return null;
}
@ -60,6 +61,6 @@ public class BytesRefs {
return (BytesRef) value;
}
spare.copyChars(value.toString());
return spare;
return spare.get();
}
}

View File

@ -79,8 +79,7 @@ public class HashedBytesRef {
}
public static HashedBytesRef deepCopyOf(HashedBytesRef other) {
BytesRef copy = new BytesRef();
copy.copyBytes(other.bytes);
BytesRef copy = BytesRef.deepCopyOf(other.bytes);
return new HashedBytesRef(copy, other.hash);
}
}

View File

@ -51,15 +51,14 @@ import static org.elasticsearch.common.lucene.search.NoopCollector.NOOP_COLLECTO
*/
public class Lucene {
public static final Version VERSION = Version.LUCENE_4_9;
// TODO: remove VERSION, and have users use Version.LATEST.
public static final Version VERSION = Version.LATEST;
public static final Version ANALYZER_VERSION = VERSION;
public static final Version QUERYPARSER_VERSION = VERSION;
public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer(ANALYZER_VERSION));
public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());
public static final int NO_DOC = -1;
public static final ScoreDoc[] EMPTY_SCORE_DOCS = new ScoreDoc[0];
public static final TopDocs EMPTY_TOP_DOCS = new TopDocs(0, EMPTY_SCORE_DOCS, 0.0f);
@ -69,27 +68,11 @@ public class Lucene {
if (version == null) {
return defaultVersion;
}
switch(version) {
case "4.9": return VERSION.LUCENE_4_9;
case "4.8": return VERSION.LUCENE_4_8;
case "4.7": return VERSION.LUCENE_4_7;
case "4.6": return VERSION.LUCENE_4_6;
case "4.5": return VERSION.LUCENE_4_5;
case "4.4": return VERSION.LUCENE_4_4;
case "4.3": return VERSION.LUCENE_4_3;
case "4.2": return VERSION.LUCENE_4_2;
case "4.1": return VERSION.LUCENE_4_1;
case "4.0": return VERSION.LUCENE_4_0;
case "3.6": return VERSION.LUCENE_3_6;
case "3.5": return VERSION.LUCENE_3_5;
case "3.4": return VERSION.LUCENE_3_4;
case "3.3": return VERSION.LUCENE_3_3;
case "3.2": return VERSION.LUCENE_3_2;
case "3.1": return VERSION.LUCENE_3_1;
case "3.0": return VERSION.LUCENE_3_0;
default:
logger.warn("no version match {}, default to {}", version, defaultVersion);
return defaultVersion;
try {
return Version.parse(version);
} catch (IllegalArgumentException e) {
logger.warn("no version match {}, default to {}", version, defaultVersion, e);
return defaultVersion;
}
}
@ -580,10 +563,7 @@ public class Lucene {
try {
return Version.parseLeniently(toParse);
} catch (IllegalArgumentException e) {
final String parsedMatchVersion = toParse
.toUpperCase(Locale.ROOT)
.replaceFirst("^(\\d+)\\.(\\d+)(.(\\d+))+$", "LUCENE_$1_$2");
return Version.valueOf(parsedMatchVersion);
// pass to default
}
}
return defaultValue;
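The version-parsing change drops the hand-maintained switch because Lucene 4.10 can parse dotted version strings itself, and Version.LATEST replaces pinning a specific constant. A quick sketch of the calls involved, assuming lucene-core 4.10.0; the sample strings are arbitrary:

import org.apache.lucene.util.Version;

public class VersionParseSketch {
    public static void main(String[] args) throws Exception {
        Version v = Version.parse("4.10.0");                  // dotted form, parsed strictly
        System.out.println(v.onOrAfter(Version.LUCENE_4_9));  // true
        System.out.println(Version.parseLeniently("4.9"));    // lenient short form still accepted
        System.out.println(Version.LATEST);                   // what Lucene.VERSION now points at
    }
}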

View File

@ -46,9 +46,8 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.io.FastStringReader;
import java.io.IOException;
@ -804,10 +803,10 @@ public final class XMoreLikeThis {
*/
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
final TermsEnum termsEnum = vector.iterator(null);
final CharsRef spare = new CharsRef();
final CharsRefBuilder spare = new CharsRefBuilder();
BytesRef text;
while((text = termsEnum.next()) != null) {
UnicodeUtil.UTF8toUTF16(text, spare);
spare.copyUTF8Bytes(text);
final String term = spare.toString();
if (isNoiseWord(term)) {
continue;

View File

@ -305,10 +305,10 @@ public enum CollectionUtils {
};
public static void sort(final BytesRefArray bytes, final int[] indices) {
sort(new BytesRef(), new BytesRef(), bytes, indices);
sort(new BytesRefBuilder(), new BytesRefBuilder(), bytes, indices);
}
private static void sort(final BytesRef scratch, final BytesRef scratch1, final BytesRefArray bytes, final int[] indices) {
private static void sort(final BytesRefBuilder scratch, final BytesRefBuilder scratch1, final BytesRefArray bytes, final int[] indices) {
final int numValues = bytes.size();
assert indices.length >= numValues;
@ -332,8 +332,8 @@ public enum CollectionUtils {
}
public static int sortAndDedup(final BytesRefArray bytes, final int[] indices) {
final BytesRef scratch = new BytesRef();
final BytesRef scratch1 = new BytesRef();
final BytesRefBuilder scratch = new BytesRefBuilder();
final BytesRefBuilder scratch1 = new BytesRefBuilder();
final int numValues = bytes.size();
assert indices.length >= numValues;
if (numValues <= 1) {
@ -341,15 +341,15 @@ public enum CollectionUtils {
}
sort(scratch, scratch1, bytes, indices);
int uniqueCount = 1;
BytesRef previous = scratch;
BytesRef current = scratch1;
BytesRefBuilder previous = scratch;
BytesRefBuilder current = scratch1;
bytes.get(previous, indices[0]);
for (int i = 1; i < numValues; ++i) {
bytes.get(current, indices[i]);
if (!previous.equals(current)) {
if (!previous.get().equals(current.get())) {
indices[uniqueCount++] = indices[i];
}
BytesRef tmp = previous;
BytesRefBuilder tmp = previous;
previous = current;
current = tmp;
}

View File

@ -23,12 +23,12 @@ import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.AbstractXContentParser;
import java.io.IOException;
import java.nio.CharBuffer;
/**
*
@ -88,9 +88,7 @@ public class JsonXContentParser extends AbstractXContentParser {
@Override
public BytesRef utf8Bytes() throws IOException {
BytesRef bytes = new BytesRef();
UnicodeUtil.UTF16toUTF8(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength(), bytes);
return bytes;
return new BytesRef(CharBuffer.wrap(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength()));
}
@Override

View File

@ -21,7 +21,6 @@ package org.elasticsearch.http.netty;
import com.google.common.base.Strings;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput;
@ -54,8 +53,7 @@ public class NettyHttpChannel extends HttpChannel {
private static final ChannelBuffer END_JSONP;
static {
BytesRef U_END_JSONP = new BytesRef();
UnicodeUtil.UTF16toUTF8(");", 0, ");".length(), U_END_JSONP);
BytesRef U_END_JSONP = new BytesRef(");");
END_JSONP = ChannelBuffers.wrappedBuffer(U_END_JSONP.bytes, U_END_JSONP.offset, U_END_JSONP.length);
}
@ -147,8 +145,7 @@ public class NettyHttpChannel extends HttpChannel {
// handle JSONP
String callback = request.param("callback");
if (callback != null) {
final BytesRef callbackBytes = new BytesRef(callback.length() * 4 + 1);
UnicodeUtil.UTF16toUTF8(callback, 0, callback.length(), callbackBytes);
final BytesRef callbackBytes = new BytesRef(callback);
callbackBytes.bytes[callbackBytes.length] = '(';
callbackBytes.length++;
buffer = ChannelBuffers.wrappedBuffer(

View File

@ -20,18 +20,19 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.elasticsearch.common.collect.UpdateInPlaceMap;
/**
*
*/
public final class FieldNameAnalyzer extends SimpleAnalyzerWrapper {
public final class FieldNameAnalyzer extends DelegatingAnalyzerWrapper {
private final UpdateInPlaceMap<String, Analyzer> analyzers;
private final Analyzer defaultAnalyzer;
public FieldNameAnalyzer(UpdateInPlaceMap<String, Analyzer> analyzers, Analyzer defaultAnalyzer) {
super(Analyzer.PER_FIELD_REUSE_STRATEGY);
this.analyzers = analyzers;
this.defaultAnalyzer = defaultAnalyzer;
}

View File

@ -20,13 +20,13 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
/**
* Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer}) that is associated
* with a name ({@link #name()}).
*/
public class NamedAnalyzer extends SimpleAnalyzerWrapper {
public class NamedAnalyzer extends DelegatingAnalyzerWrapper {
private final String name;
private final AnalyzerScope scope;
@ -46,6 +46,7 @@ public class NamedAnalyzer extends SimpleAnalyzerWrapper {
}
public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionOffsetGap) {
super(ERROR_STRATEGY);
this.name = name;
this.scope = scope;
this.analyzer = analyzer;
@ -90,4 +91,17 @@ public class NamedAnalyzer extends SimpleAnalyzerWrapper {
public String toString() {
return "analyzer name[" + name + "], analyzer [" + analyzer + "]";
}
/** It is an error if this is ever used, it means we screwed up! */
static final ReuseStrategy ERROR_STRATEGY = new Analyzer.ReuseStrategy() {
@Override
public TokenStreamComponents getReusableComponents(Analyzer a, String f) {
throw new IllegalStateException("NamedAnalyzer cannot be wrapped with a wrapper, only a delegator");
}
@Override
public void setReusableComponents(Analyzer a, String f, TokenStreamComponents c) {
throw new IllegalStateException("NamedAnalyzer cannot be wrapped with a wrapper, only a delegator");
}
};
}
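The analyzer wrappers above move from SimpleAnalyzerWrapper to Lucene 4.10's DelegatingAnalyzerWrapper, which forwards TokenStream reuse to the wrapped analyzer rather than caching components itself. A standalone sketch of that pattern, not the actual ES classes: the field names, analyzers and sample text are made up, and the Version-less analyzer constructors introduced in 4.10 are assumed.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class FieldDelegatingAnalyzer extends DelegatingAnalyzerWrapper {
    private final Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
    private final Analyzer idAnalyzer = new KeywordAnalyzer();

    public FieldDelegatingAnalyzer() {
        super(Analyzer.PER_FIELD_REUSE_STRATEGY); // reuse is handled by the wrapped analyzers
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        return "id".equals(fieldName) ? idAnalyzer : defaultAnalyzer;
    }

    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new FieldDelegatingAnalyzer();
             TokenStream ts = analyzer.tokenStream("id", "New York")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term); // one token, "New York": the id field is keyword-analyzed
            }
            ts.end();
        }
    }
}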

View File

@ -44,18 +44,20 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
super(version, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
// TODO: add non Version based ctors?
public StandardHtmlStripAnalyzer(Version version, CharArraySet stopwords) {
super(version, stopwords);
}
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
final StandardTokenizer src = new StandardTokenizer(getVersion(), reader);
src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
TokenStream tok = new StandardFilter(matchVersion, src);
tok = new LowerCaseFilter(matchVersion, tok);
TokenStream tok = new StandardFilter(getVersion(), src);
tok = new LowerCaseFilter(getVersion(), tok);
if (!stopwords.isEmpty()) {
tok = new StopFilter(matchVersion, tok, stopwords);
tok = new StopFilter(getVersion(), tok, stopwords);
}
return new TokenStreamComponents(src, tok) {
@Override

View File

@ -28,7 +28,7 @@ import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
@ -102,7 +102,7 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
@Override
public void clear(String reason, String[] keys) {
logger.debug("clear keys [], reason [{}]", reason, keys);
final BytesRef spare = new BytesRef();
final BytesRefBuilder spare = new BytesRefBuilder();
for (String key : keys) {
final byte[] keyBytes = Strings.toUTF8Bytes(key, spare);
for (Object readerKey : seenReaders.keySet()) {

View File

@ -21,7 +21,7 @@ package org.elasticsearch.index.codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene49.Lucene49Codec;
import org.apache.lucene.codecs.lucene410.Lucene410Codec;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
@ -37,7 +37,7 @@ import org.elasticsearch.index.mapper.MapperService;
* configured for a specific field the default postings format is used.
*/
// LUCENE UPGRADE: make sure to move to a new codec depending on the lucene version
public class PerFieldMappingPostingFormatCodec extends Lucene49Codec {
public class PerFieldMappingPostingFormatCodec extends Lucene410Codec {
private final ESLogger logger;
private final MapperService mapperService;
private final PostingsFormat defaultPostingFormat;

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.codec.docvaluesformat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
import org.apache.lucene.codecs.lucene410.Lucene410DocValuesFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -36,7 +36,7 @@ public class DiskDocValuesFormatProvider extends AbstractDocValuesFormatProvider
public DiskDocValuesFormatProvider(@Assisted String name, @Assisted Settings docValuesFormatSettings) {
super(name);
// TODO: log a warning if someone chooses this? just remove this together and map it to the 4.9 provider?
this.docValuesFormat = new Lucene49DocValuesFormat();
this.docValuesFormat = new Lucene410DocValuesFormat();
}
@Override

View File

@ -38,10 +38,10 @@ public class DocValuesFormats {
builtInDocValuesFormatsX.put(name, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormat.forName(name)));
}
// LUCENE UPGRADE: update those DVF if necessary
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene410")));
builtInDocValuesFormatsX.put("memory", new PreBuiltDocValuesFormatProvider.Factory("memory", DocValuesFormat.forName("Memory")));
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormatsX.put("Disk", new PreBuiltDocValuesFormatProvider.Factory("Disk", DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Lucene410")));
builtInDocValuesFormatsX.put("Disk", new PreBuiltDocValuesFormatProvider.Factory("Disk", DocValuesFormat.forName("Lucene410")));
builtInDocValuesFormats = builtInDocValuesFormatsX.immutableMap();
}
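The codec and doc-values format bumps are mechanical (Lucene49* becomes Lucene410*), but the extension point is worth illustrating: per-field behaviour comes from overriding getPostingsFormatForField on the new codec. A sketch, not the ES implementation; the field rule is hypothetical and the format is named explicitly only to show the hook (lucene-core 4.10.0 assumed).

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene410.Lucene410Codec;

public class PerFieldCodecSketch extends Lucene410Codec {
    private final PostingsFormat explicitDefault = PostingsFormat.forName("Lucene41");

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
        // hypothetical rule: identifier-like fields get an explicitly chosen format,
        // everything else falls through to the codec's default
        return field.endsWith("_id") ? explicitDefault : super.getPostingsFormatForField(field);
    }
}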

View File

@ -23,6 +23,7 @@ import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.ByteSizeValue;
import java.io.IOException;
@ -36,7 +37,7 @@ public class Segment implements Streamable {
public long sizeInBytes = -1;
public int docCount = -1;
public int delDocCount = -1;
public String version = null;
public org.apache.lucene.util.Version version = null;
public Boolean compound = null;
public String mergeId;
public long memoryInBytes;
@ -81,7 +82,7 @@ public class Segment implements Streamable {
return this.sizeInBytes;
}
public String getVersion() {
public org.apache.lucene.util.Version getVersion() {
return version;
}
@ -138,7 +139,7 @@ public class Segment implements Streamable {
docCount = in.readInt();
delDocCount = in.readInt();
sizeInBytes = in.readLong();
version = in.readOptionalString();
version = Lucene.parseVersionLenient(in.readOptionalString(), null);
compound = in.readOptionalBoolean();
mergeId = in.readOptionalString();
memoryInBytes = in.readLong();
@ -152,7 +153,7 @@ public class Segment implements Streamable {
out.writeInt(docCount);
out.writeInt(delDocCount);
out.writeLong(sizeInBytes);
out.writeOptionalString(version);
out.writeOptionalString(version.toString());
out.writeOptionalBoolean(compound);
out.writeOptionalString(mergeId);
out.writeLong(memoryInBytes);

View File

@ -1042,7 +1042,11 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
}
// wait for the merges outside of the read lock
if (optimize.waitForMerge()) {
currentIndexWriter().waitForMerges();
try {
currentIndexWriter().waitForMerges();
} catch (IOException e) {
throw new OptimizeFailedEngineException(shardId, e);
}
}
if (optimize.flush()) {
flush(new Flush().force(true).waitIfOngoing(true));
@ -1383,7 +1387,7 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
config.setIndexDeletionPolicy(deletionPolicy);
config.setInfoStream(new LoggerInfoStream(indexSettings, shardId));
config.setMergeScheduler(mergeScheduler.newMergeScheduler());
MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy();
MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy();
// Give us the opportunity to upgrade old segments while performing
// background merges
mergePolicy = new ElasticsearchMergePolicy(mergePolicy);

View File

@ -20,9 +20,11 @@
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.*;
import org.apache.lucene.util.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.lucene.Lucene;
import java.util.ArrayList;
import java.util.List;
@ -33,10 +35,6 @@ import java.util.List;
public enum FieldData {
;
static {
assert Lucene.VERSION == Version.LUCENE_4_9 : "Remove emptySortedNumeric in 4.10 and use the method with the same name from Lucene's DocValues class. See LUCENE-5834.";
}
/**
* Return a {@link SortedBinaryDocValues} that doesn't contain any value.
*/
@ -44,13 +42,6 @@ public enum FieldData {
return singleton(DocValues.emptyBinary(), new Bits.MatchNoBits(maxDoc));
}
/**
* Return a {@link SortedNumericDocValues} that doesn't contain any value.
*/
public static SortedNumericDocValues emptySortedNumeric(int maxDoc) {
return DocValues.singleton(DocValues.emptyNumeric(), new Bits.MatchNoBits(maxDoc));
}
/**
* Return a {@link NumericDoubleValues} that doesn't contain any value.
*/

View File

@ -23,8 +23,8 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
@ -115,9 +115,10 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
* since {@link Character#MAX_CODE_POINT} is a noncharacter and thus shouldn't appear in an index term. */
public static final BytesRef MAX_TERM;
static {
MAX_TERM = new BytesRef();
BytesRefBuilder builder = new BytesRefBuilder();
final char[] chars = Character.toChars(Character.MAX_CODE_POINT);
UnicodeUtil.UTF16toUTF8(chars, 0, chars.length, MAX_TERM);
builder.copyChars(chars, 0, chars.length);
MAX_TERM = builder.toBytesRef();
}
/**

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
@ -37,7 +38,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
}
@Override
public void toIndexForm(Number number, BytesRef bytes) {
public void toIndexForm(Number number, BytesRefBuilder bytes) {
INT.toIndexForm(number, bytes);
}
@ -53,7 +54,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
}
@Override
public void toIndexForm(Number number, BytesRef bytes) {
public void toIndexForm(Number number, BytesRefBuilder bytes) {
INT.toIndexForm(number, bytes);
}
@ -69,7 +70,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
}
@Override
public void toIndexForm(Number number, BytesRef bytes) {
public void toIndexForm(Number number, BytesRefBuilder bytes) {
NumericUtils.intToPrefixCodedBytes(number.intValue(), 0, bytes);
}
@ -85,7 +86,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
}
@Override
public void toIndexForm(Number number, BytesRef bytes) {
public void toIndexForm(Number number, BytesRefBuilder bytes) {
NumericUtils.longToPrefixCodedBytes(number.longValue(), 0, bytes);
}
@ -101,7 +102,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
}
@Override
public void toIndexForm(Number number, BytesRef bytes) {
public void toIndexForm(Number number, BytesRefBuilder bytes) {
NumericUtils.intToPrefixCodedBytes(NumericUtils.floatToSortableInt(number.floatValue()), 0, bytes);
}
@ -117,7 +118,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
}
@Override
public void toIndexForm(Number number, BytesRef bytes) {
public void toIndexForm(Number number, BytesRefBuilder bytes) {
NumericUtils.longToPrefixCodedBytes(NumericUtils.doubleToSortableLong(number.doubleValue()), 0, bytes);
}
@ -160,7 +161,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
return requiredBits;
}
public abstract void toIndexForm(Number number, BytesRef bytes);
public abstract void toIndexForm(Number number, BytesRefBuilder bytes);
public long toLong(BytesRef indexForm) {
return (long) toDouble(indexForm);

View File

@ -29,11 +29,11 @@ import java.util.Arrays;
public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
protected int count;
protected BytesRef[] values;
protected BytesRefBuilder[] values;
private final Sorter sorter;
protected SortingBinaryDocValues() {
values = new BytesRef[] { new BytesRef() };
values = new BytesRefBuilder[] { new BytesRefBuilder() };
sorter = new InPlaceMergeSorter() {
@Override
@ -43,7 +43,7 @@ public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
@Override
protected int compare(int i, int j) {
return values[i].compareTo(values[j]);
return values[i].get().compareTo(values[j].get());
}
};
}
@ -57,7 +57,7 @@ public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
final int newLen = ArrayUtil.oversize(count, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
values = Arrays.copyOf(values, newLen);
for (int i = oldLen; i < newLen; ++i) {
values[i] = new BytesRef();
values[i] = new BytesRefBuilder();
}
}
}
@ -77,6 +77,6 @@ public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
@Override
public final BytesRef valueAt(int index) {
return values[index];
return values[index].get();
}
}

View File

@ -23,15 +23,12 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
@ -110,7 +107,7 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
final BytesRef nullPlaceHolder = new BytesRef();
final BytesRef nonNullMissingBytes = missingBytes == null ? nullPlaceHolder : missingBytes;
return new TermValComparator(numHits, null, sortMissingLast) {
return new FieldComparator.TermValComparator(numHits, null, sortMissingLast) {
@Override
protected BinaryDocValues getBinaryDocValues(AtomicReaderContext context, String field) throws IOException {
@ -211,137 +208,4 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
// we let termsenum etc fall back to the default implementation
}
static {
assert Lucene.VERSION == Version.LUCENE_4_9 : "The comparator below is a raw copy of Lucene's, remove it when upgrading to 4.10";
}
/** Sorts by field's natural Term sort order. All
* comparisons are done using BytesRef.compareTo, which is
* slow for medium to large result sets but possibly
* very fast for very small results sets. */
public static class TermValComparator extends FieldComparator<BytesRef> {
private final BytesRef[] values;
private final BytesRef[] tempBRs;
private BinaryDocValues docTerms;
private Bits docsWithField;
private final String field;
private BytesRef bottom;
private BytesRef topValue;
private final int missingSortCmp;
/** Sole constructor. */
public TermValComparator(int numHits, String field, boolean sortMissingLast) {
values = new BytesRef[numHits];
tempBRs = new BytesRef[numHits];
this.field = field;
missingSortCmp = sortMissingLast ? 1 : -1;
}
@Override
public int compare(int slot1, int slot2) {
final BytesRef val1 = values[slot1];
final BytesRef val2 = values[slot2];
return compareValues(val1, val2);
}
@Override
public int compareBottom(int doc) {
final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
return compareValues(bottom, comparableBytes);
}
@Override
public void copy(int slot, int doc) {
final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
if (comparableBytes == null) {
values[slot] = null;
} else {
if (tempBRs[slot] == null) {
tempBRs[slot] = new BytesRef();
}
values[slot] = tempBRs[slot];
values[slot].copyBytes(comparableBytes);
}
}
/** Retrieves the BinaryDocValues for the field in this segment */
protected BinaryDocValues getBinaryDocValues(AtomicReaderContext context, String field) throws IOException {
return FieldCache.DEFAULT.getTerms(context.reader(), field, true);
}
/** Retrieves the set of documents that have a value in this segment */
protected Bits getDocsWithField(AtomicReaderContext context, String field) throws IOException {
return FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
}
/** Check whether the given value represents <tt>null</tt>. This can be
* useful if the {@link BinaryDocValues} returned by {@link #getBinaryDocValues}
* use a special value as a sentinel. The default implementation checks
* {@link #getDocsWithField}.
* <p>NOTE: The null value can only be an EMPTY {@link BytesRef}. */
protected boolean isNull(int doc, BytesRef term) {
return docsWithField != null && docsWithField.get(doc) == false;
}
@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
docTerms = getBinaryDocValues(context, field);
docsWithField = getDocsWithField(context, field);
if (docsWithField instanceof Bits.MatchAllBits) {
docsWithField = null;
}
return this;
}
@Override
public void setBottom(final int bottom) {
this.bottom = values[bottom];
}
@Override
public void setTopValue(BytesRef value) {
// null is fine: it means the last doc of the prior
// search was missing this value
topValue = value;
}
@Override
public BytesRef value(int slot) {
return values[slot];
}
@Override
public int compareValues(BytesRef val1, BytesRef val2) {
// missing always sorts first:
if (val1 == null) {
if (val2 == null) {
return 0;
}
return missingSortCmp;
} else if (val2 == null) {
return -missingSortCmp;
}
return val1.compareTo(val2);
}
@Override
public int compareTop(int doc) {
final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
return compareValues(topValue, comparableBytes);
}
/**
* Given a document and a term, return the term itself if it exists or
* <tt>null</tt> otherwise.
*/
private BytesRef getComparableBytes(int doc, BytesRef term) {
if (term.length == 0 && isNull(doc, term)) {
return null;
}
return term;
}
}
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.XOrdinalMap;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.elasticsearch.index.fielddata.AbstractRandomAccessOrds;
@ -31,11 +31,11 @@ import org.elasticsearch.index.fielddata.AbstractRandomAccessOrds;
public class GlobalOrdinalMapping extends AbstractRandomAccessOrds {
private final RandomAccessOrds values;
private final XOrdinalMap ordinalMap;
private final OrdinalMap ordinalMap;
private final LongValues mapping;
private final RandomAccessOrds[] bytesValues;
GlobalOrdinalMapping(XOrdinalMap ordinalMap, RandomAccessOrds[] bytesValues, int segmentIndex) {
GlobalOrdinalMapping(OrdinalMap ordinalMap, RandomAccessOrds[] bytesValues, int segmentIndex) {
super();
this.values = bytesValues[segmentIndex];
this.bytesValues = bytesValues;

View File

@ -20,8 +20,8 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.XOrdinalMap;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.logging.ESLogger;
@ -51,7 +51,7 @@ public enum GlobalOrdinalsBuilder {
atomicFD[i] = indexFieldData.load(indexReader.leaves().get(i));
subs[i] = atomicFD[i].getOrdinalsValues();
}
final XOrdinalMap ordinalMap = XOrdinalMap.build(null, subs, PackedInts.DEFAULT);
final OrdinalMap ordinalMap = OrdinalMap.build(null, subs, PackedInts.DEFAULT);
final long memorySizeInBytes = ordinalMap.ramBytesUsed();
breakerService.getBreaker(CircuitBreaker.Name.FIELDDATA).addWithoutBreaking(memorySizeInBytes);

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.XOrdinalMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
@ -35,7 +35,7 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
private final Atomic[] atomicReaders;
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicOrdinalsFieldData[] segmentAfd, XOrdinalMap ordinalMap, long memorySizeInBytes) {
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicOrdinalsFieldData[] segmentAfd, OrdinalMap ordinalMap, long memorySizeInBytes) {
super(index, settings, fieldNames, fieldDataType, memorySizeInBytes);
this.atomicReaders = new Atomic[segmentAfd.length];
for (int i = 0; i < segmentAfd.length; i++) {
@ -51,10 +51,10 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
private final class Atomic extends AbstractAtomicOrdinalsFieldData {
private final AtomicOrdinalsFieldData afd;
private final XOrdinalMap ordinalMap;
private final OrdinalMap ordinalMap;
private final int segmentIndex;
private Atomic(AtomicOrdinalsFieldData afd, XOrdinalMap ordinalMap, int segmentIndex) {
private Atomic(AtomicOrdinalsFieldData afd, OrdinalMap ordinalMap, int segmentIndex) {
this.afd = afd;
this.ordinalMap = ordinalMap;
this.segmentIndex = segmentIndex;

View File

@ -24,9 +24,8 @@ import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.index.fielddata.AbstractRandomAccessOrds;
/**
@ -57,26 +56,26 @@ public class MultiOrdinals extends Ordinals {
private final boolean multiValued;
private final long valueCount;
private final MonotonicAppendingLongBuffer endOffsets;
private final AppendingPackedLongBuffer ords;
private final PackedLongValues endOffsets;
private final PackedLongValues ords;
public MultiOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
multiValued = builder.getNumMultiValuesDocs() > 0;
valueCount = builder.getValueCount();
endOffsets = new MonotonicAppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
ords = new AppendingPackedLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
PackedLongValues.Builder endOffsetsBuilder = PackedLongValues.monotonicBuilder(OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
PackedLongValues.Builder ordsBuilder = PackedLongValues.packedBuilder(OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
long lastEndOffset = 0;
for (int i = 0; i < builder.maxDoc(); ++i) {
final LongsRef docOrds = builder.docOrds(i);
final long endOffset = lastEndOffset + docOrds.length;
endOffsets.add(endOffset);
endOffsetsBuilder.add(endOffset);
for (int j = 0; j < docOrds.length; ++j) {
ords.add(docOrds.longs[docOrds.offset + j]);
ordsBuilder.add(docOrds.longs[docOrds.offset + j]);
}
lastEndOffset = endOffset;
}
endOffsets.freeze();
ords.freeze();
endOffsets = endOffsetsBuilder.build();
ords = ordsBuilder.build();
assert endOffsets.size() == builder.maxDoc();
assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds();
}
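The MultiOrdinals rewrite is the standard 4.10 migration from (Monotonic)AppendingLongBuffer to PackedLongValues: values are appended through a Builder and then frozen into an immutable structure with build(). A self-contained sketch with made-up per-document ordinals, assuming lucene-core 4.10.0:

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class PackedLongValuesSketch {
    public static void main(String[] args) {
        PackedLongValues.Builder endOffsetsBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
        PackedLongValues.Builder ordsBuilder = PackedLongValues.packedBuilder(PackedInts.COMPACT);

        long[][] docOrds = { {0, 3}, {1}, {2, 4, 5} }; // made-up ordinals for three documents
        long end = 0;
        for (long[] perDoc : docOrds) {
            end += perDoc.length;
            endOffsetsBuilder.add(end);   // monotonic running end offset per document
            for (long ord : perDoc) {
                ordsBuilder.add(ord);     // flat stream of ordinals
            }
        }

        PackedLongValues endOffsets = endOffsetsBuilder.build(); // immutable from here on
        PackedLongValues ords = ordsBuilder.build();
        System.out.println(endOffsets.get(1) + " " + ords.size()); // 3 6
    }
}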
@ -98,8 +97,8 @@ public class MultiOrdinals extends Ordinals {
private static class SingleDocs extends SortedDocValues {
private final int valueCount;
private final MonotonicAppendingLongBuffer endOffsets;
private final AppendingPackedLongBuffer ords;
private final PackedLongValues endOffsets;
private final PackedLongValues ords;
private final ValuesHolder values;
SingleDocs(MultiOrdinals ordinals, ValuesHolder values) {
@ -131,8 +130,8 @@ public class MultiOrdinals extends Ordinals {
private static class MultiDocs extends AbstractRandomAccessOrds {
private final long valueCount;
private final MonotonicAppendingLongBuffer endOffsets;
private final AppendingPackedLongBuffer ords;
private final PackedLongValues endOffsets;
private final PackedLongValues ords;
private long offset;
private int cardinality;
private final ValuesHolder values;

View File

@ -21,8 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.geo.GeoPoint;
@ -41,12 +40,12 @@ abstract class AbstractIndexGeoPointFieldData extends AbstractIndexFieldData<Ato
private final BytesRefIterator termsEnum;
private final GeoPoint next;
private final CharsRef spare;
private final CharsRefBuilder spare;
protected GeoPointEnum(BytesRefIterator termsEnum) {
this.termsEnum = termsEnum;
next = new GeoPoint();
spare = new CharsRef();
spare = new CharsRefBuilder();
}
public GeoPoint next() throws IOException {
@ -54,10 +53,10 @@ abstract class AbstractIndexGeoPointFieldData extends AbstractIndexFieldData<Ato
if (term == null) {
return null;
}
UnicodeUtil.UTF8toUTF16(term, spare);
spare.copyUTF8Bytes(term);
int commaIndex = -1;
for (int i = 0; i < spare.length; i++) {
if (spare.chars[spare.offset + i] == ',') { // saves a string creation
for (int i = 0; i < spare.length(); i++) {
if (spare.charAt(i) == ',') { // saves a string creation
commaIndex = i;
break;
}
@ -66,8 +65,8 @@ abstract class AbstractIndexGeoPointFieldData extends AbstractIndexFieldData<Ato
assert false;
return next.reset(0, 0);
}
final double lat = Double.parseDouble(new String(spare.chars, spare.offset, (commaIndex - spare.offset)));
final double lon = Double.parseDouble(new String(spare.chars, (spare.offset + (commaIndex + 1)), spare.length - ((commaIndex + 1) - spare.offset)));
final double lat = Double.parseDouble(new String(spare.chars(), 0, commaIndex));
final double lon = Double.parseDouble(new String(spare.chars(), commaIndex + 1, spare.length() - (commaIndex + 1)));
return next.reset(lat, lon);
}

View File

@ -20,8 +20,7 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
@ -31,8 +30,8 @@ import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.N
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper.Names;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.MultiValueMode;
import java.io.IOException;
import java.util.Map;
@ -138,7 +137,7 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
private static final class RegexFilter extends FilteredTermsEnum {
private final Matcher matcher;
private final CharsRef spare = new CharsRef();
private final CharsRefBuilder spare = new CharsRefBuilder();
public RegexFilter(TermsEnum delegate, Matcher matcher) {
super(delegate, false);
@ -155,8 +154,8 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
@Override
protected AcceptStatus accept(BytesRef arg0) throws IOException {
UnicodeUtil.UTF8toUTF16(arg0, spare);
matcher.reset(spare);
spare.copyUTF8Bytes(arg0);
matcher.reset(spare.get());
if (matcher.matches()) {
return AcceptStatus.YES;
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.elasticsearch.index.fielddata.*;
@ -59,7 +60,7 @@ abstract class AtomicLongFieldData implements AtomicNumericFieldData {
@Override
public SortedNumericDocValues getLongValues() {
return FieldData.emptySortedNumeric(maxDoc);
return DocValues.emptySortedNumeric(maxDoc);
}
};

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
@ -49,7 +50,7 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData {
return new SortedBinaryDocValues() {
int count;
BytesRef[] refs = new BytesRef[0];
BytesRefBuilder[] refs = new BytesRefBuilder[0];
final ByteArrayDataInput in = new ByteArrayDataInput();
@Override
@ -64,16 +65,15 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData {
final int previousLength = refs.length;
refs = Arrays.copyOf(refs, ArrayUtil.oversize(count, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
for (int i = previousLength; i < refs.length; ++i) {
refs[i] = new BytesRef();
refs[i] = new BytesRefBuilder();
}
}
for (int i = 0; i < count; ++i) {
final int length = in.readVInt();
final BytesRef scratch = refs[i];
final BytesRefBuilder scratch = refs[i];
scratch.grow(length);
in.readBytes(scratch.bytes, 0, length);
scratch.length = length;
scratch.offset = 0;
in.readBytes(scratch.bytes(), 0, length);
scratch.setLength(length);
}
}
}
@ -85,7 +85,7 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData {
@Override
public BytesRef valueAt(int index) {
return refs[index];
return refs[index].get();
}
};
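
The mutable BytesRef scratch buffers above become BytesRefBuilder instances: the grow/length/offset bookkeeping moves into the builder, and get() exposes the current contents as a BytesRef view. A small sketch of reading length-prefixed values with that API; the payload layout mirrors the one in the hunk, and the names are illustrative.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class BytesRefBuilderExample {
    public static void main(String[] args) {
        // Two length-prefixed values: [3]{1,2,3} [2]{9,9}
        byte[] payload = new byte[] {3, 1, 2, 3, 2, 9, 9};
        ByteArrayDataInput in = new ByteArrayDataInput(payload);

        BytesRefBuilder scratch = new BytesRefBuilder();
        while (in.getPosition() < payload.length) {
            int length = in.readVInt();
            scratch.grow(length);                     // ensure the backing array is large enough
            in.readBytes(scratch.bytes(), 0, length);
            scratch.setLength(length);                // offset is implicitly 0 with a builder
            BytesRef value = scratch.get();           // view of the current contents
            System.out.println(value.length + " bytes");
        }
    }
}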

View File

@ -21,7 +21,9 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
@ -71,15 +73,15 @@ public class FSTBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
private final FST<Long> fst;
// per-thread resources
private final BytesRef scratch;
private final BytesRefBuilder scratch;
protected final BytesReader in;
protected final Arc<Long> firstArc = new Arc<>();
protected final Arc<Long> scratchArc = new Arc<>();
protected final IntsRef scratchInts = new IntsRef();
protected final IntsRefBuilder scratchInts = new IntsRefBuilder();
ValuesHolder(FST<Long> fst) {
this.fst = fst;
scratch = new BytesRef();
scratch = new BytesRefBuilder();
in = fst.getBytesReader();
}
@ -90,13 +92,13 @@ public class FSTBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
fst.getFirstArc(firstArc);
try {
IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
scratch.length = scratch.offset = 0;
scratch.clear();
scratch.grow(output.length);
Util.toBytesRef(output, scratch);
} catch (IOException ex) {
//bogus
}
return scratch;
return scratch.get();
}
}

View File

@ -20,7 +20,7 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
@ -72,7 +72,7 @@ public class FSTBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
}
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<>(INPUT_TYPE.BYTE1, outputs);
final IntsRef scratch = new IntsRef();
final IntsRefBuilder scratch = new IntsRefBuilder();
final long numTerms;
if (regex == null && frequency == null) {

View File

@ -22,9 +22,8 @@ package org.elasticsearch.index.fielddata.plain;
import com.google.common.base.Preconditions;
import org.apache.lucene.index.*;
import org.apache.lucene.util.*;
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.breaker.CircuitBreaker;
@ -97,7 +96,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
// Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
// longs is going to be monotonically increasing
final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
final PackedLongValues.Builder valuesBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
TermsEnum termsEnum = estimator.beforeLoad(terms);
@ -111,9 +110,9 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
final long value = indexedAsLong
? NumericUtils.prefixCodedToLong(term)
: NumericUtils.prefixCodedToInt(term);
assert values.size() == 0 || value > values.get(values.size() - 1);
values.add(value);
valuesBuilder.add(value);
}
final PackedLongValues values = valuesBuilder.build();
final Ordinals build = builder.build(fieldDataType.getSettings());
CommonSettings.MemoryStorageFormat formatHint = CommonSettings.getMemoryStorageHint(fieldDataType);
@ -206,7 +205,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
};
break;
case PAGED:
final AppendingDeltaPackedLongBuffer dpValues = new AppendingDeltaPackedLongBuffer(reader.maxDoc() / pageSize + 1, pageSize, acceptableOverheadRatio);
final PackedLongValues.Builder dpValues = PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio);
long lastValue = 0;
for (int i = 0; i < reader.maxDoc(); i++) {
@ -217,13 +216,13 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
}
dpValues.add(lastValue);
}
dpValues.freeze();
ramBytesUsed = dpValues.ramBytesUsed();
final PackedLongValues pagedValues = dpValues.build();
data = new AtomicLongFieldData(ramBytesUsed) {
@Override
public SortedNumericDocValues getLongValues() {
return pagedSingles(dpValues, docsWithValues);
return pagedSingles(pagedValues, docsWithValues);
}
};
@ -260,7 +259,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
}
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader reader, MonotonicAppendingLongBuffer values, Ordinals build, RandomAccessOrds ordinals,
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals,
long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {
CommonSettings.MemoryStorageFormat format;
@ -318,7 +317,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
return format;
}
private long getPageMemoryUsage(MonotonicAppendingLongBuffer values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
int bitsRequired;
long pageMemorySize = 0;
PackedInts.FormatAndBits formatAndBits;
@ -484,7 +483,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
return DocValues.singleton(values, docsWithFields);
}
private static SortedNumericDocValues pagedSingles(final AppendingDeltaPackedLongBuffer values, final FixedBitSet docsWithValue) {
private static SortedNumericDocValues pagedSingles(final PackedLongValues values, final FixedBitSet docsWithValue) {
return DocValues.singleton(new NumericDocValues() {
// we need to wrap since NumericDocValues must return 0 when a doc has no value
@Override

View File

@ -21,7 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
/**
@ -29,10 +29,10 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals;
public class PagedBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
private final PagedBytes.Reader bytes;
private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
private final PackedLongValues termOrdToBytesOffset;
protected final Ordinals ordinals;
public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Ordinals ordinals) {
public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset, Ordinals ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
@ -61,9 +61,9 @@ public class PagedBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
private final BytesRef scratch = new BytesRef();
private final PagedBytes.Reader bytes;
private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
private final PackedLongValues termOrdToBytesOffset;
ValuesHolder(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset) {
ValuesHolder(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
}

View File

@ -23,7 +23,8 @@ import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
@ -71,7 +72,7 @@ public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
final PagedBytes bytes = new PagedBytes(15);
final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
final PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
final long numTerms;
if (regex == null && frequency == null) {
numTerms = terms.size();
@ -102,7 +103,7 @@ public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
PagedBytes.Reader bytesReader = bytes.freeze(true);
final Ordinals ordinals = builder.build(fieldDataType.getSettings());
data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset, ordinals);
data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset.build(), ordinals);
success = true;
return data;
} finally {

View File

@ -23,12 +23,13 @@ import com.carrotsearch.hppc.ObjectObjectOpenHashMap;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import com.google.common.collect.ImmutableSortedSet;
import org.apache.lucene.index.*;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.Nullable;
@ -136,7 +137,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<AtomicPare
typeToAtomicFieldData.put(
cursor.key,
new PagedBytesAtomicFieldData(bytesReader, cursor.value.termOrdToBytesOffset, ordinals)
new PagedBytesAtomicFieldData(bytesReader, cursor.value.termOrdToBytesOffset.build(), ordinals)
);
}
data = new ParentChildAtomicFieldData(typeToAtomicFieldData.build());
@ -183,12 +184,12 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<AtomicPare
class TypeBuilder {
final PagedBytes bytes;
final MonotonicAppendingLongBuffer termOrdToBytesOffset;
final PackedLongValues.Builder termOrdToBytesOffset;
final OrdinalsBuilder builder;
TypeBuilder(float acceptableTransientOverheadRatio, AtomicReader reader) throws IOException {
bytes = new PagedBytes(15);
termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
}
}
@ -299,7 +300,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<AtomicPare
for (Map.Entry<String, SortedDocValues[]> entry : types.entrySet()) {
final String parentType = entry.getKey();
final SortedDocValues[] values = entry.getValue();
final XOrdinalMap ordinalMap = XOrdinalMap.build(null, entry.getValue(), PackedInts.DEFAULT);
final OrdinalMap ordinalMap = OrdinalMap.build(null, entry.getValue(), PackedInts.DEFAULT);
ramBytesUsed += ordinalMap.ramBytesUsed();
for (int i = 0; i < values.length; ++i) {
final SortedDocValues segmentValues = values[i];
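
With 4.10 the OrdinalMap improvements that Elasticsearch had carried as XOrdinalMap are in Lucene itself, so MultiDocValues.OrdinalMap.build is called directly. A minimal sketch of building a global ordinal map across per-segment values; the segmentValues array is assumed to come from the index elsewhere.

import java.io.IOException;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.packed.PackedInts;

public class OrdinalMapExample {
    // Builds a map from per-segment ordinals to global ordinals.
    static OrdinalMap buildGlobalOrdinals(SortedDocValues[] segmentValues) throws IOException {
        OrdinalMap ordinalMap = OrdinalMap.build(null, segmentValues, PackedInts.DEFAULT);
        System.out.println("global value count: " + ordinalMap.getValueCount()
                + ", ram: " + ordinalMap.ramBytesUsed() + " bytes");
        return ordinalMap;
    }
}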

View File

@ -24,7 +24,7 @@ import com.google.common.base.Charsets;
import com.google.common.base.Predicate;
import com.google.common.collect.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.queries.TermFilter;
@ -1064,11 +1064,12 @@ public class MapperService extends AbstractIndexComponent {
}
}
final class SmartIndexNameSearchAnalyzer extends SimpleAnalyzerWrapper {
final class SmartIndexNameSearchAnalyzer extends DelegatingAnalyzerWrapper {
private final Analyzer defaultAnalyzer;
SmartIndexNameSearchAnalyzer(Analyzer defaultAnalyzer) {
super(Analyzer.PER_FIELD_REUSE_STRATEGY);
this.defaultAnalyzer = defaultAnalyzer;
}
@ -1095,11 +1096,12 @@ public class MapperService extends AbstractIndexComponent {
}
}
final class SmartIndexNameSearchQuoteAnalyzer extends SimpleAnalyzerWrapper {
final class SmartIndexNameSearchQuoteAnalyzer extends DelegatingAnalyzerWrapper {
private final Analyzer defaultAnalyzer;
SmartIndexNameSearchQuoteAnalyzer(Analyzer defaultAnalyzer) {
super(Analyzer.PER_FIELD_REUSE_STRATEGY);
this.defaultAnalyzer = defaultAnalyzer;
}
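
SimpleAnalyzerWrapper was a temporary Elasticsearch copy pending LUCENE-5803; 4.10 ships the equivalent as DelegatingAnalyzerWrapper, so the field-aware analyzers above extend it and pass a fallback reuse strategy to the super constructor. A minimal sketch of such a wrapper; the per-field lookup map is an assumption added for illustration.

import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;

// Picks an analyzer per field, falling back to a default, while delegating
// token-stream reuse to whichever analyzer is chosen.
public final class PerFieldDelegatingAnalyzer extends DelegatingAnalyzerWrapper {
    private final Map<String, Analyzer> perField;
    private final Analyzer defaultAnalyzer;

    public PerFieldDelegatingAnalyzer(Map<String, Analyzer> perField, Analyzer defaultAnalyzer) {
        super(Analyzer.PER_FIELD_REUSE_STRATEGY);   // fallback strategy, as in the hunks above
        this.perField = perField;
        this.defaultAnalyzer = defaultAnalyzer;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        Analyzer analyzer = perField.get(fieldName);
        return analyzer != null ? analyzer : defaultAnalyzer;
    }
}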

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.mapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.lucene.BytesRefs;
@ -84,10 +84,10 @@ public final class Uid {
}
public static BytesRef typePrefixAsBytes(BytesRef type) {
BytesRef bytesRef = new BytesRef(type.length + 1);
BytesRefBuilder bytesRef = new BytesRefBuilder();
bytesRef.append(type);
bytesRef.append(DELIMITER_BYTES);
return bytesRef;
return bytesRef.toBytesRef();
}
public static Uid createUid(String uid) {
@ -127,10 +127,11 @@ public final class Uid {
return ref;
}
public static void createUidAsBytes(BytesRef type, BytesRef id, BytesRef spare) {
public static BytesRef createUidAsBytes(BytesRef type, BytesRef id, BytesRefBuilder spare) {
spare.copyBytes(type);
spare.append(DELIMITER_BYTES);
spare.append(id);
return spare.get();
}
public static BytesRef[] createTypeUids(Collection<String> types, Object ids) {
@ -140,13 +141,13 @@ public final class Uid {
public static BytesRef[] createTypeUids(Collection<String> types, List<? extends Object> ids) {
final int numIds = ids.size();
BytesRef[] uids = new BytesRef[types.size() * ids.size()];
BytesRef typeBytes = new BytesRef();
BytesRef idBytes = new BytesRef();
BytesRefBuilder typeBytes = new BytesRefBuilder();
BytesRefBuilder idBytes = new BytesRefBuilder();
int index = 0;
for (String type : types) {
UnicodeUtil.UTF16toUTF8(type, 0, type.length(), typeBytes);
typeBytes.copyChars(type);
for (int i = 0; i < numIds; i++, index++) {
uids[index] = Uid.createUidAsBytes(typeBytes, BytesRefs.toBytesRef(ids.get(i), idBytes));
uids[index] = Uid.createUidAsBytes(typeBytes.get(), BytesRefs.toBytesRef(ids.get(i), idBytes));
}
}
return uids;

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -162,9 +163,9 @@ public class ByteFieldMapper extends NumberFieldMapper<Byte> {
@Override
public BytesRef indexedValueForSearch(Object value) {
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.intToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
private byte parseValue(Object value) {
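
All of the numeric mappers change the same way: NumericUtils.intToPrefixCoded and longToPrefixCoded now fill a BytesRefBuilder instead of a BytesRef, and the term to search on is read back with get(). A short standalone sketch of encoding an exact-match term this way; the value 42 is arbitrary.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;

public class PrefixCodedExample {
    public static void main(String[] args) {
        BytesRefBuilder bytes = new BytesRefBuilder();
        // shift 0 encodes the full-precision value, i.e. the term used for exact matches
        NumericUtils.intToPrefixCoded(42, 0, bytes);
        BytesRef indexedTerm = bytes.get();
        System.out.println("prefix-coded int length: " + indexedTerm.length);

        // longs work the same way through longToPrefixCoded
        bytes.clear();
        NumericUtils.longToPrefixCoded(42L, 0, bytes);
        System.out.println("prefix-coded long length: " + bytes.get().length);
    }
}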

View File

@ -26,6 +26,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -249,9 +250,9 @@ public class DateFieldMapper extends NumberFieldMapper<Long> {
@Override
public BytesRef indexedValueForSearch(Object value) {
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
private long parseValue(Object value) {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -167,9 +168,9 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
@Override
public BytesRef indexedValueForSearch(Object value) {
long longValue = NumericUtils.doubleToSortableLong(parseDoubleValue(value));
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.longToPrefixCoded(longValue, 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
@Override

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -166,9 +167,9 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
@Override
public BytesRef indexedValueForSearch(Object value) {
int intValue = NumericUtils.floatToSortableInt(parseValue(value));
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.intToPrefixCoded(intValue, 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
private float parseValue(Object value) {

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -161,9 +162,9 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
@Override
public BytesRef indexedValueForSearch(Object value) {
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.intToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
private int parseValue(Object value) {

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -161,9 +162,9 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
@Override
public BytesRef indexedValueForSearch(Object value) {
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.longToPrefixCoded(parseLongValue(value), 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
@Override

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -163,9 +164,9 @@ public class ShortFieldMapper extends NumberFieldMapper<Short> {
@Override
public BytesRef indexedValueForSearch(Object value) {
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.intToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
private short parseValue(Object value) {

View File

@ -26,6 +26,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Numbers;
@ -167,9 +168,9 @@ public class BoostFieldMapper extends NumberFieldMapper<Float> implements Intern
@Override
public BytesRef indexedValueForSearch(Object value) {
int intValue = NumericUtils.floatToSortableInt(parseValue(value));
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.intToPrefixCoded(intValue, precisionStep(), bytesRef);
return bytesRef;
return bytesRef.get();
}
private float parseValue(Object value) {

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Explicit;
@ -206,9 +207,9 @@ public class IpFieldMapper extends NumberFieldMapper<Long> {
@Override
public BytesRef indexedValueForSearch(Object value) {
BytesRef bytesRef = new BytesRef();
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
return bytesRef;
return bytesRef.get();
}
private long parseValue(Object value) {

View File

@ -218,11 +218,6 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
return upgradedMergeSpecification(delegate.findForcedDeletesMerges(segmentInfos, writer));
}
@Override
public void close() {
delegate.close();
}
@Override
public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, IndexWriter writer) throws IOException {
return delegate.useCompoundFile(segments, newSegment, writer);

View File

@ -38,29 +38,30 @@ import java.util.concurrent.CopyOnWriteArraySet;
public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<LogByteSizeMergePolicy> {
private final IndexSettingsService indexSettingsService;
public static final String MAX_MERGE_BYTE_SIZE_KEY = "index.merge.policy.max_merge_sizes";
public static final String MIN_MERGE_BYTE_SIZE_KEY = "index.merge.policy.min_merge_size";
public static final String MERGE_FACTORY_KEY = "index.merge.policy.merge_factor";
private volatile ByteSizeValue minMergeSize;
private volatile ByteSizeValue maxMergeSize;
private volatile int mergeFactor;
private volatile int maxMergeDocs;
private final boolean calibrateSizeByDeletes;
private final Set<CustomLogByteSizeMergePolicy> policies = new CopyOnWriteArraySet<>();
private final ApplySettings applySettings = new ApplySettings();
private final LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
private static final ByteSizeValue DEFAULT_MIN_MERGE_SIZE = new ByteSizeValue((long) (LogByteSizeMergePolicy.DEFAULT_MIN_MERGE_MB * 1024 * 1024), ByteSizeUnit.BYTES);
private static final ByteSizeValue DEFAULT_MAX_MERGE_SIZE = new ByteSizeValue((long) LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_MB, ByteSizeUnit.MB);
@Inject
public LogByteSizeMergePolicyProvider(Store store, IndexSettingsService indexSettingsService) {
super(store);
Preconditions.checkNotNull(store, "Store must be provided to merge policy");
this.indexSettingsService = indexSettingsService;
this.minMergeSize = componentSettings.getAsBytesSize("min_merge_size", new ByteSizeValue((long) (LogByteSizeMergePolicy.DEFAULT_MIN_MERGE_MB * 1024 * 1024), ByteSizeUnit.BYTES));
this.maxMergeSize = componentSettings.getAsBytesSize("max_merge_size", new ByteSizeValue((long) LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_MB, ByteSizeUnit.MB));
this.mergeFactor = componentSettings.getAsInt("merge_factor", LogByteSizeMergePolicy.DEFAULT_MERGE_FACTOR);
this.maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_DOCS);
this.calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
ByteSizeValue minMergeSize = componentSettings.getAsBytesSize("min_merge_size", DEFAULT_MIN_MERGE_SIZE);
ByteSizeValue maxMergeSize = componentSettings.getAsBytesSize("max_merge_size", DEFAULT_MAX_MERGE_SIZE);
int mergeFactor = componentSettings.getAsInt("merge_factor", LogByteSizeMergePolicy.DEFAULT_MERGE_FACTOR);
int maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_DOCS);
boolean calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
mergePolicy.setMinMergeMB(minMergeSize.mbFrac());
mergePolicy.setMaxMergeMB(maxMergeSize.mbFrac());
mergePolicy.setMergeFactor(mergeFactor);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
mergePolicy.setNoCFSRatio(noCFSRatio);
logger.debug("using [log_bytes_size] merge policy with merge_factor[{}], min_merge_size[{}], max_merge_size[{}], max_merge_docs[{}], calibrate_size_by_deletes[{}]",
mergeFactor, minMergeSize, maxMergeSize, maxMergeDocs, calibrateSizeByDeletes);
@ -68,16 +69,7 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
}
@Override
public LogByteSizeMergePolicy newMergePolicy() {
final CustomLogByteSizeMergePolicy mergePolicy = new CustomLogByteSizeMergePolicy(this);
mergePolicy.setMinMergeMB(minMergeSize.mbFrac());
mergePolicy.setMaxMergeMB(maxMergeSize.mbFrac());
mergePolicy.setMergeFactor(mergeFactor);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
mergePolicy.setNoCFSRatio(noCFSRatio);
policies.add(mergePolicy);
public LogByteSizeMergePolicy getMergePolicy() {
return mergePolicy;
}
@ -90,72 +82,52 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
public static final String INDEX_MERGE_POLICY_MAX_MERGE_SIZE = "index.merge.policy.max_merge_size";
public static final String INDEX_MERGE_POLICY_MAX_MERGE_DOCS = "index.merge.policy.max_merge_docs";
public static final String INDEX_MERGE_POLICY_MERGE_FACTOR = "index.merge.policy.merge_factor";
public static final String INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES = "index.merge.policy.calibrate_size_by_deletes";
class ApplySettings implements IndexSettingsService.Listener {
@Override
public void onRefreshSettings(Settings settings) {
ByteSizeValue minMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MIN_MERGE_SIZE, LogByteSizeMergePolicyProvider.this.minMergeSize);
if (!minMergeSize.equals(LogByteSizeMergePolicyProvider.this.minMergeSize)) {
logger.info("updating min_merge_size from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.minMergeSize, minMergeSize);
LogByteSizeMergePolicyProvider.this.minMergeSize = minMergeSize;
for (CustomLogByteSizeMergePolicy policy : policies) {
policy.setMinMergeMB(minMergeSize.mbFrac());
}
double oldMinMergeSizeMB = mergePolicy.getMinMergeMB();
ByteSizeValue minMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MIN_MERGE_SIZE, DEFAULT_MIN_MERGE_SIZE);
if (minMergeSize.mbFrac() != oldMinMergeSizeMB) {
logger.info("updating min_merge_size from [{}mb] to [{}]", oldMinMergeSizeMB, minMergeSize);
mergePolicy.setMinMergeMB(minMergeSize.mbFrac());
}
ByteSizeValue maxMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGE_SIZE, LogByteSizeMergePolicyProvider.this.maxMergeSize);
if (!maxMergeSize.equals(LogByteSizeMergePolicyProvider.this.maxMergeSize)) {
logger.info("updating max_merge_size from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.maxMergeSize, maxMergeSize);
LogByteSizeMergePolicyProvider.this.maxMergeSize = maxMergeSize;
for (CustomLogByteSizeMergePolicy policy : policies) {
policy.setMaxMergeMB(maxMergeSize.mbFrac());
}
double oldMaxMergeSizeMB = mergePolicy.getMaxMergeMB();
ByteSizeValue maxMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGE_SIZE, DEFAULT_MAX_MERGE_SIZE);
if (maxMergeSize.mbFrac() != oldMaxMergeSizeMB) {
logger.info("updating max_merge_size from [{}mb] to [{}]", oldMaxMergeSizeMB, maxMergeSize);
mergePolicy.setMaxMergeMB(maxMergeSize.mbFrac());
}
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogByteSizeMergePolicyProvider.this.maxMergeDocs);
if (maxMergeDocs != LogByteSizeMergePolicyProvider.this.maxMergeDocs) {
logger.info("updating max_merge_docs from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.maxMergeDocs, maxMergeDocs);
LogByteSizeMergePolicyProvider.this.maxMergeDocs = maxMergeDocs;
for (CustomLogByteSizeMergePolicy policy : policies) {
policy.setMaxMergeDocs(maxMergeDocs);
}
int oldMaxMergeDocs = mergePolicy.getMaxMergeDocs();
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_DOCS);
if (maxMergeDocs != oldMaxMergeDocs) {
logger.info("updating max_merge_docs from [{}] to [{}]", oldMaxMergeDocs, maxMergeDocs);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
}
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogByteSizeMergePolicyProvider.this.mergeFactor);
if (mergeFactor != LogByteSizeMergePolicyProvider.this.mergeFactor) {
logger.info("updating merge_factor from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.mergeFactor, mergeFactor);
LogByteSizeMergePolicyProvider.this.mergeFactor = mergeFactor;
for (CustomLogByteSizeMergePolicy policy : policies) {
policy.setMergeFactor(mergeFactor);
}
int oldMergeFactor = mergePolicy.getMergeFactor();
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogByteSizeMergePolicy.DEFAULT_MERGE_FACTOR);
if (mergeFactor != oldMergeFactor) {
logger.info("updating merge_factor from [{}] to [{}]", oldMergeFactor, mergeFactor);
mergePolicy.setMergeFactor(mergeFactor);
}
boolean oldCalibrateSizeByDeletes = mergePolicy.getCalibrateSizeByDeletes();
boolean calibrateSizeByDeletes = settings.getAsBoolean(INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES, true);
if (calibrateSizeByDeletes != oldCalibrateSizeByDeletes) {
logger.info("updating calibrate_size_by_deletes from [{}] to [{}]", oldCalibrateSizeByDeletes, calibrateSizeByDeletes);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
}
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogByteSizeMergePolicyProvider.this.noCFSRatio)));
if (noCFSRatio != LogByteSizeMergePolicyProvider.this.noCFSRatio) {
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogByteSizeMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
LogByteSizeMergePolicyProvider.this.noCFSRatio = noCFSRatio;
for (CustomLogByteSizeMergePolicy policy : policies) {
policy.setNoCFSRatio(noCFSRatio);
}
mergePolicy.setNoCFSRatio(noCFSRatio);
}
}
}
public static class CustomLogByteSizeMergePolicy extends LogByteSizeMergePolicy {
private final LogByteSizeMergePolicyProvider provider;
public CustomLogByteSizeMergePolicy(LogByteSizeMergePolicyProvider provider) {
super();
this.provider = provider;
}
@Override
public void close() {
super.close();
provider.policies.remove(this);
}
}
}

View File

@ -36,27 +36,30 @@ import java.util.concurrent.CopyOnWriteArraySet;
public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDocMergePolicy> {
private final IndexSettingsService indexSettingsService;
private final ApplySettings applySettings = new ApplySettings();
private final LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
public static final String MAX_MERGE_DOCS_KEY = "index.merge.policy.max_merge_docs";
public static final String MIN_MERGE_DOCS_KEY = "index.merge.policy.min_merge_docs";
public static final String MERGE_FACTORY_KEY = "index.merge.policy.merge_factor";
private volatile int minMergeDocs;
private volatile int maxMergeDocs;
private volatile int mergeFactor;
private final boolean calibrateSizeByDeletes;
private final Set<CustomLogDocMergePolicy> policies = new CopyOnWriteArraySet<>();
private final ApplySettings applySettings = new ApplySettings();
@Inject
public LogDocMergePolicyProvider(Store store, IndexSettingsService indexSettingsService) {
super(store);
Preconditions.checkNotNull(store, "Store must be provided to merge policy");
this.indexSettingsService = indexSettingsService;
this.minMergeDocs = componentSettings.getAsInt("min_merge_docs", LogDocMergePolicy.DEFAULT_MIN_MERGE_DOCS);
this.maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS);
this.mergeFactor = componentSettings.getAsInt("merge_factor", LogDocMergePolicy.DEFAULT_MERGE_FACTOR);
this.calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
int minMergeDocs = componentSettings.getAsInt("min_merge_docs", LogDocMergePolicy.DEFAULT_MIN_MERGE_DOCS);
int maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS);
int mergeFactor = componentSettings.getAsInt("merge_factor", LogDocMergePolicy.DEFAULT_MERGE_FACTOR);
boolean calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
mergePolicy.setMinMergeDocs(minMergeDocs);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
mergePolicy.setMergeFactor(mergeFactor);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
mergePolicy.setNoCFSRatio(noCFSRatio);
logger.debug("using [log_doc] merge policy with merge_factor[{}], min_merge_docs[{}], max_merge_docs[{}], calibrate_size_by_deletes[{}]",
mergeFactor, minMergeDocs, maxMergeDocs, calibrateSizeByDeletes);
@ -69,76 +72,52 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
}
@Override
public LogDocMergePolicy newMergePolicy() {
final CustomLogDocMergePolicy mergePolicy = new CustomLogDocMergePolicy(this);
mergePolicy.setMinMergeDocs(minMergeDocs);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
mergePolicy.setMergeFactor(mergeFactor);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
mergePolicy.setNoCFSRatio(noCFSRatio);
policies.add(mergePolicy);
public LogDocMergePolicy getMergePolicy() {
return mergePolicy;
}
public static final String INDEX_MERGE_POLICY_MIN_MERGE_DOCS = "index.merge.policy.min_merge_docs";
public static final String INDEX_MERGE_POLICY_MAX_MERGE_DOCS = "index.merge.policy.max_merge_docs";
public static final String INDEX_MERGE_POLICY_MERGE_FACTOR = "index.merge.policy.merge_factor";
public static final String INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES = "index.merge.policy.calibrate_size_by_deletes";
class ApplySettings implements IndexSettingsService.Listener {
@Override
public void onRefreshSettings(Settings settings) {
int minMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MIN_MERGE_DOCS, LogDocMergePolicyProvider.this.minMergeDocs);
if (minMergeDocs != LogDocMergePolicyProvider.this.minMergeDocs) {
logger.info("updating min_merge_docs from [{}] to [{}]", LogDocMergePolicyProvider.this.minMergeDocs, minMergeDocs);
LogDocMergePolicyProvider.this.minMergeDocs = minMergeDocs;
for (CustomLogDocMergePolicy policy : policies) {
policy.setMinMergeDocs(minMergeDocs);
}
int oldMinMergeDocs = mergePolicy.getMinMergeDocs();
int minMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MIN_MERGE_DOCS, LogDocMergePolicy.DEFAULT_MIN_MERGE_DOCS);
if (minMergeDocs != oldMinMergeDocs) {
logger.info("updating min_merge_docs from [{}] to [{}]", oldMinMergeDocs, minMergeDocs);
mergePolicy.setMinMergeDocs(minMergeDocs);
}
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogDocMergePolicyProvider.this.maxMergeDocs);
if (maxMergeDocs != LogDocMergePolicyProvider.this.maxMergeDocs) {
logger.info("updating max_merge_docs from [{}] to [{}]", LogDocMergePolicyProvider.this.maxMergeDocs, maxMergeDocs);
LogDocMergePolicyProvider.this.maxMergeDocs = maxMergeDocs;
for (CustomLogDocMergePolicy policy : policies) {
policy.setMaxMergeDocs(maxMergeDocs);
}
int oldMaxMergeDocs = mergePolicy.getMaxMergeDocs();
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS);
if (maxMergeDocs != oldMaxMergeDocs) {
logger.info("updating max_merge_docs from [{}] to [{}]", oldMaxMergeDocs, maxMergeDocs);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
}
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogDocMergePolicyProvider.this.mergeFactor);
if (mergeFactor != LogDocMergePolicyProvider.this.mergeFactor) {
logger.info("updating merge_factor from [{}] to [{}]", LogDocMergePolicyProvider.this.mergeFactor, mergeFactor);
LogDocMergePolicyProvider.this.mergeFactor = mergeFactor;
for (CustomLogDocMergePolicy policy : policies) {
policy.setMergeFactor(mergeFactor);
}
int oldMergeFactor = mergePolicy.getMergeFactor();
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogDocMergePolicy.DEFAULT_MERGE_FACTOR);
if (mergeFactor != oldMergeFactor) {
logger.info("updating merge_factor from [{}] to [{}]", oldMergeFactor, mergeFactor);
mergePolicy.setMergeFactor(mergeFactor);
}
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogDocMergePolicyProvider.this.noCFSRatio)));
final boolean compoundFormat = noCFSRatio != 0.0;
boolean oldCalibrateSizeByDeletes = mergePolicy.getCalibrateSizeByDeletes();
boolean calibrateSizeByDeletes = settings.getAsBoolean(INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES, true);
if (calibrateSizeByDeletes != oldCalibrateSizeByDeletes) {
logger.info("updating calibrate_size_by_deletes from [{}] to [{}]", oldCalibrateSizeByDeletes, calibrateSizeByDeletes);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
}
double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogDocMergePolicyProvider.this.noCFSRatio)));
if (noCFSRatio != LogDocMergePolicyProvider.this.noCFSRatio) {
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogDocMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
LogDocMergePolicyProvider.this.noCFSRatio = noCFSRatio;
for (CustomLogDocMergePolicy policy : policies) {
policy.setNoCFSRatio(noCFSRatio);
}
mergePolicy.setNoCFSRatio(noCFSRatio);
}
}
}
public static class CustomLogDocMergePolicy extends LogDocMergePolicy {
private final LogDocMergePolicyProvider provider;
public CustomLogDocMergePolicy(LogDocMergePolicyProvider provider) {
super();
this.provider = provider;
}
@Override
public void close() {
super.close();
provider.policies.remove(this);
}
}
}

View File

@ -28,5 +28,5 @@ import org.elasticsearch.index.shard.IndexShardComponent;
*/
public interface MergePolicyProvider<T extends MergePolicy> extends IndexShardComponent, CloseableIndexComponent {
T newMergePolicy();
T getMergePolicy();
}
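
Since MergePolicy is no longer Closeable in 4.10, the providers stop handing out fresh policy instances and tracking them in a CopyOnWriteArraySet; each provider now owns a single mutable policy, getMergePolicy() returns it, and the settings listener mutates that one live instance. A condensed sketch of that shape; the constructor parameters stand in for the Elasticsearch settings plumbing.

import org.apache.lucene.index.TieredMergePolicy;

// A provider owns exactly one policy instance; dynamic settings updates
// mutate it in place instead of building and tracking new policies.
public class SingleInstanceMergePolicyProvider {
    private final TieredMergePolicy mergePolicy = new TieredMergePolicy();

    public SingleInstanceMergePolicyProvider(double segmentsPerTier, int maxMergeAtOnce) {
        mergePolicy.setSegmentsPerTier(segmentsPerTier);
        mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
    }

    public TieredMergePolicy getMergePolicy() {
        return mergePolicy;   // the same live instance the IndexWriter uses
    }

    // Called when settings are refreshed; changes take effect on the live policy.
    public void onRefreshSettings(double segmentsPerTier, int maxMergeAtOnce) {
        if (segmentsPerTier != mergePolicy.getSegmentsPerTier()) {
            mergePolicy.setSegmentsPerTier(segmentsPerTier);
        }
        if (maxMergeAtOnce != mergePolicy.getMaxMergeAtOnce()) {
            mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
        }
    }
}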

View File

@ -28,65 +28,35 @@ import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.index.settings.IndexSettingsService;
import org.elasticsearch.index.store.Store;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<TieredMergePolicy> {
private final IndexSettingsService indexSettingsService;
private final Set<CustomTieredMergePolicyProvider> policies = new CopyOnWriteArraySet<>();
private volatile double forceMergeDeletesPctAllowed;
private volatile ByteSizeValue floorSegment;
private volatile int maxMergeAtOnce;
private volatile int maxMergeAtOnceExplicit;
private volatile ByteSizeValue maxMergedSegment;
private volatile double segmentsPerTier;
private volatile double reclaimDeletesWeight;
private final ApplySettings applySettings = new ApplySettings();
private final TieredMergePolicy mergePolicy = new TieredMergePolicy();
public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d;
public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = new ByteSizeValue(2, ByteSizeUnit.MB);
public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10;
public static final int DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT = 30;
public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = new ByteSizeValue(5, ByteSizeUnit.GB);
public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d;
public static final double DEFAULT_RECLAIM_DELETES_WEIGHT = 2.0d;
@Inject
public TieredMergePolicyProvider(Store store, IndexSettingsService indexSettingsService) {
super(store);
this.indexSettingsService = indexSettingsService;
this.forceMergeDeletesPctAllowed = componentSettings.getAsDouble("expunge_deletes_allowed", 10d); // percentage
this.floorSegment = componentSettings.getAsBytesSize("floor_segment", new ByteSizeValue(2, ByteSizeUnit.MB));
this.maxMergeAtOnce = componentSettings.getAsInt("max_merge_at_once", 10);
this.maxMergeAtOnceExplicit = componentSettings.getAsInt("max_merge_at_once_explicit", 30);
double forceMergeDeletesPctAllowed = componentSettings.getAsDouble("expunge_deletes_allowed", DEFAULT_EXPUNGE_DELETES_ALLOWED); // percentage
ByteSizeValue floorSegment = componentSettings.getAsBytesSize("floor_segment", DEFAULT_FLOOR_SEGMENT);
int maxMergeAtOnce = componentSettings.getAsInt("max_merge_at_once", DEFAULT_MAX_MERGE_AT_ONCE);
int maxMergeAtOnceExplicit = componentSettings.getAsInt("max_merge_at_once_explicit", DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
// TODO is this really a good default number for max_merge_segment, what happens for large indices, won't they end up with many segments?
this.maxMergedSegment = componentSettings.getAsBytesSize("max_merged_segment", componentSettings.getAsBytesSize("max_merge_segment", new ByteSizeValue(5, ByteSizeUnit.GB)));
this.segmentsPerTier = componentSettings.getAsDouble("segments_per_tier", 10.0d);
this.reclaimDeletesWeight = componentSettings.getAsDouble("reclaim_deletes_weight", 2.0d);
ByteSizeValue maxMergedSegment = componentSettings.getAsBytesSize("max_merged_segment", DEFAULT_MAX_MERGED_SEGMENT);
double segmentsPerTier = componentSettings.getAsDouble("segments_per_tier", DEFAULT_SEGMENTS_PER_TIER);
double reclaimDeletesWeight = componentSettings.getAsDouble("reclaim_deletes_weight", DEFAULT_RECLAIM_DELETES_WEIGHT);
fixSettingsIfNeeded();
logger.debug("using [tiered] merge policy with expunge_deletes_allowed[{}], floor_segment[{}], max_merge_at_once[{}], max_merge_at_once_explicit[{}], max_merged_segment[{}], segments_per_tier[{}], reclaim_deletes_weight[{}]",
forceMergeDeletesPctAllowed, floorSegment, maxMergeAtOnce, maxMergeAtOnceExplicit, maxMergedSegment, segmentsPerTier, reclaimDeletesWeight);
indexSettingsService.addListener(applySettings);
}
private void fixSettingsIfNeeded() {
// fixing maxMergeAtOnce, see TieredMergePolicy#setMaxMergeAtOnce
if (!(segmentsPerTier >= maxMergeAtOnce)) {
int newMaxMergeAtOnce = (int) segmentsPerTier;
// max merge at once should be at least 2
if (newMaxMergeAtOnce <= 1) {
newMaxMergeAtOnce = 2;
}
logger.debug("[tiered] merge policy changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or equal to it", maxMergeAtOnce, newMaxMergeAtOnce, segmentsPerTier);
this.maxMergeAtOnce = newMaxMergeAtOnce;
}
}
@Override
public TieredMergePolicy newMergePolicy() {
final CustomTieredMergePolicyProvider mergePolicy = new CustomTieredMergePolicyProvider(this);
maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
mergePolicy.setNoCFSRatio(noCFSRatio);
mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
@ -95,6 +65,28 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.mbFrac());
mergePolicy.setSegmentsPerTier(segmentsPerTier);
mergePolicy.setReclaimDeletesWeight(reclaimDeletesWeight);
logger.debug("using [tiered] merge mergePolicy with expunge_deletes_allowed[{}], floor_segment[{}], max_merge_at_once[{}], max_merge_at_once_explicit[{}], max_merged_segment[{}], segments_per_tier[{}], reclaim_deletes_weight[{}]",
forceMergeDeletesPctAllowed, floorSegment, maxMergeAtOnce, maxMergeAtOnceExplicit, maxMergedSegment, segmentsPerTier, reclaimDeletesWeight);
indexSettingsService.addListener(applySettings);
}
private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) {
// fixing maxMergeAtOnce, see TieredMergePolicy#setMaxMergeAtOnce
if (!(segmentsPerTier >= maxMergeAtOnce)) {
int newMaxMergeAtOnce = (int) segmentsPerTier;
// max merge at once should be at least 2
if (newMaxMergeAtOnce <= 1) {
newMaxMergeAtOnce = 2;
}
logger.debug("[tiered] merge mergePolicy changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or equal to it", maxMergeAtOnce, newMaxMergeAtOnce, segmentsPerTier);
maxMergeAtOnce = newMaxMergeAtOnce;
}
return maxMergeAtOnce;
}
@Override
public TieredMergePolicy getMergePolicy() {
return mergePolicy;
}
@ -114,95 +106,62 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
class ApplySettings implements IndexSettingsService.Listener {
@Override
public void onRefreshSettings(Settings settings) {
double expungeDeletesPctAllowed = settings.getAsDouble(INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED, TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed);
if (expungeDeletesPctAllowed != TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed) {
logger.info("updating [expunge_deletes_allowed] from [{}] to [{}]", TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed, expungeDeletesPctAllowed);
TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed = expungeDeletesPctAllowed;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setForceMergeDeletesPctAllowed(expungeDeletesPctAllowed);
}
double oldExpungeDeletesPctAllowed = mergePolicy.getForceMergeDeletesPctAllowed();
double expungeDeletesPctAllowed = settings.getAsDouble(INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED, DEFAULT_EXPUNGE_DELETES_ALLOWED);
if (expungeDeletesPctAllowed != oldExpungeDeletesPctAllowed) {
logger.info("updating [expunge_deletes_allowed] from [{}] to [{}]", oldExpungeDeletesPctAllowed, expungeDeletesPctAllowed);
mergePolicy.setForceMergeDeletesPctAllowed(expungeDeletesPctAllowed);
}
ByteSizeValue floorSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_FLOOR_SEGMENT, TieredMergePolicyProvider.this.floorSegment);
if (!floorSegment.equals(TieredMergePolicyProvider.this.floorSegment)) {
logger.info("updating [floor_segment] from [{}] to [{}]", TieredMergePolicyProvider.this.floorSegment, floorSegment);
TieredMergePolicyProvider.this.floorSegment = floorSegment;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setFloorSegmentMB(floorSegment.mbFrac());
}
double oldFloorSegmentMB = mergePolicy.getFloorSegmentMB();
ByteSizeValue floorSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_FLOOR_SEGMENT, DEFAULT_FLOOR_SEGMENT);
if (floorSegment.mbFrac() != oldFloorSegmentMB) {
logger.info("updating [floor_segment] from [{}mb] to [{}]", oldFloorSegmentMB, floorSegment);
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
}
int maxMergeAtOnce = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE, TieredMergePolicyProvider.this.maxMergeAtOnce);
if (maxMergeAtOnce != TieredMergePolicyProvider.this.maxMergeAtOnce) {
logger.info("updating [max_merge_at_once] from [{}] to [{}]", TieredMergePolicyProvider.this.maxMergeAtOnce, maxMergeAtOnce);
TieredMergePolicyProvider.this.maxMergeAtOnce = maxMergeAtOnce;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setMaxMergeAtOnce(maxMergeAtOnce);
}
double oldSegmentsPerTier = mergePolicy.getSegmentsPerTier();
double segmentsPerTier = settings.getAsDouble(INDEX_MERGE_POLICY_SEGMENTS_PER_TIER, DEFAULT_SEGMENTS_PER_TIER);
if (segmentsPerTier != oldSegmentsPerTier) {
logger.info("updating [segments_per_tier] from [{}] to [{}]", oldSegmentsPerTier, segmentsPerTier);
mergePolicy.setSegmentsPerTier(segmentsPerTier);
}
int maxMergeAtOnceExplicit = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_EXPLICIT, TieredMergePolicyProvider.this.maxMergeAtOnceExplicit);
if (maxMergeAtOnceExplicit != TieredMergePolicyProvider.this.maxMergeAtOnceExplicit) {
logger.info("updating [max_merge_at_once_explicit] from [{}] to [{}]", TieredMergePolicyProvider.this.maxMergeAtOnceExplicit, maxMergeAtOnceExplicit);
TieredMergePolicyProvider.this.maxMergeAtOnceExplicit = maxMergeAtOnceExplicit;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setMaxMergeAtOnceExplicit(maxMergeAtOnceExplicit);
}
int oldMaxMergeAtOnce = mergePolicy.getMaxMergeAtOnce();
int maxMergeAtOnce = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE, DEFAULT_MAX_MERGE_AT_ONCE);
if (maxMergeAtOnce != oldMaxMergeAtOnce) {
logger.info("updating [max_merge_at_once] from [{}] to [{}]", oldMaxMergeAtOnce, maxMergeAtOnce);
maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
}
ByteSizeValue maxMergedSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT, TieredMergePolicyProvider.this.maxMergedSegment);
if (!maxMergedSegment.equals(TieredMergePolicyProvider.this.maxMergedSegment)) {
logger.info("updating [max_merged_segment] from [{}] to [{}]", TieredMergePolicyProvider.this.maxMergedSegment, maxMergedSegment);
TieredMergePolicyProvider.this.maxMergedSegment = maxMergedSegment;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setFloorSegmentMB(maxMergedSegment.mbFrac());
}
int oldMaxMergeAtOnceExplicit = mergePolicy.getMaxMergeAtOnceExplicit();
int maxMergeAtOnceExplicit = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_EXPLICIT, DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
if (maxMergeAtOnceExplicit != oldMaxMergeAtOnceExplicit) {
logger.info("updating [max_merge_at_once_explicit] from [{}] to [{}]", oldMaxMergeAtOnceExplicit, maxMergeAtOnceExplicit);
mergePolicy.setMaxMergeAtOnceExplicit(maxMergeAtOnceExplicit);
}
double segmentsPerTier = settings.getAsDouble(INDEX_MERGE_POLICY_SEGMENTS_PER_TIER, TieredMergePolicyProvider.this.segmentsPerTier);
if (segmentsPerTier != TieredMergePolicyProvider.this.segmentsPerTier) {
logger.info("updating [segments_per_tier] from [{}] to [{}]", TieredMergePolicyProvider.this.segmentsPerTier, segmentsPerTier);
TieredMergePolicyProvider.this.segmentsPerTier = segmentsPerTier;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setSegmentsPerTier(segmentsPerTier);
}
double oldMaxMergedSegmentMB = mergePolicy.getMaxMergedSegmentMB();
ByteSizeValue maxMergedSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT, DEFAULT_MAX_MERGED_SEGMENT);
if (maxMergedSegment.mbFrac() != oldMaxMergedSegmentMB) {
logger.info("updating [max_merged_segment] from [{}mb] to [{}]", oldMaxMergedSegmentMB, maxMergedSegment);
mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.mbFrac());
}
double reclaimDeletesWeight = settings.getAsDouble(INDEX_MERGE_POLICY_RECLAIM_DELETES_WEIGHT, TieredMergePolicyProvider.this.reclaimDeletesWeight);
if (reclaimDeletesWeight != TieredMergePolicyProvider.this.reclaimDeletesWeight) {
logger.info("updating [reclaim_deletes_weight] from [{}] to [{}]", TieredMergePolicyProvider.this.reclaimDeletesWeight, reclaimDeletesWeight);
TieredMergePolicyProvider.this.reclaimDeletesWeight = reclaimDeletesWeight;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setReclaimDeletesWeight(reclaimDeletesWeight);
}
double oldReclaimDeletesWeight = mergePolicy.getReclaimDeletesWeight();
double reclaimDeletesWeight = settings.getAsDouble(INDEX_MERGE_POLICY_RECLAIM_DELETES_WEIGHT, DEFAULT_RECLAIM_DELETES_WEIGHT);
if (reclaimDeletesWeight != oldReclaimDeletesWeight) {
logger.info("updating [reclaim_deletes_weight] from [{}] to [{}]", oldReclaimDeletesWeight, reclaimDeletesWeight);
mergePolicy.setReclaimDeletesWeight(reclaimDeletesWeight);
}
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
if (noCFSRatio != TieredMergePolicyProvider.this.noCFSRatio) {
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(TieredMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
mergePolicy.setNoCFSRatio(noCFSRatio);
TieredMergePolicyProvider.this.noCFSRatio = noCFSRatio;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setNoCFSRatio(noCFSRatio);
}
}
fixSettingsIfNeeded();
}
}
public static class CustomTieredMergePolicyProvider extends TieredMergePolicy {
private final TieredMergePolicyProvider provider;
public CustomTieredMergePolicyProvider(TieredMergePolicyProvider provider) {
super();
this.provider = provider;
}
@Override
public void close() {
super.close();
provider.policies.remove(this);
}
}
}

View File

@ -27,8 +27,7 @@ import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
@ -211,11 +210,11 @@ public class CommonTermsQueryParser implements QueryParser {
try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
source.reset();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
BytesRefBuilder builder = new BytesRefBuilder();
while (source.incrementToken()) {
BytesRef ref = new BytesRef(termAtt.length() * 4); // oversize for
// UTF-8
UnicodeUtil.UTF16toUTF8(termAtt.buffer(), 0, termAtt.length(), ref);
query.add(new Term(field, ref));
builder.copyChars(termAtt);
query.add(new Term(field, builder.toBytesRef()));
count++;
}
}
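The hunk above replaces the hand-rolled UnicodeUtil.UTF16toUTF8 conversion with the BytesRefBuilder class introduced in Lucene 4.10. A minimal, self-contained sketch of that pattern follows; the field name and tokens are hypothetical, not taken from the parser:

import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class BytesRefBuilderSketch {
    public static void main(String[] args) {
        BytesRefBuilder builder = new BytesRefBuilder();
        for (String token : new String[] {"quick", "brown", "fox"}) { // hypothetical tokens
            builder.copyChars(token);                 // replaces UnicodeUtil.UTF16toUTF8(...)
            BytesRef bytes = builder.toBytesRef();    // independent copy, safe to hold in a Term
            System.out.println(new Term("body", bytes)); // "body" is a hypothetical field name
        }
    }
}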

View File

@ -23,10 +23,7 @@ import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.*;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.lucene.search.AndFilter;
import org.elasticsearch.common.util.BytesRefHash;
@ -131,7 +128,7 @@ final class ParentIdsFilter extends Filter {
}
TermsEnum termsEnum = terms.iterator(null);
BytesRef uidSpare = new BytesRef();
BytesRefBuilder uidSpare = new BytesRefBuilder();
BytesRef idSpare = new BytesRef();
if (acceptDocs == null) {
@ -148,8 +145,8 @@ final class ParentIdsFilter extends Filter {
long size = parentIds.size();
for (int i = 0; i < size; i++) {
parentIds.get(i, idSpare);
Uid.createUidAsBytes(parentTypeBr, idSpare, uidSpare);
if (termsEnum.seekExact(uidSpare)) {
BytesRef uid = Uid.createUidAsBytes(parentTypeBr, idSpare, uidSpare);
if (termsEnum.seekExact(uid)) {
int docId;
docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
if (result == null) {

View File

@ -19,8 +19,10 @@
package org.elasticsearch.index.store;
import org.apache.lucene.store.*;
import org.elasticsearch.Version;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FileSwitchDirectory;
import org.apache.lucene.store.FilterDirectory;
import org.elasticsearch.common.Nullable;
/**
@ -28,10 +30,6 @@ import org.elasticsearch.common.Nullable;
*/
public final class DirectoryUtils {
static {
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9 : "Remove the special case for NRTCachingDirectory - it implements FilterDirectory in 4.10";
}
private DirectoryUtils() {} // no instance
/**
@ -60,8 +58,6 @@ public final class DirectoryUtils {
while (true) {
if ((current instanceof FilterDirectory)) {
current = ((FilterDirectory) current).getDelegate();
} else if (current instanceof NRTCachingDirectory) { // remove this when we upgrade to Lucene 4.10
current = ((NRTCachingDirectory) current).getDelegate();
} else {
break;
}
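With Lucene 4.10, NRTCachingDirectory extends FilterDirectory, so the special case removed above is no longer needed. A small runnable sketch of the simplified unwrap loop; the wrapped directories are arbitrary examples:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.store.RAMDirectory;

public class UnwrapSketch {
    static Directory unwrap(Directory dir) {
        Directory current = dir;
        while (current instanceof FilterDirectory) {
            current = ((FilterDirectory) current).getDelegate();
        }
        return current;
    }

    public static void main(String[] args) {
        Directory inner = new RAMDirectory();
        Directory wrapped = new NRTCachingDirectory(inner, 5.0, 60.0);
        System.out.println(unwrap(wrapped) == inner); // true once NRTCachingDirectory is a FilterDirectory
    }
}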

View File

@ -482,8 +482,8 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
}
Version maxVersion = Version.LUCENE_3_0; // we don't know which version was used to write so we take the max version.
for (SegmentCommitInfo info : segmentCommitInfos) {
final Version version = Lucene.parseVersionLenient(info.info.getVersion(), Version.LUCENE_3_0);
if (version.onOrAfter(maxVersion)) {
final Version version = info.info.getVersion();
if (version != null && version.onOrAfter(maxVersion)) {
maxVersion = version;
}
for (String file : info.files()) {

View File

@ -137,7 +137,7 @@ public class StoreFileMetaData implements Streamable {
out.writeVLong(length);
out.writeOptionalString(checksum);
if (out.getVersion().onOrAfter(org.elasticsearch.Version.V_1_3_0)) {
out.writeOptionalString(writtenBy == null ? null : writtenBy.name());
out.writeOptionalString(writtenBy == null ? null : writtenBy.toString());
}
if (out.getVersion().onOrAfter(org.elasticsearch.Version.V_1_4_0)) {
out.writeBytesRef(hash);

View File

@ -117,7 +117,7 @@ public final class RecoveryFileChunkRequest extends TransportRequest { // publi
out.writeOptionalString(metaData.checksum());
out.writeBytesReference(content);
if (out.getVersion().onOrAfter(org.elasticsearch.Version.V_1_3_0)) {
out.writeOptionalString(metaData.writtenBy() == null ? null : metaData.writtenBy().name());
out.writeOptionalString(metaData.writtenBy() == null ? null : metaData.writtenBy().toString());
}
}
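writtenBy.name() becomes writtenBy.toString() because org.apache.lucene.util.Version is a plain class rather than an enum in 4.10, so the version is round-tripped through its string form. A hedged sketch of that round trip; the version string is just an example:

import org.apache.lucene.util.Version;

public class VersionSketch {
    public static void main(String[] args) throws Exception {
        Version writtenBy = Version.parseLeniently("4.10.0"); // example version string
        String wire = writtenBy.toString();                   // replaces writtenBy.name()
        Version back = Version.parseLeniently(wire);
        System.out.println(back.onOrAfter(Version.LUCENE_4_9));
    }
}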

View File

@ -21,7 +21,7 @@ package org.elasticsearch.script.expression;
import org.apache.lucene.expressions.Bindings;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.XSimpleBindings;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
@ -39,7 +39,7 @@ import java.util.Map;
class ExpressionScript implements SearchScript {
final Expression expression;
final XSimpleBindings bindings;
final SimpleBindings bindings;
final ValueSource source;
final ReplaceableConstValueSource specialValue; // _value
Map<String, Scorer> context;
@ -47,7 +47,7 @@ class ExpressionScript implements SearchScript {
FunctionValues values;
int docid;
ExpressionScript(Expression e, XSimpleBindings b, ReplaceableConstValueSource v) {
ExpressionScript(Expression e, SimpleBindings b, ReplaceableConstValueSource v) {
expression = e;
bindings = b;
context = Collections.EMPTY_MAP;

View File

@ -20,9 +20,9 @@
package org.elasticsearch.script.expression;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.XSimpleBindings;
import org.apache.lucene.expressions.js.XJavascriptCompiler;
import org.apache.lucene.expressions.js.XVariableContext;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.expressions.js.VariableContext;
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
import org.apache.lucene.search.SortField;
import org.elasticsearch.common.Nullable;
@ -71,7 +71,7 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
public Object compile(String script) {
try {
// NOTE: validation is delayed to allow runtime vars, and we don't have access to per index stuff here
return XJavascriptCompiler.compile(script);
return JavascriptCompiler.compile(script);
} catch (ParseException e) {
throw new ExpressionScriptCompilationException("Failed to parse expression: " + script, e);
}
@ -83,7 +83,7 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
MapperService mapper = lookup.doc().mapperService();
// NOTE: if we need to do anything complicated with bindings in the future, we can just extend Bindings,
// instead of complicating SimpleBindings (which should stay simple)
XSimpleBindings bindings = new XSimpleBindings();
SimpleBindings bindings = new SimpleBindings();
ReplaceableConstValueSource specialValue = null;
for (String variable : expr.variables) {
@ -109,14 +109,14 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
}
} else {
XVariableContext[] parts = XVariableContext.parse(variable);
VariableContext[] parts = VariableContext.parse(variable);
if (parts[0].text.equals("doc") == false) {
throw new ExpressionScriptCompilationException("Unknown variable [" + parts[0].text + "] in expression");
}
if (parts.length < 2 || parts[1].type != XVariableContext.Type.STR_INDEX) {
if (parts.length < 2 || parts[1].type != VariableContext.Type.STR_INDEX) {
throw new ExpressionScriptCompilationException("Variable 'doc' in expression must be used with a specific field like: doc['myfield'].value");
}
if (parts.length < 3 || parts[2].type != XVariableContext.Type.MEMBER || parts[2].text.equals("value") == false) {
if (parts.length < 3 || parts[2].type != VariableContext.Type.MEMBER || parts[2].text.equals("value") == false) {
throw new ExpressionScriptCompilationException("Invalid member for field data in expression. Only '.value' is currently supported.");
}
String fieldname = parts[1].text;

View File

@ -23,6 +23,7 @@ package org.elasticsearch.search;
import org.apache.lucene.index.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.index.fielddata.FieldData;
@ -439,7 +440,7 @@ public enum MultiValueMode {
*/
public NumericDocValues select(final SortedNumericDocValues values, final long missingValue, final FixedBitSet rootDocs, final FixedBitSet innerDocs, int maxDoc) {
if (rootDocs == null || innerDocs == null) {
return select(FieldData.emptySortedNumeric(maxDoc), missingValue);
return select(DocValues.emptySortedNumeric(maxDoc), missingValue);
}
return new NumericDocValues() {
@ -625,7 +626,7 @@ public enum MultiValueMode {
}
return new BinaryDocValues() {
final BytesRef spare = new BytesRef();
final BytesRefBuilder spare = new BytesRefBuilder();
@Override
public BytesRef get(int rootDoc) {
@ -637,7 +638,7 @@ public enum MultiValueMode {
final int prevRootDoc = rootDocs.prevSetBit(rootDoc - 1);
final int firstNestedDoc = innerDocs.nextSetBit(prevRootDoc + 1);
BytesRef accumulated = null;
BytesRefBuilder accumulated = null;
for (int doc = firstNestedDoc; doc != -1 && doc < rootDoc; doc = innerDocs.nextSetBit(doc + 1)) {
values.setDocument(doc);
@ -647,7 +648,7 @@ public enum MultiValueMode {
spare.copyBytes(innerValue);
accumulated = spare;
} else {
final BytesRef applied = apply(accumulated, innerValue);
final BytesRef applied = apply(accumulated.get(), innerValue);
if (applied == innerValue) {
accumulated.copyBytes(innerValue);
}
@ -655,7 +656,7 @@ public enum MultiValueMode {
}
}
return accumulated == null ? missingValue : accumulated;
return accumulated == null ? missingValue : accumulated.get();
}
};
}

View File

@ -20,8 +20,8 @@
package org.elasticsearch.search.aggregations;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.search.aggregations.support.AggregationContext;
@ -42,8 +42,8 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
static class PerSegmentCollects {
AtomicReaderContext readerContext;
AppendingPackedLongBuffer docs;
AppendingPackedLongBuffer buckets;
PackedLongValues.Builder docs;
PackedLongValues.Builder buckets;
int lastDocId = 0;
PerSegmentCollects(AtomicReaderContext readerContext) {
@ -54,7 +54,7 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
if (docs == null) {
// TODO unclear what might be reasonable constructor args to pass to this collection
// No way of accurately predicting how many docs will be collected
docs = new AppendingPackedLongBuffer();
docs = PackedLongValues.packedBuilder(PackedInts.COMPACT);
}
// Store as delta-encoded for better compression
docs.add(doc - lastDocId);
@ -63,7 +63,7 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
if (owningBucketOrdinal != 0) {
// Store all of the prior bucketOrds (which up until now have
// all been zero based)
buckets = new AppendingPackedLongBuffer();
buckets = PackedLongValues.packedBuilder(PackedInts.COMPACT);
for (int i = 0; i < docs.size() - 1; i++) {
buckets.add(0);
}
@ -75,12 +75,6 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
}
}
void endCollect() {
if (docs != null) {
docs.freeze();
}
if (buckets != null) {
buckets.freeze();
}
}
boolean hasItems() {
@ -94,15 +88,15 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
return;
}
if (buckets == null) {
final AppendingDeltaPackedLongBuffer.Iterator docsIter = docs.iterator();
final PackedLongValues.Iterator docsIter = docs.build().iterator();
while (docsIter.hasNext()) {
lastDocId += (int) docsIter.next();
collector.collect(lastDocId, 0);
}
} else {
assert docs.size() == buckets.size();
final AppendingDeltaPackedLongBuffer.Iterator docsIter = docs.iterator();
final AppendingDeltaPackedLongBuffer.Iterator bucketsIter = buckets.iterator();
final PackedLongValues.Iterator docsIter = docs.build().iterator();
final PackedLongValues.Iterator bucketsIter = buckets.build().iterator();
while (docsIter.hasNext()) {
lastDocId += (int) docsIter.next();
collector.collect(lastDocId, bucketsIter.next());
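AppendingPackedLongBuffer is gone in 4.10; PackedLongValues.Builder takes its place, and freeze() is replaced by build(). A standalone sketch of the append/build/iterate cycle with made-up doc ids:

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class PackedLongValuesSketch {
    public static void main(String[] args) {
        PackedLongValues.Builder docs = PackedLongValues.packedBuilder(PackedInts.COMPACT);
        int lastDocId = 0;
        for (int doc : new int[] {3, 7, 42}) {   // hypothetical doc ids
            docs.add(doc - lastDocId);           // delta-encode, as the collector above does
            lastDocId = doc;
        }
        PackedLongValues.Iterator it = docs.build().iterator(); // build() replaces freeze()
        int doc = 0;
        while (it.hasNext()) {
            doc += (int) it.next();
            System.out.println(doc);
        }
    }
}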

View File

@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BytesRefHash;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
@ -44,7 +45,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
protected final BytesRefHash bucketOrds;
private final IncludeExclude includeExclude;
private SortedBinaryDocValues values;
private final BytesRef previous;
private final BytesRefBuilder previous;
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
InternalOrder order, BucketCountThresholds bucketCountThresholds,
@ -54,7 +55,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
this.valuesSource = valuesSource;
this.includeExclude = includeExclude;
bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.bigArrays());
previous = new BytesRef();
previous = new BytesRefBuilder();
}
@Override
@ -74,13 +75,13 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
final int valuesCount = values.count();
// SortedBinaryDocValues don't guarantee uniqueness so we need to take care of dups
previous.length = 0;
previous.clear();
for (int i = 0; i < valuesCount; ++i) {
final BytesRef bytes = values.valueAt(i);
if (includeExclude != null && !includeExclude.accept(bytes)) {
continue;
}
if (previous.equals(bytes)) {
if (previous.get().equals(bytes)) {
continue;
}
long bucketOrdinal = bucketOrds.add(bytes);

View File

@ -21,9 +21,8 @@ package org.elasticsearch.search.aggregations.bucket.terms.support;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser;
@ -43,7 +42,7 @@ public class IncludeExclude {
private final Matcher include;
private final Matcher exclude;
private final CharsRef scratch = new CharsRef();
private final CharsRefBuilder scratch = new CharsRefBuilder();
/**
* @param include The regular expression pattern for the terms to be included
@ -61,18 +60,18 @@ public class IncludeExclude {
* Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
*/
public boolean accept(BytesRef value) {
UnicodeUtil.UTF8toUTF16(value, scratch);
scratch.copyUTF8Bytes(value);
if (include == null) {
// exclude must not be null
return !exclude.reset(scratch).matches();
return !exclude.reset(scratch.get()).matches();
}
if (!include.reset(scratch).matches()) {
if (!include.reset(scratch.get()).matches()) {
return false;
}
if (exclude == null) {
return true;
}
return !exclude.reset(scratch).matches();
return !exclude.reset(scratch.get()).matches();
}
/**
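The include/exclude matcher now decodes terms through CharsRefBuilder.copyUTF8Bytes instead of UnicodeUtil.UTF8toUTF16. A small sketch of that decode-and-match pattern, with an example pattern and term:

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

public class IncludeExcludeSketch {
    public static void main(String[] args) {
        CharsRefBuilder scratch = new CharsRefBuilder();
        Matcher include = Pattern.compile("foo.*").matcher(""); // hypothetical include pattern
        BytesRef value = new BytesRef("foobar");                // hypothetical term bytes
        scratch.copyUTF8Bytes(value);                           // replaces UnicodeUtil.UTF8toUTF16(value, scratch)
        System.out.println(include.reset(scratch.get()).matches());
    }
}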

View File

@ -45,7 +45,7 @@ public class ScriptBytesValues extends SortingBinaryDocValues implements ScriptV
private void set(int i, Object o) {
if (o == null) {
values[i].length = 0;
values[i].clear();
} else {
values[i].copyChars(o.toString());
}

View File

@ -20,7 +20,8 @@
package org.elasticsearch.search.lookup;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRefBuilder;
import java.io.IOException;
import java.util.Iterator;
@ -36,17 +37,19 @@ public class CachedPositionIterator extends PositionIterator {
// all payloads of the term in the current document in one bytes array.
// payloadStarts and payloadLength mark the start and end of one payload.
final BytesRef payloads = new BytesRef();
final BytesRefBuilder payloads = new BytesRefBuilder();
final IntsRef payloadsLengths = new IntsRef(0);
final IntsRefBuilder payloadsLengths = new IntsRefBuilder();
final IntsRef payloadsStarts = new IntsRef(0);
final IntsRefBuilder payloadsStarts = new IntsRefBuilder();
final IntsRef positions = new IntsRef(0);
final IntsRefBuilder positions = new IntsRefBuilder();
final IntsRef startOffsets = new IntsRef(0);
final IntsRefBuilder startOffsets = new IntsRefBuilder();
final IntsRef endOffsets = new IntsRef(0);
final IntsRefBuilder endOffsets = new IntsRefBuilder();
final BytesRef payload = new BytesRef();
@Override
public Iterator<TermPosition> reset() {
@ -61,12 +64,13 @@ public class CachedPositionIterator extends PositionIterator {
@Override
public TermPosition next() {
termPosition.position = positions.ints[pos];
termPosition.startOffset = startOffsets.ints[pos];
termPosition.endOffset = endOffsets.ints[pos];
termPosition.payload = payloads;
payloads.offset = payloadsStarts.ints[pos];
payloads.length = payloadsLengths.ints[pos];
termPosition.position = positions.intAt(pos);
termPosition.startOffset = startOffsets.intAt(pos);
termPosition.endOffset = endOffsets.intAt(pos);
termPosition.payload = payload;
payload.bytes = payloads.bytes();
payload.offset = payloadsStarts.intAt(pos);
payload.length = payloadsLengths.intAt(pos);
pos++;
return termPosition;
}
@ -82,44 +86,34 @@ public class CachedPositionIterator extends PositionIterator {
TermPosition termPosition;
for (int i = 0; i < freq; i++) {
termPosition = super.next();
positions.ints[i] = termPosition.position;
positions.setIntAt(i, termPosition.position);
addPayload(i, termPosition.payload);
startOffsets.ints[i] = termPosition.startOffset;
endOffsets.ints[i] = termPosition.endOffset;
startOffsets.setIntAt(i, termPosition.startOffset);
endOffsets.setIntAt(i, termPosition.endOffset);
}
}
private void ensureSize(int freq) {
if (freq == 0) {
return;
}
if (startOffsets.ints.length < freq) {
startOffsets.grow(freq);
endOffsets.grow(freq);
positions.grow(freq);
payloadsLengths.grow(freq);
payloadsStarts.grow(freq);
}
payloads.offset = 0;
payloadsLengths.offset = 0;
payloadsStarts.offset = 0;
startOffsets.grow(freq);
endOffsets.grow(freq);
positions.grow(freq);
payloadsLengths.grow(freq);
payloadsStarts.grow(freq);
payloads.grow(freq * 8);// this is just a guess....
}
private void addPayload(int i, BytesRef currPayload) {
if (currPayload != null) {
payloadsLengths.ints[i] = currPayload.length;
payloadsStarts.ints[i] = i == 0 ? 0 : payloadsStarts.ints[i - 1] + payloadsLengths.ints[i - 1];
if (payloads.bytes.length < payloadsStarts.ints[i] + payloadsLengths.ints[i]) {
payloads.offset = 0; // the offset serves no purpose here. but
// we must assure that it is 0 before
// grow() is called
payloads.grow(payloads.bytes.length * 2); // just a guess
}
System.arraycopy(currPayload.bytes, currPayload.offset, payloads.bytes, payloadsStarts.ints[i], currPayload.length);
payloadsLengths.setIntAt(i, currPayload.length);
payloadsStarts.setIntAt(i, i == 0 ? 0 : payloadsStarts.intAt(i - 1) + payloadsLengths.intAt(i - 1));
payloads.grow(payloadsStarts.intAt(i) + currPayload.length);
System.arraycopy(currPayload.bytes, currPayload.offset, payloads.bytes(), payloadsStarts.intAt(i), currPayload.length);
} else {
payloadsLengths.ints[i] = 0;
payloadsStarts.ints[i] = i == 0 ? 0 : payloadsStarts.ints[i - 1] + payloadsLengths.ints[i - 1];
payloadsLengths.setIntAt(i, 0);
payloadsStarts.setIntAt(i, i == 0 ? 0 : payloadsStarts.intAt(i - 1) + payloadsLengths.intAt(i - 1));
}
}
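IntsRefBuilder replaces the manually managed IntsRef offsets here: grow() sizes the backing array, and setIntAt()/intAt() write and read slots. A tiny sketch with made-up positions:

import org.apache.lucene.util.IntsRefBuilder;

public class IntsRefBuilderSketch {
    public static void main(String[] args) {
        IntsRefBuilder positions = new IntsRefBuilder();
        int freq = 3;                       // hypothetical term frequency
        positions.grow(freq);               // ensure capacity, as ensureSize() above does
        for (int i = 0; i < freq; i++) {
            positions.setIntAt(i, i * 10);  // hypothetical positions
        }
        for (int i = 0; i < freq; i++) {
            System.out.println(positions.intAt(i));
        }
    }
}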

View File

@ -21,8 +21,7 @@ package org.elasticsearch.search.lookup;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
public class TermPosition {
@ -30,11 +29,11 @@ public class TermPosition {
public int startOffset = -1;
public int endOffset = -1;
public BytesRef payload;
private CharsRef spare = new CharsRef(0);
private CharsRefBuilder spare = new CharsRefBuilder();
public String payloadAsString() {
if (payload != null && payload.length != 0) {
UnicodeUtil.UTF8toUTF16(payload.bytes, payload.offset, payload.length, spare);
spare.copyUTF8Bytes(payload);
return spare.toString();
} else {
return null;

View File

@ -25,6 +25,7 @@ import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.cache.fixedbitset.FixedBitSetFilter;
@ -156,12 +157,12 @@ public class ScriptSortParser implements SortParser {
protected SortedBinaryDocValues getValues(AtomicReaderContext context) {
searchScript.setNextReader(context);
final BinaryDocValues values = new BinaryDocValues() {
final BytesRef spare = new BytesRef();
final BytesRefBuilder spare = new BytesRefBuilder();
@Override
public BytesRef get(int docID) {
searchScript.setNextDocId(docID);
spare.copyChars(searchScript.run().toString());
return spare;
return spare.get();
}
};
return FieldData.singleton(values, null);

View File

@ -20,7 +20,7 @@ package org.elasticsearch.search.suggest;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
@ -76,7 +76,7 @@ public class SuggestPhase extends AbstractComponent implements SearchPhase {
public Suggest execute(SuggestionSearchContext suggest, IndexReader reader) {
try {
CharsRef spare = new CharsRef(); // Maybe add CharsRef to CacheRecycler?
CharsRefBuilder spare = new CharsRefBuilder();
final List<Suggestion<? extends Entry<? extends Option>>> suggestions = new ArrayList<>(suggest.suggestions().size());
for (Map.Entry<String, SuggestionSearchContext.SuggestionContext> entry : suggest.suggestions().entrySet()) {

View File

@ -25,8 +25,9 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.spell.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.ParseField;
@ -78,32 +79,14 @@ public final class SuggestUtils {
return directSpellChecker;
}
public static BytesRef join(BytesRef separator, BytesRef result, BytesRef... toJoin) {
int len = separator.length * toJoin.length - 1;
for (BytesRef br : toJoin) {
len += br.length;
}
result.grow(len);
return joinPreAllocated(separator, result, toJoin);
}
public static BytesRef joinPreAllocated(BytesRef separator, BytesRef result, BytesRef... toJoin) {
result.length = 0;
result.offset = 0;
public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
result.clear();
for (int i = 0; i < toJoin.length - 1; i++) {
BytesRef br = toJoin[i];
System.arraycopy(br.bytes, br.offset, result.bytes, result.offset, br.length);
result.offset += br.length;
System.arraycopy(separator.bytes, separator.offset, result.bytes, result.offset, separator.length);
result.offset += separator.length;
result.append(toJoin[i]);
result.append(separator);
}
final BytesRef br = toJoin[toJoin.length-1];
System.arraycopy(br.bytes, br.offset, result.bytes, result.offset, br.length);
result.length = result.offset + br.length;
result.offset = 0;
return result;
result.append(toJoin[toJoin.length-1]);
return result.get();
}
public static abstract class TokenConsumer {
@ -117,12 +100,9 @@ public final class SuggestUtils {
offsetAttr = stream.addAttribute(OffsetAttribute.class);
}
protected BytesRef fillBytesRef(BytesRef spare) {
spare.offset = 0;
spare.length = spare.bytes.length;
char[] source = charTermAttr.buffer();
UnicodeUtil.UTF16toUTF8(source, 0, charTermAttr.length(), spare);
return spare;
protected BytesRef fillBytesRef(BytesRefBuilder spare) {
spare.copyChars(charTermAttr);
return spare.get();
}
public abstract void nextToken() throws IOException;
@ -130,9 +110,9 @@ public final class SuggestUtils {
public void end() {}
}
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRef spare) throws IOException {
UnicodeUtil.UTF8toUTF16(toAnalyze, spare);
return analyze(analyzer, spare, field, consumer);
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException {
spare.copyUTF8Bytes(toAnalyze);
return analyze(analyzer, spare.get(), field, consumer);
}
public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException {
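The two join helpers collapse into a single BytesRefBuilder-based method, since the builder grows itself and the manual System.arraycopy bookkeeping is no longer needed. A runnable restatement of the new join() with example inputs:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class JoinSketch {
    static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
        result.clear();
        for (int i = 0; i < toJoin.length - 1; i++) {
            result.append(toJoin[i]);
            result.append(separator);
        }
        result.append(toJoin[toJoin.length - 1]);
        return result.get();
    }

    public static void main(String[] args) {
        BytesRef joined = join(new BytesRef(" "), new BytesRefBuilder(),
                new BytesRef("quick"), new BytesRef("brown"), new BytesRef("fox"));
        System.out.println(joined.utf8ToString());
    }
}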

View File

@ -20,21 +20,21 @@
package org.elasticsearch.search.suggest;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import java.io.IOException;
public abstract class Suggester<T extends SuggestionSearchContext.SuggestionContext> {
protected abstract Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
innerExecute(String name, T suggestion, IndexReader indexReader, CharsRef spare) throws IOException;
innerExecute(String name, T suggestion, IndexReader indexReader, CharsRefBuilder spare) throws IOException;
public abstract String[] names();
public abstract SuggestContextParser getContextParser();
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
execute(String name, T suggestion, IndexReader indexReader, CharsRef spare) throws IOException {
execute(String name, T suggestion, IndexReader indexReader, CharsRefBuilder spare) throws IOException {
// #3469 We want to ignore empty shards
if (indexReader.numDocs() == 0) {
return null;

View File

@ -187,7 +187,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
@Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
analyzingSuggestLookupProvider.parsePayload(payload, spare);
builder.addSurface(spare.surfaceForm, spare.payload, spare.weight);
builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
// multi fields have the same surface form so we sum up here
maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
}

View File

@ -25,10 +25,7 @@ import org.apache.lucene.index.*;
import org.apache.lucene.index.FilterAtomicReader.FilterTerms;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.IOContext.Context;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchIllegalStateException;
@ -342,12 +339,12 @@ public class Completion090PostingsFormat extends PostingsFormat {
ref.weight = input.readVLong() - 1;
int len = input.readVInt();
ref.surfaceForm.grow(len);
ref.surfaceForm.length = len;
input.readBytes(ref.surfaceForm.bytes, ref.surfaceForm.offset, ref.surfaceForm.length);
ref.surfaceForm.setLength(len);
input.readBytes(ref.surfaceForm.bytes(), 0, ref.surfaceForm.length());
len = input.readVInt();
ref.payload.grow(len);
ref.payload.length = len;
input.readBytes(ref.payload.bytes, ref.payload.offset, ref.payload.length);
ref.payload.setLength(len);
input.readBytes(ref.payload.bytes(), 0, ref.payload.length());
input.close();
}
}

View File

@ -24,9 +24,8 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.text.StringText;
@ -49,13 +48,13 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
@Override
protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(String name,
CompletionSuggestionContext suggestionContext, IndexReader indexReader, CharsRef spare) throws IOException {
CompletionSuggestionContext suggestionContext, IndexReader indexReader, CharsRefBuilder spare) throws IOException {
if (suggestionContext.mapper() == null || !(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
throw new ElasticsearchException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
}
CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
UnicodeUtil.UTF8toUTF16(suggestionContext.getText(), spare);
spare.copyUTF8Bytes(suggestionContext.getText());
CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
completionSuggestion.addTerm(completionSuggestEntry);
@ -73,7 +72,7 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
// docs from the segment that had a value in this segment.
continue;
}
List<Lookup.LookupResult> lookupResults = lookup.lookup(spare, false, suggestionContext.getSize());
List<Lookup.LookupResult> lookupResults = lookup.lookup(spare.get(), false, suggestionContext.getSize());
for (Lookup.LookupResult res : lookupResults) {
final String key = res.key.toString();

View File

@ -76,7 +76,7 @@ public final class CompletionTokenStream extends TokenStream {
* produced. Multi Fields have the same surface form and therefore sum up
*/
posInc = 0;
Util.toBytesRef(finiteStrings.next(), bytesAtt.getBytesRef()); // now we have UTF-8
Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
if (charTermAttribute != null) {
charTermAttribute.setLength(0);
charTermAttribute.append(bytesAtt.toUTF16());
@ -123,12 +123,17 @@ public final class CompletionTokenStream extends TokenStream {
public interface ByteTermAttribute extends TermToBytesRefAttribute {
// marker interface
/**
* Return the builder from which the term is derived.
*/
public BytesRefBuilder builder();
public CharSequence toUTF16();
}
public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
private final BytesRef bytes = new BytesRef();
private CharsRef charsRef;
private final BytesRefBuilder bytes = new BytesRefBuilder();
private CharsRefBuilder charsRef;
@Override
public void fillBytesRef() {
@ -136,13 +141,18 @@ public final class CompletionTokenStream extends TokenStream {
}
@Override
public BytesRef getBytesRef() {
public BytesRefBuilder builder() {
return bytes;
}
@Override
public BytesRef getBytesRef() {
return bytes.get();
}
@Override
public void clear() {
bytes.length = 0;
bytes.clear();
}
@Override
@ -154,10 +164,10 @@ public final class CompletionTokenStream extends TokenStream {
@Override
public CharSequence toUTF16() {
if (charsRef == null) {
charsRef = new CharsRef();
charsRef = new CharsRefBuilder();
}
UnicodeUtil.UTF8toUTF16(bytes, charsRef);
return charsRef;
charsRef.copyUTF8Bytes(getBytesRef());
return charsRef.get();
}
}
}

View File

@ -20,6 +20,7 @@
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import java.io.IOException;
@ -30,8 +31,8 @@ interface PayloadProcessor {
void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException;
static class SuggestPayload {
final BytesRef payload = new BytesRef();
final BytesRefBuilder payload = new BytesRefBuilder();
long weight = 0;
final BytesRef surfaceForm = new BytesRef();
final BytesRefBuilder surfaceForm = new BytesRefBuilder();
}
}

View File

@ -25,8 +25,9 @@ import com.google.common.collect.Lists;
import org.apache.lucene.analysis.PrefixAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
@ -278,9 +279,9 @@ public class CategoryContextMapping extends ContextMapping {
public Automaton toAutomaton() {
List<Automaton> automatons = new ArrayList<>();
for (CharSequence value : values) {
automatons.add(BasicAutomata.makeString(value.toString()));
automatons.add(Automata.makeString(value.toString()));
}
return Automaton.union(automatons);
return Operations.union(automatons);
}
@Override

View File

@ -24,9 +24,9 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.fst.FST;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.ToXContent;
@ -246,21 +246,20 @@ public abstract class ContextMapping implements ToXContent {
* @return Automaton matching the given Query
*/
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
Automaton a = BasicAutomata.makeEmptyString();
Automaton a = Automata.makeEmptyString();
Automaton gap = BasicAutomata.makeChar(ContextMapping.SEPARATOR);
Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR);
if (preserveSep) {
// if separators are preserved the fst contains a SEP_LABEL
// behind each gap. To have a matching automaton, we need to
// include the SEP_LABEL in the query as well
gap = BasicOperations.concatenate(gap, BasicAutomata.makeChar(XAnalyzingSuggester.SEP_LABEL));
gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL));
}
for (ContextQuery query : queries) {
a = Automaton.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
}
BasicOperations.determinize(a);
return a;
return Operations.determinize(a);
}
/**
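BasicAutomata/BasicOperations become Automata/Operations in 4.10, and determinize now returns the result instead of mutating its argument. A short sketch of the new entry points; the value and separator are placeholders, not the suggester's actual labels:

import java.util.Arrays;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

public class AutomatonSketch {
    public static void main(String[] args) {
        Automaton value = Automata.makeString("ctx1");  // hypothetical context value
        Automaton gap = Automata.makeChar('\u001F');    // hypothetical separator label
        Automaton joined = Operations.concatenate(Arrays.asList(value, gap, Automata.makeEmptyString()));
        Automaton det = Operations.determinize(joined); // returns a new, deterministic automaton
        System.out.println(det.isDeterministic());
    }
}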

View File

@ -24,9 +24,9 @@ import com.google.common.collect.Lists;
import org.apache.lucene.analysis.PrefixAnalyzer.PrefixTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.fst.FST;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.geo.GeoHashUtils;
@ -689,12 +689,12 @@ public class GeolocationContextMapping extends ContextMapping {
public Automaton toAutomaton() {
Automaton automaton;
if(precisions == null || precisions.length == 0) {
automaton = BasicAutomata.makeString(location);
automaton = Automata.makeString(location);
} else {
automaton = BasicAutomata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
automaton = Automata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
for (int i = 1; i < precisions.length; i++) {
final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[i])));
automaton = BasicOperations.union(automaton, BasicAutomata.makeString(cell));
automaton = Operations.union(automaton, Automata.makeString(cell));
}
}
return automaton;

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.suggest.phrase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
@ -46,10 +47,10 @@ public final class Correction implements Comparable<Correction> {
}
public BytesRef join(BytesRef separator, BytesRef preTag, BytesRef postTag) {
return join(separator, new BytesRef(), preTag, postTag);
return join(separator, new BytesRefBuilder(), preTag, postTag);
}
public BytesRef join(BytesRef separator, BytesRef result, BytesRef preTag, BytesRef postTag) {
public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) {
BytesRef[] toJoin = new BytesRef[this.candidates.length];
int len = separator.length * this.candidates.length - 1;
for (int i = 0; i < toJoin.length; i++) {
@ -58,7 +59,8 @@ public final class Correction implements Comparable<Correction> {
toJoin[i] = candidate.term;
} else {
final int maxLen = preTag.length + postTag.length + candidate.term.length;
final BytesRef highlighted = new BytesRef(maxLen);// just allocate once
final BytesRefBuilder highlighted = new BytesRefBuilder();// just allocate once
highlighted.grow(maxLen);
if (i == 0 || candidates[i-1].userInput) {
highlighted.append(preTag);
}
@ -66,13 +68,12 @@ public final class Correction implements Comparable<Correction> {
if (toJoin.length == i + 1 || candidates[i+1].userInput) {
highlighted.append(postTag);
}
toJoin[i] = highlighted;
toJoin[i] = highlighted.get();
}
len += toJoin[i].length;
}
result.offset = 0;
result.grow(len);
return SuggestUtils.joinPreAllocated(separator, result, toJoin);
return SuggestUtils.join(separator, result, toJoin);
}
/** Lower scores sorts first; if scores are equal,

View File

@ -24,17 +24,13 @@ import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.search.suggest.SuggestUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.*;
//TODO public for tests
public final class DirectCandidateGenerator extends CandidateGenerator {
@ -51,8 +47,8 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
private final Analyzer postFilter;
private final double nonErrorLikelihood;
private final boolean useTotalTermFrequency;
private final CharsRef spare = new CharsRef();
private final BytesRef byteSpare = new BytesRef();
private final CharsRefBuilder spare = new CharsRefBuilder();
private final BytesRefBuilder byteSpare = new BytesRefBuilder();
private final int numCandidates;
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
@ -129,11 +125,11 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
return set;
}
protected BytesRef preFilter(final BytesRef term, final CharsRef spare, final BytesRef byteSpare) throws IOException {
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
if (preFilter == null) {
return term;
}
final BytesRef result = byteSpare;
final BytesRefBuilder result = byteSpare;
SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {
@Override
@ -141,25 +137,25 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
this.fillBytesRef(result);
}
}, spare);
return result;
return result.get();
}
protected void postFilter(final Candidate candidate, final CharsRef spare, BytesRef byteSpare, final List<Candidate> candidates) throws IOException {
protected void postFilter(final Candidate candidate, final CharsRefBuilder spare, BytesRefBuilder byteSpare, final List<Candidate> candidates) throws IOException {
if (postFilter == null) {
candidates.add(candidate);
} else {
final BytesRef result = byteSpare;
final BytesRefBuilder result = byteSpare;
SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
@Override
public void nextToken() throws IOException {
this.fillBytesRef(result);
if (posIncAttr.getPositionIncrement() > 0 && result.bytesEquals(candidate.term)) {
BytesRef term = BytesRef.deepCopyOf(result);
if (posIncAttr.getPositionIncrement() > 0 && result.get().bytesEquals(candidate.term)) {
BytesRef term = result.toBytesRef();
long freq = frequency(term);
candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false));
candidates.add(new Candidate(result.toBytesRef(), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false));
} else {
candidates.add(new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false));
candidates.add(new Candidate(result.toBytesRef(), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false));
}
}
}, spare);

View File

@ -46,15 +46,15 @@ public final class LaplaceScorer extends WordScorer {
@Override
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
SuggestUtils.join(separator, spare, w_1.term, word.term);
return (alpha + frequency(spare)) / (alpha + w_1.frequency + vocabluarySize);
return (alpha + frequency(spare.get())) / (alpha + w_1.frequency + vocabluarySize);
}
@Override
protected double scoreTrigram(Candidate word, Candidate w_1, Candidate w_2) throws IOException {
SuggestUtils.join(separator, spare, w_2.term, w_1.term, word.term);
long trigramCount = frequency(spare);
long trigramCount = frequency(spare.get());
SuggestUtils.join(separator, spare, w_1.term, word.term);
return (alpha + trigramCount) / (alpha + frequency(spare) + vocabluarySize);
return (alpha + trigramCount) / (alpha + frequency(spare.get()) + vocabluarySize);
}

Some files were not shown because too many files have changed in this diff.