Upgrade to Lucene 4.9 (closes #6623)

Robert Muir 2014-06-26 08:18:59 -04:00
parent b43b56a6a8
commit b55ad98d73
135 changed files with 1025 additions and 2197 deletions

View File

@@ -18,8 +18,6 @@ java.util.Collections#sort(java.util.List,java.util.Comparator)
java.io.StringReader#<init>(java.lang.String) @ Use FastStringReader instead
org.apache.lucene.util.RamUsageEstimator#sizeOf(java.lang.Object) @ This can be a performance trap
@defaultMessage Reference management is tricky, leave it to SearcherManager
org.apache.lucene.index.IndexReader#decRef()
org.apache.lucene.index.IndexReader#incRef()
@@ -55,9 +53,3 @@ java.lang.Math#abs(long)
@defaultMessage Use Long.compare instead we are on Java7
com.google.common.primitives.Longs#compare(long,long)
@defaultMessage we have an optimized XStringField to reduce analysis creation overhead
org.apache.lucene.document.Field#<init>(java.lang.String,java.lang.String,org.apache.lucene.document.FieldType)
@defaultMessage Use XNativeFSLockFactory instead of the buggy NativeFSLockFactory see LUCENE-5738 - remove once Lucene 4.9 is released
org.apache.lucene.store.NativeFSLockFactory

View File

@@ -31,7 +31,7 @@
</parent>
<properties>
<lucene.version>4.8.1</lucene.version>
<lucene.version>4.9.0</lucene.version>
<tests.jvms>auto</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>

View File

@@ -1,62 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.CloseableThreadLocal;
import java.io.IOException;
/**
* A string/text field that optimizes the case for non-analyzed fields to reuse a thread-local token
* stream (instead of creating it each time). This reduces analysis chain overhead and object creation
* (which is significant, yay Attributes).
* <p/>
* Not to be confused with Lucene StringField, this handles analyzed text as well, and relies on providing
* the FieldType. Couldn't come up with a good name for this that is different from Text/String...
*/
public class XStringField extends Field {
private static final CloseableThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new CloseableThreadLocal<StringTokenStream>() {
@Override
protected StringTokenStream initialValue() {
return new StringTokenStream();
}
};
public XStringField(String name, String value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value;
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
if (!fieldType().indexed()) {
return null;
}
// Only use the cached TokenStream if the value is indexed and not-tokenized
if (fieldType().tokenized()) {
return super.tokenStream(analyzer);
}
StringTokenStream nonAnalyzedTokenStream = NOT_ANALYZED_TOKENSTREAM.get();
nonAnalyzedTokenStream.setValue((String) fieldsData);
return nonAnalyzedTokenStream;
}
}
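For context, a hedged sketch (not part of this commit) of how a caller might have used the helper deleted above: the field name and value are hypothetical, and with Lucene 4.9's reuse-aware Field#tokenStream(Analyzer, TokenStream) (see the AllField change near the end of this diff) a plain Field covers the same not-analyzed case, which is why the class and its forbidden-API entry can go.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
class XStringFieldUsageSketch {
    static Document example() {
        FieldType type = new FieldType();
        type.setIndexed(true);
        type.setTokenized(false);               // the not-analyzed case the thread-local cache targeted
        type.setIndexOptions(IndexOptions.DOCS_ONLY);
        type.freeze();
        Document doc = new Document();
        doc.add(new XStringField("_id", "42", type));   // hypothetical field name/value
        return doc;
    }
}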

View File

@@ -0,0 +1,306 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
/** maps per-segment ordinals to/from global ordinal space */
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
// TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
public class XOrdinalMap implements Accountable {
static {
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5780, LUCENE-5782)";
}
private static class SegmentMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
/** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
private static int[] map(final long[] weights) {
final int[] newToOld = new int[weights.length];
for (int i = 0; i < weights.length; ++i) {
newToOld[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
final int tmp = newToOld[i];
newToOld[i] = newToOld[j];
newToOld[j] = tmp;
}
@Override
protected int compare(int i, int j) {
// j first since we actually want higher weights first
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
}
}.sort(0, weights.length);
return newToOld;
}
/** Invert the map. */
private static int[] inverse(int[] map) {
final int[] inverse = new int[map.length];
for (int i = 0; i < map.length; ++i) {
inverse[map[i]] = i;
}
return inverse;
}
private final int[] newToOld, oldToNew;
SegmentMap(long[] weights) {
newToOld = map(weights);
oldToNew = inverse(newToOld);
assert Arrays.equals(newToOld, inverse(oldToNew));
}
int newToOld(int segment) {
return newToOld[segment];
}
int oldToNew(int segment) {
return oldToNew[segment];
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
}
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedDocValues} instance as a weight.
* @see #build(Object, TermsEnum[], long[], float)
*/
public static XOrdinalMap build(Object owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedSetDocValues} instance as a weight.
* @see #build(Object, TermsEnum[], long[], float)
*/
public static XOrdinalMap build(Object owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Creates an ordinal map that allows mapping ords to/from a merged
* space from <code>subs</code>.
* @param owner a cache key
* @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
* not be dense (e.g. can be FilteredTermsEnums).
* @param weights a weight for each sub. This is ideally correlated with
* the number of unique terms that each sub introduces compared
* to the other subs
* @throws IOException if an I/O error occurred.
*/
public static XOrdinalMap build(Object owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
if (subs.length != weights.length) {
throw new IllegalArgumentException("subs and weights must have the same length");
}
// enums are not sorted, so let's sort to save memory
final SegmentMap segmentMap = new SegmentMap(weights);
return new XOrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(XOrdinalMap.class);
// cache key of whoever asked for this awful thing
final Object owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
final MonotonicAppendingLongBuffer globalOrdDeltas;
// globalOrd -> first segment container
final AppendingPackedLongBuffer firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
final SegmentMap segmentMap;
// ram usage
final long ramBytesUsed;
XOrdinalMap(Object owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
this.segmentMap = segmentMap;
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long segmentOrds[] = new long[subs.length];
ReaderSlice slices[] = new ReaderSlice[subs.length];
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
for (int i = 0; i < slices.length; i++) {
slices[i] = new ReaderSlice(0, 0, i);
indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
}
MultiTermsEnum mte = new MultiTermsEnum(slices);
mte.reset(indexes);
long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
for (int i = 0; i < mte.getMatchCount(); i++) {
int segmentIndex = matches[i].index;
long segmentOrd = matches[i].terms.ord();
long delta = globalOrd - segmentOrd;
// We compute the least segment where the term occurs. In case the
// first segment contains most (or better all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd) {
ordDeltaBits[segmentIndex] |= delta;
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
assert firstSegmentIndex < segmentOrds.length;
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
firstSegments.freeze();
globalOrdDeltas.freeze();
for (int i = 0; i < ordDeltas.length; ++i) {
ordDeltas[i].freeze();
}
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed()
+ firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final MonotonicAppendingLongBuffer deltas = ordDeltas[i];
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert !it.hasNext();
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
/**
* Given a segment number, return a {@link LongValues} instance that maps
* segment ordinals to global ordinals.
*/
public LongValues getGlobalOrds(int segmentIndex) {
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
}
/**
* Given a global ordinal, returns the ordinal in the first segment that contains
* this term (i.e. the segment returned by {@link #getFirstSegmentNumber}).
*/
public long getFirstSegmentOrd(long globalOrd) {
return globalOrd - globalOrdDeltas.get(globalOrd);
}
/**
* Given a global ordinal, returns the index of the first
* segment that contains this term.
*/
public int getFirstSegmentNumber(long globalOrd) {
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
}
/**
* Returns the total number of unique terms in global ord space.
*/
public long getValueCount() {
return globalOrdDeltas.size();
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
}
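To make the new class's surface easier to follow, here is a minimal usage sketch (not part of this commit). It assumes every segment actually has SortedSetDocValues for the hypothetical field name and uses only the build/getGlobalOrds/getFirstSegment* methods defined above.
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.XOrdinalMap;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;
class GlobalOrdinalsSketch {
    static void example(IndexReader reader, String field) throws IOException {
        // one SortedSetDocValues per segment (assumed non-null here for brevity)
        SortedSetDocValues[] perSegment = new SortedSetDocValues[reader.leaves().size()];
        for (int i = 0; i < perSegment.length; i++) {
            AtomicReaderContext leaf = reader.leaves().get(i);
            perSegment[i] = leaf.reader().getSortedSetDocValues(field);
        }
        // weight each segment by its value count and build the merged ordinal space
        XOrdinalMap map = XOrdinalMap.build(reader.getCoreCacheKey(), perSegment, PackedInts.DEFAULT);
        // translate ordinal 0 of segment 0 into the global space, then back again
        LongValues toGlobal = map.getGlobalOrds(0);
        long globalOrd = toGlobal.get(0);
        int firstSegment = map.getFirstSegmentNumber(globalOrd);
        long ordInFirstSegment = map.getFirstSegmentOrd(globalOrd);
        System.out.println(globalOrd + " lives first in segment " + firstSegment + " at ord " + ordInFirstSegment);
    }
}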

View File

@@ -250,8 +250,8 @@ public class XAnalyzingSuggester extends Lookup {
}
/** Returns byte size of the underlying FST. */
public long sizeInBytes() {
return fst == null ? 0 : fst.sizeInBytes();
public long ramBytesUsed() {
return fst == null ? 0 : fst.ramBytesUsed();
}
private static void copyDestTransitions(State from, State to, List<Transition> transitions) {
@@ -910,7 +910,7 @@ public class XAnalyzingSuggester extends Lookup {
// TODO: we could walk & add simultaneously, so we
// don't have to alloc [possibly biggish]
// intermediate HashSet in RAM:
return XSpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
}
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {

View File

@@ -219,7 +219,7 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
}
Automaton toLevenshteinAutomata(Automaton automaton) {
final Set<IntsRef> ref = XSpecialOperations.getFiniteStrings(automaton, -1);
final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
Automaton subs[] = new Automaton[ref.size()];
int upto = 0;
for (IntsRef path : ref) {

View File

@@ -1,200 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search.suggest.analyzing;
import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Set;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.State;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.Util;
import org.elasticsearch.Version;
class XSpecialOperations {
// TODO Lucene 4.9: remove this once we upgrade; see
// LUCENE-5628
static {
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Remove this code once we upgrade to Lucene 4.9 where LUCENE-5628 is fixed";
}
private static class PathNode {
/** Which state the path node ends on, whose
* transitions we are enumerating. */
public State state;
/** Which state the current transition leads to. */
public State to;
/** Which transition we are on. */
public int transition;
/** Which label we are on, in the min-max range of the
* current Transition */
public int label;
public void resetState(State state) {
assert state.numTransitions() != 0;
this.state = state;
transition = 0;
Transition t = state.transitionsArray[transition];
label = t.getMin();
to = t.getDest();
}
/** Returns next label of current transition, or
* advances to next transition and returns its first
* label, if current one is exhausted. If there are
* no more transitions, returns -1. */
public int nextLabel() {
if (label > state.transitionsArray[transition].getMax()) {
// We've exhausted the current transition's labels;
// move to next transitions:
transition++;
if (transition >= state.numTransitions()) {
// We're done iterating transitions leaving this state
return -1;
}
Transition t = state.transitionsArray[transition];
label = t.getMin();
to = t.getDest();
}
return label++;
}
}
private static PathNode getNode(PathNode[] nodes, int index) {
assert index < nodes.length;
if (nodes[index] == null) {
nodes[index] = new PathNode();
}
return nodes[index];
}
// TODO: this is a dangerous method ... Automaton could be
// huge ... and it's better in general for caller to
// enumerate & process in a single walk:
/** Returns the set of accepted strings, up to at most
* <code>limit</code> strings. If more than <code>limit</code>
* strings are accepted, the first limit strings found are returned. If <code>limit</code> == -1, then
* the limit is infinite. If the {@link Automaton} has
* cycles then this method might throw {@code
* IllegalArgumentException} but that is not guaranteed
* when the limit is set. */
public static Set<IntsRef> getFiniteStrings(Automaton a, int limit) {
Set<IntsRef> results = new HashSet<>();
if (limit == -1 || limit > 0) {
// OK
} else {
throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit);
}
if (a.getSingleton() != null) {
// Easy case: automaton accepts only 1 string
results.add(Util.toUTF32(a.getSingleton(), new IntsRef()));
} else {
if (a.getInitialState().isAccept()) {
// Special case the empty string, as usual:
results.add(new IntsRef());
}
if (a.getInitialState().numTransitions() > 0 && (limit == -1 || results.size() < limit)) {
// TODO: we could use state numbers here and just
// alloc array, but asking for states array can be
// costly (it's lazily computed):
// Tracks which states are in the current path, for
// cycle detection:
Set<State> pathStates = Collections.newSetFromMap(new IdentityHashMap<State,Boolean>());
// Stack to hold our current state in the
// recursion/iteration:
PathNode[] nodes = new PathNode[4];
pathStates.add(a.getInitialState());
PathNode root = getNode(nodes, 0);
root.resetState(a.getInitialState());
IntsRef string = new IntsRef(1);
string.length = 1;
while (string.length > 0) {
PathNode node = nodes[string.length-1];
// Get next label leaving the current node:
int label = node.nextLabel();
if (label != -1) {
string.ints[string.length-1] = label;
if (node.to.isAccept()) {
// This transition leads to an accept state,
// so we save the current string:
results.add(IntsRef.deepCopyOf(string));
if (results.size() == limit) {
break;
}
}
if (node.to.numTransitions() != 0) {
// Now recurse: the destination of this transition has
// outgoing transitions:
if (pathStates.contains(node.to)) {
throw new IllegalArgumentException("automaton has cycles");
}
pathStates.add(node.to);
// Push node onto stack:
if (nodes.length == string.length) {
PathNode[] newNodes = new PathNode[ArrayUtil.oversize(nodes.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(nodes, 0, newNodes, 0, nodes.length);
nodes = newNodes;
}
getNode(nodes, string.length).resetState(node.to);
string.length++;
string.grow(string.length);
}
} else {
// No more transitions leaving this state,
// pop/return back to previous state:
assert pathStates.contains(node.state);
pathStates.remove(node.state);
string.length--;
}
}
}
}
return results;
}
}
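This forked copy can go because Lucene 4.9 ships the LUCENE-5628 fix; the suggester changes just above switch to the stock SpecialOperations. A small sketch (not part of this commit) of that replacement call:
import java.util.Set;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.SpecialOperations;
class FiniteStringsSketch {
    static Set<IntsRef> example() {
        // an automaton accepting exactly "foo" and "bar"
        Automaton a = BasicOperations.union(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar"));
        // -1 means no limit; may throw IllegalArgumentException if the automaton has cycles
        return SpecialOperations.getFiniteStrings(a, -1);
    }
}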

View File

@@ -89,25 +89,11 @@ public class BufferedChecksumIndexOutput extends BufferedIndexOutput {
}
}
@Override
public void seek(long pos) throws IOException {
// seek might be called on files, which means that the checksum is not file checksum
// but a checksum of the bytes written to this stream, which is the same for each
// type of file in lucene
super.seek(pos);
delegate.seek(pos);
}
@Override
public long length() throws IOException {
return delegate.length();
}
@Override
public void setLength(long length) throws IOException {
delegate.setLength(length);
}
@Override
public String toString() {
return delegate.toString();

View File

@@ -0,0 +1,155 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.store;
import java.io.IOException;
import java.util.zip.CRC32;
/** Base implementation class for buffered {@link IndexOutput}. */
public abstract class BufferedIndexOutput extends IndexOutput {
/** The default buffer size in bytes ({@value #DEFAULT_BUFFER_SIZE}). */
public static final int DEFAULT_BUFFER_SIZE = 16384;
private final int bufferSize;
private final byte[] buffer;
private long bufferStart = 0; // position in file of buffer
private int bufferPosition = 0; // position in buffer
private final CRC32 crc = new CRC32();
/**
* Creates a new {@link BufferedIndexOutput} with the default buffer size
* ({@value #DEFAULT_BUFFER_SIZE} bytes; see {@link #DEFAULT_BUFFER_SIZE})
*/
public BufferedIndexOutput() {
this(DEFAULT_BUFFER_SIZE);
}
/**
* Creates a new {@link BufferedIndexOutput} with the given buffer size.
* @param bufferSize the buffer size in bytes used to buffer writes internally.
* @throws IllegalArgumentException if the given buffer size is less than or equal to <tt>0</tt>
*/
public BufferedIndexOutput(int bufferSize) {
if (bufferSize <= 0) {
throw new IllegalArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
}
this.bufferSize = bufferSize;
buffer = new byte[bufferSize];
}
@Override
public void writeByte(byte b) throws IOException {
if (bufferPosition >= bufferSize)
flush();
buffer[bufferPosition++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
int bytesLeft = bufferSize - bufferPosition;
// is there enough space in the buffer?
if (bytesLeft >= length) {
// we add the data to the end of the buffer
System.arraycopy(b, offset, buffer, bufferPosition, length);
bufferPosition += length;
// if the buffer is full, flush it
if (bufferSize - bufferPosition == 0)
flush();
} else {
// is the data larger than the buffer?
if (length > bufferSize) {
// we flush the buffer
if (bufferPosition > 0)
flush();
// and write data at once
crc.update(b, offset, length);
flushBuffer(b, offset, length);
bufferStart += length;
} else {
// we fill/flush the buffer (until the input is written)
int pos = 0; // position in the input data
int pieceLength;
while (pos < length) {
pieceLength = (length - pos < bytesLeft) ? length - pos : bytesLeft;
System.arraycopy(b, pos + offset, buffer, bufferPosition, pieceLength);
pos += pieceLength;
bufferPosition += pieceLength;
// if the buffer is full, flush it
bytesLeft = bufferSize - bufferPosition;
if (bytesLeft == 0) {
flush();
bytesLeft = bufferSize;
}
}
}
}
}
@Override
public void flush() throws IOException {
crc.update(buffer, 0, bufferPosition);
flushBuffer(buffer, bufferPosition);
bufferStart += bufferPosition;
bufferPosition = 0;
}
/** Expert: implements buffer write. Writes bytes at the current position in
* the output.
* @param b the bytes to write
* @param len the number of bytes to write
*/
private void flushBuffer(byte[] b, int len) throws IOException {
flushBuffer(b, 0, len);
}
/** Expert: implements buffer write. Writes bytes at the current position in
* the output.
* @param b the bytes to write
* @param offset the offset in the byte array
* @param len the number of bytes to write
*/
protected abstract void flushBuffer(byte[] b, int offset, int len) throws IOException;
@Override
public void close() throws IOException {
flush();
}
@Override
public long getFilePointer() {
return bufferStart + bufferPosition;
}
@Override
public abstract long length() throws IOException;
/**
* Returns size of the used output buffer in bytes.
* */
public final int getBufferSize() {
return bufferSize;
}
@Override
public long getChecksum() throws IOException {
flush();
return crc.getValue();
}
}
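A minimal sketch (not part of this commit) of how this base class is meant to be extended: a subclass only supplies flushBuffer() and length(), while the code above takes care of buffering, getFilePointer() and the CRC32 checksum. The byte-array backing store here is purely illustrative.
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.lucene.store.BufferedIndexOutput;
class ByteArrayIndexOutputSketch extends BufferedIndexOutput {
    private final ByteArrayOutputStream out = new ByteArrayOutputStream();
    @Override
    protected void flushBuffer(byte[] b, int offset, int len) throws IOException {
        // called with whole buffers (or oversized writes) already folded into the CRC
        out.write(b, offset, len);
    }
    @Override
    public long length() throws IOException {
        // bytes flushed to the backing store so far; bytes still sitting in the
        // buffer are reflected by getFilePointer(), not by length()
        return out.size();
    }
}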

View File

@@ -117,12 +117,6 @@ public final class RateLimitedFSDirectory extends FilterDirectory{
return delegate.length();
}
@Override
public void seek(long pos) throws IOException {
flush();
delegate.seek(pos);
}
@Override
public void flush() throws IOException {
try {
@@ -132,11 +126,6 @@
}
}
@Override
public void setLength(long length) throws IOException {
delegate.setLength(length);
}
@Override
public void close() throws IOException {
try {

View File

@@ -1,246 +0,0 @@
package org.apache.lucene.store;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;
import java.nio.file.StandardOpenOption;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.Version;
/**
* <p>Implements {@link LockFactory} using native OS file
* locks. Note that because this LockFactory relies on
* java.nio.* APIs for locking, any problems with those APIs
* will cause locking to fail. Specifically, on certain NFS
* environments the java.nio.* locks will fail (the lock can
* incorrectly be double acquired) whereas {@link
* SimpleFSLockFactory} worked perfectly in those same
* environments. For NFS based access to an index, it's
* recommended that you try {@link SimpleFSLockFactory}
* first and work around the one limitation that a lock file
* could be left when the JVM exits abnormally.</p>
*
* <p>The primary benefit of {@link XNativeFSLockFactory} is
* that locks (not the lock file itself) will be properly
* removed (by the OS) if the JVM has an abnormal exit.</p>
*
* <p>Note that, unlike {@link SimpleFSLockFactory}, the existence of
* leftover lock files in the filesystem is fine because the OS
* will free the locks held against these files even though the
* files still remain. Lucene will never actively remove the lock
* files, so although you see them, the index may not be locked.</p>
*
* <p>Special care needs to be taken if you change the locking
* implementation: First be certain that no writer is in fact
* writing to the index otherwise you can easily corrupt
* your index. Be sure to do the LockFactory change on all Lucene
* instances and clean up all leftover lock files before starting
* the new configuration for the first time. Different implementations
* can not work together!</p>
*
* <p>If you suspect that this or any other LockFactory is
* not working properly in your environment, you can easily
* test it by using {@link VerifyingLockFactory}, {@link
* LockVerifyServer} and {@link LockStressTest}.</p>
*
* @see LockFactory
*/
public class XNativeFSLockFactory extends FSLockFactory {
static {
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48 : "Remove this class in Lucene 4.9";
}
/**
* Create a XNativeFSLockFactory instance, with null (unset)
* lock directory. When you pass this factory to a {@link FSDirectory}
* subclass, the lock directory is automatically set to the
* directory itself. Be sure to create one instance for each directory
* you create!
*/
public XNativeFSLockFactory() {
this((File) null);
}
/**
* Create a XNativeFSLockFactory instance, storing lock
* files into the specified lockDirName:
*
* @param lockDirName where lock files are created.
*/
public XNativeFSLockFactory(String lockDirName) {
this(new File(lockDirName));
}
/**
* Create a XNativeFSLockFactory instance, storing lock
* files into the specified lockDir:
*
* @param lockDir where lock files are created.
*/
public XNativeFSLockFactory(File lockDir) {
setLockDir(lockDir);
}
@Override
public synchronized Lock makeLock(String lockName) {
if (lockPrefix != null)
lockName = lockPrefix + "-" + lockName;
return new NativeFSLock(lockDir, lockName);
}
@Override
public void clearLock(String lockName) throws IOException {
makeLock(lockName).close();
}
}
class NativeFSLock extends Lock {
private FileChannel channel;
private FileLock lock;
private File path;
private File lockDir;
private static final Set<String> LOCK_HELD = Collections.synchronizedSet(new HashSet<String>());
public NativeFSLock(File lockDir, String lockFileName) {
this.lockDir = lockDir;
path = new File(lockDir, lockFileName);
}
@Override
public synchronized boolean obtain() throws IOException {
if (lock != null) {
// Our instance is already locked:
return false;
}
// Ensure that lockDir exists and is a directory.
if (!lockDir.exists()) {
if (!lockDir.mkdirs())
throw new IOException("Cannot create directory: " +
lockDir.getAbsolutePath());
} else if (!lockDir.isDirectory()) {
// TODO: NoSuchDirectoryException instead?
throw new IOException("Found regular file where directory expected: " +
lockDir.getAbsolutePath());
}
final String canonicalPath = path.getCanonicalPath();
// Make sure nobody else in-process has this lock held
// already, and, mark it held if not:
// This is a pretty crazy workaround for some documented
// but yet awkward JVM behavior:
//
// On some systems, closing a channel releases all locks held by the Java virtual machine on the underlying file
// regardless of whether the locks were acquired via that channel or via another channel open on the same file.
// It is strongly recommended that, within a program, a unique channel be used to acquire all locks on any given
// file.
//
// This essentially means if we close "A" channel for a given file all locks might be released... the odd part
// is that we can't re-obtain the lock in the same JVM but from a different process if that happens. Nevertheless
// this is super trappy. See LUCENE-5738
boolean obtained = false;
if (LOCK_HELD.add(canonicalPath)) {
try {
channel = FileChannel.open(path.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
try {
lock = channel.tryLock();
obtained = lock != null;
} catch (IOException | OverlappingFileLockException e) {
// At least on OS X, we will sometimes get an
// intermittent "Permission Denied" IOException,
// which seems to simply mean "you failed to get
// the lock". But other IOExceptions could be
// "permanent" (eg, locking is not supported via
// the filesystem). So, we record the failure
// reason here; the timeout obtain (usually the
// one calling us) will use this as "root cause"
// if it fails to get the lock.
failureReason = e;
}
} finally {
if (obtained == false) { // not successful - clear up and move out
clearLockHeld(path);
final FileChannel toClose = channel;
channel = null;
IOUtils.closeWhileHandlingException(toClose);
}
}
}
return obtained;
}
@Override
public synchronized void close() throws IOException {
try {
if (lock != null) {
try {
lock.release();
lock = null;
} finally {
clearLockHeld(path);
}
}
} finally {
IOUtils.close(channel);
channel = null;
}
}
private static final void clearLockHeld(File path) throws IOException {
boolean remove = LOCK_HELD.remove(path.getCanonicalPath());
assert remove : "Lock was cleared but never marked as held";
}
@Override
public synchronized boolean isLocked() {
// The test for isLocked is not directly possible with native file locks:
// First a shortcut, if a lock reference in this instance is available
if (lock != null) return true;
// Look if lock file is present; if not, there can definitely be no lock!
if (!path.exists()) return false;
// Try to obtain and release (if was locked) the lock
try {
boolean obtained = obtain();
if (obtained) close();
return !obtained;
} catch (IOException ioe) {
return false;
}
}
@Override
public String toString() {
return "NativeFSLock@" + path;
}
}
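With the LUCENE-5738 fix shipped in 4.9, this fork goes away and the stock NativeFSLockFactory is allowed again (its forbidden-API entry is removed at the top of this diff). A hedged sketch of the post-upgrade wiring, with a hypothetical index path:
import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.store.NativeFSLockFactory;
class LockFactorySketch {
    static Directory open() throws IOException {
        File indexDir = new File("/tmp/example-index");        // hypothetical path
        // back to the stock factory now that cross-channel double-acquire is fixed
        return new NIOFSDirectory(indexDir, new NativeFSLockFactory());
    }
}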

View File

@@ -42,152 +42,152 @@ public class Version implements Serializable {
// the (internal) format of the id is there so we can easily do after/before checks on the id
public static final int V_0_18_0_ID = /*00*/180099;
public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_1_ID = /*00*/180199;
public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_2_ID = /*00*/180299;
public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_3_ID = /*00*/180399;
public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_4_ID = /*00*/180499;
public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_5_ID = /*00*/180599;
public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_6_ID = /*00*/180699;
public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_7_ID = /*00*/180799;
public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_18_8_ID = /*00*/180899;
public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_0_RC1_ID = /*00*/190051;
public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_0_RC2_ID = /*00*/190052;
public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_0_RC3_ID = /*00*/190053;
public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_0_ID = /*00*/190099;
public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_1_ID = /*00*/190199;
public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_2_ID = /*00*/190299;
public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_3_ID = /*00*/190399;
public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_4_ID = /*00*/190499;
public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_5_ID = /*00*/190599;
public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_6_ID = /*00*/190699;
public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_7_ID = /*00*/190799;
public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_8_ID = /*00*/190899;
public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_9_ID = /*00*/190999;
public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_10_ID = /*00*/191099;
public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_11_ID = /*00*/191199;
public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_12_ID = /*00*/191299;
public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_19_13_ID = /*00*/191399;
public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_0_RC1_ID = /*00*/200051;
public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_0_ID = /*00*/200099;
public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_1_ID = /*00*/200199;
public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_2_ID = /*00*/200299;
public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_3_ID = /*00*/200399;
public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_4_ID = /*00*/200499;
public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_5_ID = /*00*/200599;
public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_6_ID = /*00*/200699;
public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_20_7_ID = /*00*/200799;
public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final int V_0_90_0_Beta1_ID = /*00*/900001;
public static final Version V_0_90_0_Beta1 = new Version(V_0_90_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_41);
public static final Version V_0_90_0_Beta1 = new Version(V_0_90_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_4_1);
public static final int V_0_90_0_RC1_ID = /*00*/900051;
public static final Version V_0_90_0_RC1 = new Version(V_0_90_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_41);
public static final Version V_0_90_0_RC1 = new Version(V_0_90_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_4_1);
public static final int V_0_90_0_RC2_ID = /*00*/900052;
public static final Version V_0_90_0_RC2 = new Version(V_0_90_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_42);
public static final Version V_0_90_0_RC2 = new Version(V_0_90_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_4_2);
public static final int V_0_90_0_ID = /*00*/900099;
public static final Version V_0_90_0 = new Version(V_0_90_0_ID, false, org.apache.lucene.util.Version.LUCENE_42);
public static final Version V_0_90_0 = new Version(V_0_90_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_2);
public static final int V_0_90_1_ID = /*00*/900199;
public static final Version V_0_90_1 = new Version(V_0_90_1_ID, false, org.apache.lucene.util.Version.LUCENE_43);
public static final Version V_0_90_1 = new Version(V_0_90_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_3);
public static final int V_0_90_2_ID = /*00*/900299;
public static final Version V_0_90_2 = new Version(V_0_90_2_ID, false, org.apache.lucene.util.Version.LUCENE_43);
public static final Version V_0_90_2 = new Version(V_0_90_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_3);
public static final int V_0_90_3_ID = /*00*/900399;
public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_44);
public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_4);
public static final int V_0_90_4_ID = /*00*/900499;
public static final Version V_0_90_4 = new Version(V_0_90_4_ID, false, org.apache.lucene.util.Version.LUCENE_44);
public static final Version V_0_90_4 = new Version(V_0_90_4_ID, false, org.apache.lucene.util.Version.LUCENE_4_4);
public static final int V_0_90_5_ID = /*00*/900599;
public static final Version V_0_90_5 = new Version(V_0_90_5_ID, false, org.apache.lucene.util.Version.LUCENE_44);
public static final Version V_0_90_5 = new Version(V_0_90_5_ID, false, org.apache.lucene.util.Version.LUCENE_4_4);
public static final int V_0_90_6_ID = /*00*/900699;
public static final Version V_0_90_6 = new Version(V_0_90_6_ID, false, org.apache.lucene.util.Version.LUCENE_45);
public static final Version V_0_90_6 = new Version(V_0_90_6_ID, false, org.apache.lucene.util.Version.LUCENE_4_5);
public static final int V_0_90_7_ID = /*00*/900799;
public static final Version V_0_90_7 = new Version(V_0_90_7_ID, false, org.apache.lucene.util.Version.LUCENE_45);
public static final Version V_0_90_7 = new Version(V_0_90_7_ID, false, org.apache.lucene.util.Version.LUCENE_4_5);
public static final int V_0_90_8_ID = /*00*/900899;
public static final Version V_0_90_8 = new Version(V_0_90_8_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_0_90_8 = new Version(V_0_90_8_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_0_90_9_ID = /*00*/900999;
public static final Version V_0_90_9 = new Version(V_0_90_9_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_0_90_9 = new Version(V_0_90_9_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_0_90_10_ID = /*00*/901099;
public static final Version V_0_90_10 = new Version(V_0_90_10_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_0_90_10 = new Version(V_0_90_10_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_0_90_11_ID = /*00*/901199;
public static final Version V_0_90_11 = new Version(V_0_90_11_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_0_90_11 = new Version(V_0_90_11_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_0_90_12_ID = /*00*/901299;
public static final Version V_0_90_12 = new Version(V_0_90_12_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_0_90_12 = new Version(V_0_90_12_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_0_90_13_ID = /*00*/901399;
public static final Version V_0_90_13 = new Version(V_0_90_13_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_0_90_13 = new Version(V_0_90_13_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_0_90_14_ID = /*00*/901499;
public static final Version V_0_90_14 = new Version(V_0_90_14_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_0_90_14 = new Version(V_0_90_14_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_0_Beta1_ID = /*00*/1000001;
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_45);
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_4_5);
public static final int V_1_0_0_Beta2_ID = /*00*/1000002;
public static final Version V_1_0_0_Beta2 = new Version(V_1_0_0_Beta2_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_0_Beta2 = new Version(V_1_0_0_Beta2_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_0_RC1_ID = /*00*/1000051;
public static final Version V_1_0_0_RC1 = new Version(V_1_0_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_0_RC1 = new Version(V_1_0_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_0_RC2_ID = /*00*/1000052;
public static final Version V_1_0_0_RC2 = new Version(V_1_0_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_0_RC2 = new Version(V_1_0_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_0_ID = /*00*/1000099;
public static final Version V_1_0_0 = new Version(V_1_0_0_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_0 = new Version(V_1_0_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_1_ID = /*00*/1000199;
public static final Version V_1_0_1 = new Version(V_1_0_1_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_1 = new Version(V_1_0_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_2_ID = /*00*/1000299;
public static final Version V_1_0_2 = new Version(V_1_0_2_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_2 = new Version(V_1_0_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_3_ID = /*00*/1000399;
public static final Version V_1_0_3 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_3 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_0_4_ID = /*00*/1000499;
public static final Version V_1_0_4 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_46);
public static final Version V_1_0_4 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
public static final int V_1_1_0_ID = /*00*/1010099;
public static final Version V_1_1_0 = new Version(V_1_1_0_ID, false, org.apache.lucene.util.Version.LUCENE_47);
public static final Version V_1_1_0 = new Version(V_1_1_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_7);
public static final int V_1_1_1_ID = /*00*/1010199;
public static final Version V_1_1_1 = new Version(V_1_1_1_ID, false, org.apache.lucene.util.Version.LUCENE_47);
public static final Version V_1_1_1 = new Version(V_1_1_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_7);
public static final int V_1_1_2_ID = /*00*/1010299;
public static final Version V_1_1_2 = new Version(V_1_1_2_ID, false, org.apache.lucene.util.Version.LUCENE_47);
public static final Version V_1_1_2 = new Version(V_1_1_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_7);
public static final int V_1_2_0_ID = /*00*/1020099;
public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_48);
public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_8);
public static final int V_1_2_1_ID = /*00*/1020199;
public static final Version V_1_2_1 = new Version(V_1_2_1_ID, false, org.apache.lucene.util.Version.LUCENE_48);
public static final Version V_1_2_1 = new Version(V_1_2_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_8);
public static final int V_1_2_2_ID = /*00*/1020299;
public static final Version V_1_2_2 = new Version(V_1_2_2_ID, false, org.apache.lucene.util.Version.LUCENE_48);
public static final Version V_1_2_2 = new Version(V_1_2_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_8);
public static final int V_1_3_0_ID = /*00*/1030099;
public static final Version V_1_3_0 = new Version(V_1_3_0_ID, false, org.apache.lucene.util.Version.LUCENE_48);
public static final Version V_1_3_0 = new Version(V_1_3_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_9);
public static final int V_2_0_0_ID = /*00*/2000099;
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_48);
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_9);
public static final Version CURRENT = V_2_0_0;
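For readers decoding the id comment above: the internal id packs two digits each for major, minor and revision plus a trailing build pair (99 for a GA release, lower values for Beta/RC builds, as the constants above show). A small worked example, not part of this commit:
class VersionIdSketch {
    public static void main(String[] args) {
        int id = 1020199;                      // V_1_2_1_ID from the table above
        int build = id % 100;                  // 99 = GA release, 51/52 = RC1/RC2, 01 = Beta1
        int revision = (id / 100) % 100;       // 1
        int minor = (id / 10000) % 100;        // 2
        int major = (id / 1000000) % 100;      // 1
        System.out.println(major + "." + minor + "." + revision + " (build " + build + ")");  // 1.2.1 (build 99)
    }
}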

View File

@@ -21,6 +21,7 @@ package org.elasticsearch.common.compress.lzf;
import com.ning.compress.lzf.ChunkDecoder;
import com.ning.compress.lzf.LZFChunk;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.IndexInput;
import org.elasticsearch.common.compress.CompressedIndexInput;
import org.elasticsearch.common.lucene.store.InputStreamIndexInput;
@@ -71,4 +72,9 @@ public class LZFCompressedIndexInput extends CompressedIndexInput<LZFCompressorC
cloned.inputBuffer = new byte[LZFChunk.MAX_CHUNK_LEN];
return cloned;
}
@Override
public IndexInput slice(String description, long offset, long length) throws IOException {
return BufferedIndexInput.wrap(description, this, offset, length);
}
}

View File

@@ -45,7 +45,7 @@ import java.io.IOException;
*/
public class Lucene {
public static final Version VERSION = Version.LUCENE_48;
public static final Version VERSION = Version.LUCENE_4_9;
public static final Version ANALYZER_VERSION = VERSION;
public static final Version QUERYPARSER_VERSION = VERSION;
@@ -63,56 +63,28 @@ public class Lucene {
if (version == null) {
return defaultVersion;
}
if ("4.8".equals(version)) {
return VERSION.LUCENE_48;
switch(version) {
case "4.9": return VERSION.LUCENE_4_9;
case "4.8": return VERSION.LUCENE_4_8;
case "4.7": return VERSION.LUCENE_4_7;
case "4.6": return VERSION.LUCENE_4_6;
case "4.5": return VERSION.LUCENE_4_5;
case "4.4": return VERSION.LUCENE_4_4;
case "4.3": return VERSION.LUCENE_4_3;
case "4.2": return VERSION.LUCENE_4_2;
case "4.1": return VERSION.LUCENE_4_1;
case "4.0": return VERSION.LUCENE_4_0;
case "3.6": return VERSION.LUCENE_3_6;
case "3.5": return VERSION.LUCENE_3_5;
case "3.4": return VERSION.LUCENE_3_4;
case "3.3": return VERSION.LUCENE_3_3;
case "3.2": return VERSION.LUCENE_3_2;
case "3.1": return VERSION.LUCENE_3_1;
case "3.0": return VERSION.LUCENE_3_0;
default:
logger.warn("no version match {}, default to {}", version, defaultVersion);
return defaultVersion;
}
if ("4.7".equals(version)) {
return VERSION.LUCENE_47;
}
if ("4.6".equals(version)) {
return VERSION.LUCENE_46;
}
if ("4.5".equals(version)) {
return VERSION.LUCENE_45;
}
if ("4.4".equals(version)) {
return VERSION.LUCENE_44;
}
if ("4.3".equals(version)) {
return Version.LUCENE_43;
}
if ("4.2".equals(version)) {
return Version.LUCENE_42;
}
if ("4.1".equals(version)) {
return Version.LUCENE_41;
}
if ("4.0".equals(version)) {
return Version.LUCENE_40;
}
if ("3.6".equals(version)) {
return Version.LUCENE_36;
}
if ("3.5".equals(version)) {
return Version.LUCENE_35;
}
if ("3.4".equals(version)) {
return Version.LUCENE_34;
}
if ("3.3".equals(version)) {
return Version.LUCENE_33;
}
if ("3.2".equals(version)) {
return Version.LUCENE_32;
}
if ("3.1".equals(version)) {
return Version.LUCENE_31;
}
if ("3.0".equals(version)) {
return Version.LUCENE_30;
}
logger.warn("no version match {}, default to {}", version, defaultVersion);
return defaultVersion;
}
/**

View File

@ -21,7 +21,6 @@ package org.elasticsearch.common.lucene;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.Nullable;
@ -45,17 +44,9 @@ public class SegmentReaderUtils {
return internalSegmentReader(reader, false);
}
static {
assert Version.LUCENE_48.onOrAfter(Lucene.VERSION) : "Use AtomicReader.addCoreClosedListener instead of trying to unwrap the atomic reader: https://issues.apache.org/jira/browse/LUCENE-5701";
}
public static boolean registerCoreListener(AtomicReader reader, SegmentReader.CoreClosedListener listener) {
SegmentReader segReader = SegmentReaderUtils.segmentReaderOrNull(reader);
if (segReader != null) {
segReader.addCoreClosedListener(listener);
return true;
}
return false;
reader.addCoreClosedListener(listener);
return true;
}
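Per the removed assert (LUCENE-5701), Lucene 4.9 allows a core-closed listener to be registered on the AtomicReader directly, so the SegmentReader unwrapping disappears. A hedged usage sketch of the simplified helper; the cache is hypothetical and java.util.Map is assumed imported:

// Drop a cache entry keyed by the segment's core cache key once that core closes.
static void evictOnClose(AtomicReader reader, final Map<Object, ?> cache) {
    SegmentReaderUtils.registerCoreListener(reader, new SegmentReader.CoreClosedListener() {
        @Override
        public void onClose(Object ownerCoreCacheKey) {
            cache.remove(ownerCoreCacheKey);
        }
    });
}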
private static SegmentReader internalSegmentReader(AtomicReader reader, boolean fail) {

View File

@ -25,7 +25,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import java.io.IOException;
import java.io.Reader;
@ -63,15 +62,14 @@ public class AllField extends Field {
return allEntries;
}
static {
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Re-use the incoming AllTokenStream once we upgrade to Lucene 4.9";
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
try {
allEntries.reset(); // reset the all entries, just in case it was read already
if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
// TODO: we should be able to reuse "previous" if its instanceof AllTokenStream?
// but we need to be careful this optimization is safe (and tested)...
// AllTokenStream maps boost to 4-byte payloads, so we only need to use it if any field had a non-default (!= 1.0f) boost and if
// positions are indexed:
return AllTokenStream.allTokenStream(name, allEntries, analyzer);

View File

@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
@ -44,6 +45,11 @@ public class AllDocIdSet extends DocIdSet {
return true;
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_INT;
}
@Override
public DocIdSetIterator iterator() throws IOException {
return new Iterator(maxDoc);

View File

@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.ArrayList;
@ -48,6 +49,15 @@ public class AndDocIdSet extends DocIdSet {
return true;
}
@Override
public long ramBytesUsed() {
long ramBytesUsed = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
for (DocIdSet set : sets) {
ramBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF + set.ramBytesUsed();
}
return ramBytesUsed;
}
@Override
public Bits bits() throws IOException {
Bits[] bits = new Bits[sets.length];

View File

@ -24,6 +24,7 @@ import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.Nullable;
import java.io.IOException;
@ -32,19 +33,18 @@ import java.io.IOException;
*/
public class DocIdSets {
/**
* Return the size of the doc id set, plus a reference to it.
*/
public static long sizeInBytes(DocIdSet docIdSet) {
if (docIdSet instanceof FixedBitSet) {
return ((FixedBitSet) docIdSet).getBits().length * 8 + 16;
}
// only for empty ones and unknowns...
return 1;
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + docIdSet.ramBytesUsed();
}
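A hedged worked example of the new accounting, assuming a FixedBitSet over 1024 documents (backed by 16 longs): the old branch estimated bits().length * 8 + 16 bytes, while the new body charges one object reference plus whatever the set reports itself, since DocIdSet is Accountable in Lucene 4.9:

FixedBitSet bits = new FixedBitSet(1024);      // illustrative set
long estimate = DocIdSets.sizeInBytes(bits);   // NUM_BYTES_OBJECT_REF + bits.ramBytesUsed()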
/**
* Is it an empty {@link DocIdSet}?
*/
public static boolean isEmpty(@Nullable DocIdSet set) {
return set == null || set == EMPTY_DOCIDSET;
return set == null || set == DocIdSet.EMPTY;
}
/**
@ -63,16 +63,16 @@ public class DocIdSets {
* always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
*/
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
if (set == null || set == EMPTY_DOCIDSET) {
return EMPTY_DOCIDSET;
if (set == null || set == DocIdSet.EMPTY) {
return DocIdSet.EMPTY;
}
DocIdSetIterator it = set.iterator();
if (it == null) {
return EMPTY_DOCIDSET;
return DocIdSet.EMPTY;
}
int doc = it.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return EMPTY_DOCIDSET;
return DocIdSet.EMPTY;
}
if (set instanceof FixedBitSet) {
return set;
@ -85,26 +85,6 @@ public class DocIdSets {
} while (doc != DocIdSetIterator.NO_MORE_DOCS);
return fixedBitSet;
}
/** An empty {@code DocIdSet} instance */
protected static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() {
@Override
public DocIdSetIterator iterator() {
return DocIdSetIterator.empty();
}
@Override
public boolean isCacheable() {
return true;
}
// we explicitly provide no random access, as this filter is 100% sparse and iterator exits faster
@Override
public Bits bits() {
return null;
}
};
/**
* Gets a set to bits.

View File

@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
@ -43,6 +44,11 @@ public class NotDocIdSet extends DocIdSet {
return set.isCacheable();
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + set.ramBytesUsed();
}
@Override
public Bits bits() throws IOException {
Bits bits = set.bits();

View File

@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
@ -46,6 +47,15 @@ public class OrDocIdSet extends DocIdSet {
return true;
}
@Override
public long ramBytesUsed() {
long ramBytesUsed = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
for (DocIdSet set : sets) {
ramBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF + set.ramBytesUsed();
}
return ramBytesUsed;
}
@Override
public Bits bits() throws IOException {
Bits[] bits = new Bits[sets.length];

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.lucene.docset.DocIdSets;
import java.io.IOException;
@ -90,6 +91,11 @@ public class ApplyAcceptedDocsFilter extends Filter {
return innerSet.isCacheable();
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + innerSet.ramBytesUsed();
}
@Override
public Bits bits() throws IOException {
Bits bits = innerSet.bits();
@ -202,5 +208,10 @@ public class ApplyAcceptedDocsFilter extends Filter {
public boolean isCacheable() {
return delegate.isCacheable();
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + delegate.ramBytesUsed();
}
}
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.common.lucene.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
@ -49,14 +50,14 @@ public class MoreLikeThisQuery extends Query {
private String[] moreLikeFields;
private Analyzer analyzer;
private float percentTermsToMatch = DEFAULT_PERCENT_TERMS_TO_MATCH;
private int minTermFrequency = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
private int maxQueryTerms = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
private Set<?> stopWords = XMoreLikeThis.DEFAULT_STOP_WORDS;
private int minDocFreq = XMoreLikeThis.DEFAULT_MIN_DOC_FREQ;
private int maxDocFreq = XMoreLikeThis.DEFAULT_MAX_DOC_FREQ;
private int minWordLen = XMoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
private int maxWordLen = XMoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
private boolean boostTerms = XMoreLikeThis.DEFAULT_BOOST;
private int minTermFrequency = MoreLikeThis.DEFAULT_MIN_TERM_FREQ;
private int maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
private Set<?> stopWords = MoreLikeThis.DEFAULT_STOP_WORDS;
private int minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ;
private int maxDocFreq = MoreLikeThis.DEFAULT_MAX_DOC_FREQ;
private int minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
private int maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
private boolean boostTerms = MoreLikeThis.DEFAULT_BOOST;
private float boostTermsFactor = 1;
@ -134,7 +135,7 @@ public class MoreLikeThisQuery extends Query {
@Override
public Query rewrite(IndexReader reader) throws IOException {
XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
MoreLikeThis mlt = new MoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
mlt.setFieldNames(moreLikeFields);
mlt.setAnalyzer(analyzer);

View File

@ -1,964 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/**
* Copyright 2004-2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.elasticsearch.common.lucene.search;
import java.io.*;
import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.FastStringReader;
/**
* Generate "more like this" similarity queries.
* Based on this mail:
* <code><pre>
* Lucene does let you access the document frequency of terms, with IndexReader.docFreq().
* Term frequencies can be computed by re-tokenizing the text, which, for a single document,
* is usually fast enough. But looking up the docFreq() of every term in the document is
* probably too slow.
* <p/>
* You can use some heuristics to prune the set of terms, to avoid calling docFreq() too much,
* or at all. Since you're trying to maximize a tf*idf score, you're probably most interested
* in terms with a high tf. Choosing a tf threshold even as low as two or three will radically
* reduce the number of terms under consideration. Another heuristic is that terms with a
* high idf (i.e., a low df) tend to be longer. So you could threshold the terms by the
* number of characters, not selecting anything less than, e.g., six or seven characters.
* With these sorts of heuristics you can usually find small set of, e.g., ten or fewer terms
* that do a pretty good job of characterizing a document.
* <p/>
* It all depends on what you're trying to do. If you're trying to eke out that last percent
* of precision and recall regardless of computational difficulty so that you can win a TREC
* competition, then the techniques I mention above are useless. But if you're trying to
* provide a "more like this" button on a search results page that does a decent job and has
* good performance, such techniques might be useful.
* <p/>
* An efficient, effective "more-like-this" query generator would be a great contribution, if
* anyone's interested. I'd imagine that it would take a Reader or a String (the document's
* text), an Analyzer, and return a set of representative terms using heuristics like those
* above. The frequency and length thresholds could be parameters, etc.
* <p/>
* Doug
* </pre></code>
* <p/>
* <p/>
* <p/>
* <h3>Initial Usage</h3>
* <p/>
* This class has lots of options to try to make it efficient and flexible.
* The simplest possible usage is as follows. The bold
* fragment is specific to this class.
* <p/>
* <pre class="prettyprint">
* <p/>
* IndexReader ir = ...
* IndexSearcher is = ...
* <p/>
* MoreLikeThis mlt = new MoreLikeThis(ir);
* Reader target = ... // orig source of doc you want to find similarities to
* Query query = mlt.like( target);
* <p/>
* Hits hits = is.search(query);
* // now the usual iteration thru 'hits' - the only thing to watch for is to make sure
* //you ignore the doc if it matches your 'target' document, as it should be similar to itself
* <p/>
* </pre>
* <p/>
* Thus you:
* <ol>
* <li> do your normal, Lucene setup for searching,
* <li> create a MoreLikeThis,
* <li> get the text of the doc you want to find similarities to
* <li> then call one of the like() calls to generate a similarity query
* <li> call the searcher to find the similar docs
* </ol>
* <p/>
* <h3>More Advanced Usage</h3>
* <p/>
* You may want to use {@link #setFieldNames setFieldNames(...)} so you can examine
* multiple fields (e.g. body and title) for similarity.
* <p/>
* <p/>
* Depending on the size of your index and the size and makeup of your documents you
* may want to call the other set methods to control how the similarity queries are
* generated:
* <ul>
* <li> {@link #setMinTermFreq setMinTermFreq(...)}
* <li> {@link #setMinDocFreq setMinDocFreq(...)}
* <li> {@link #setMaxDocFreq setMaxDocFreq(...)}
* <li> {@link #setMaxDocFreqPct setMaxDocFreqPct(...)}
* <li> {@link #setMinWordLen setMinWordLen(...)}
* <li> {@link #setMaxWordLen setMaxWordLen(...)}
* <li> {@link #setMaxQueryTerms setMaxQueryTerms(...)}
* <li> {@link #setMaxNumTokensParsed setMaxNumTokensParsed(...)}
* <li> {@link #setStopWords setStopWord(...)}
* </ul>
* <p/>
* <hr>
* <pre>
* Changes: Mark Harwood 29/02/04
* Some bugfixing, some refactoring, some optimisation.
* - bugfix: retrieveTerms(int docNum) was not working for indexes without a termvector -added missing code
* - bugfix: No significant terms being created for fields with a termvector - because
* was only counting one occurrence per term/field pair in calculations (i.e. not including frequency info from TermVector)
* - refactor: moved common code into isNoiseWord()
* - optimise: when no termvector support available - used maxNumTermsParsed to limit amount of tokenization
* </pre>
*/
public final class XMoreLikeThis {
static {
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Remove this class once we upgrade to Lucene 4.9";
}
/**
* Default maximum number of tokens to parse in each example doc field that is not stored with TermVector support.
*
* @see #getMaxNumTokensParsed
*/
public static final int DEFAULT_MAX_NUM_TOKENS_PARSED = 5000;
/**
* Ignore terms with less than this frequency in the source doc.
*
* @see #getMinTermFreq
* @see #setMinTermFreq
*/
public static final int DEFAULT_MIN_TERM_FREQ = 2;
/**
* Ignore words which do not occur in at least this many docs.
*
* @see #getMinDocFreq
* @see #setMinDocFreq
*/
public static final int DEFAULT_MIN_DOC_FREQ = 5;
/**
* Ignore words which occur in more than this many docs.
*
* @see #getMaxDocFreq
* @see #setMaxDocFreq
* @see #setMaxDocFreqPct
*/
public static final int DEFAULT_MAX_DOC_FREQ = Integer.MAX_VALUE;
/**
* Boost terms in query based on score.
*
* @see #isBoost
* @see #setBoost
*/
public static final boolean DEFAULT_BOOST = false;
/**
* Default field names. Null is used to specify that the field names should be looked
* up at runtime from the provided reader.
*/
public static final String[] DEFAULT_FIELD_NAMES = new String[]{"contents"};
/**
* Ignore words less than this length or if 0 then this has no effect.
*
* @see #getMinWordLen
* @see #setMinWordLen
*/
public static final int DEFAULT_MIN_WORD_LENGTH = 0;
/**
* Ignore words greater than this length or if 0 then this has no effect.
*
* @see #getMaxWordLen
* @see #setMaxWordLen
*/
public static final int DEFAULT_MAX_WORD_LENGTH = 0;
/**
* Default set of stopwords.
* If null means to allow stop words.
*
* @see #setStopWords
* @see #getStopWords
*/
public static final Set<?> DEFAULT_STOP_WORDS = null;
/**
* Current set of stop words.
*/
private Set<?> stopWords = DEFAULT_STOP_WORDS;
/**
* Return a Query with no more than this many terms.
*
* @see BooleanQuery#getMaxClauseCount
* @see #getMaxQueryTerms
* @see #setMaxQueryTerms
*/
public static final int DEFAULT_MAX_QUERY_TERMS = 25;
/**
* Analyzer that will be used to parse the doc.
*/
private Analyzer analyzer = null;
/**
* Ignore words less frequent than this.
*/
private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
/**
* Ignore words which do not occur in at least this many docs.
*/
private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
/**
* Ignore words which occur in more than this many docs.
*/
private int maxDocFreq = DEFAULT_MAX_DOC_FREQ;
/**
* Should we apply a boost to the Query based on the scores?
*/
private boolean boost = DEFAULT_BOOST;
/**
* Field name we'll analyze.
*/
private String[] fieldNames = DEFAULT_FIELD_NAMES;
/**
* The maximum number of tokens to parse in each example doc field that is not stored with TermVector support
*/
private int maxNumTokensParsed = DEFAULT_MAX_NUM_TOKENS_PARSED;
/**
* Ignore words if less than this len.
*/
private int minWordLen = DEFAULT_MIN_WORD_LENGTH;
/**
* Ignore words if greater than this len.
*/
private int maxWordLen = DEFAULT_MAX_WORD_LENGTH;
/**
* Don't return a query longer than this.
*/
private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
/**
* For idf() calculations.
*/
private TFIDFSimilarity similarity;// = new DefaultSimilarity();
/**
* IndexReader to use
*/
private final IndexReader ir;
/**
* Boost factor to use when boosting the terms
*/
private float boostFactor = 1;
/**
* Returns the boost factor used when boosting terms
*
* @return the boost factor used when boosting terms
* @see #setBoostFactor(float)
*/
public float getBoostFactor() {
return boostFactor;
}
/**
* Sets the boost factor to use when boosting terms
*
* @see #getBoostFactor()
*/
public void setBoostFactor(float boostFactor) {
this.boostFactor = boostFactor;
}
/**
* Constructor requiring an IndexReader.
*/
public XMoreLikeThis(IndexReader ir) {
this(ir, new DefaultSimilarity());
}
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
this.ir = ir;
this.similarity = sim;
}
public TFIDFSimilarity getSimilarity() {
return similarity;
}
public void setSimilarity(TFIDFSimilarity similarity) {
this.similarity = similarity;
}
/**
* Returns an analyzer that will be used to parse source doc with. The default analyzer
* is not set.
*
* @return the analyzer that will be used to parse source doc with.
*/
public Analyzer getAnalyzer() {
return analyzer;
}
/**
* Sets the analyzer to use. An analyzer is not required for generating a query with the
* {@link #like(int)} method, all other 'like' methods require an analyzer.
*
* @param analyzer the analyzer to use to tokenize text.
*/
public void setAnalyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}
/**
* Returns the frequency below which terms will be ignored in the source doc. The default
* frequency is the {@link #DEFAULT_MIN_TERM_FREQ}.
*
* @return the frequency below which terms will be ignored in the source doc.
*/
public int getMinTermFreq() {
return minTermFreq;
}
/**
* Sets the frequency below which terms will be ignored in the source doc.
*
* @param minTermFreq the frequency below which terms will be ignored in the source doc.
*/
public void setMinTermFreq(int minTermFreq) {
this.minTermFreq = minTermFreq;
}
/**
* Returns the frequency at which words will be ignored which do not occur in at least this
* many docs. The default frequency is {@link #DEFAULT_MIN_DOC_FREQ}.
*
* @return the frequency at which words will be ignored which do not occur in at least this
* many docs.
*/
public int getMinDocFreq() {
return minDocFreq;
}
/**
* Sets the frequency at which words will be ignored which do not occur in at least this
* many docs.
*
* @param minDocFreq the frequency at which words will be ignored which do not occur in at
* least this many docs.
*/
public void setMinDocFreq(int minDocFreq) {
this.minDocFreq = minDocFreq;
}
/**
* Returns the maximum frequency in which words may still appear.
* Words that appear in more than this many docs will be ignored. The default frequency is
* {@link #DEFAULT_MAX_DOC_FREQ}.
*
* @return get the maximum frequency at which words are still allowed,
* words which occur in more docs than this are ignored.
*/
public int getMaxDocFreq() {
return maxDocFreq;
}
/**
* Set the maximum frequency in which words may still appear. Words that appear
* in more than this many docs will be ignored.
*
* @param maxFreq the maximum count of documents that a term may appear
* in to be still considered relevant
*/
public void setMaxDocFreq(int maxFreq) {
this.maxDocFreq = maxFreq;
}
/**
* Set the maximum percentage in which words may still appear. Words that appear
* in more than this many percent of all docs will be ignored.
*
* @param maxPercentage the maximum percentage of documents (0-100) that a term may appear
* in to be still considered relevant
*/
public void setMaxDocFreqPct(int maxPercentage) {
this.maxDocFreq = maxPercentage * ir.numDocs() / 100;
}
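// Worked example (illustrative, not in the original source): with ir.numDocs()
// == 50,000, setMaxDocFreqPct(10) sets maxDocFreq to 10 * 50,000 / 100 = 5,000,
// so terms appearing in more than 5,000 documents are ignored.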
/**
* Returns whether to boost terms in query based on "score" or not. The default is
* {@link #DEFAULT_BOOST}.
*
* @return whether to boost terms in query based on "score" or not.
* @see #setBoost
*/
public boolean isBoost() {
return boost;
}
/**
* Sets whether to boost terms in query based on "score" or not.
*
* @param boost true to boost terms in query based on "score", false otherwise.
* @see #isBoost
*/
public void setBoost(boolean boost) {
this.boost = boost;
}
/**
* Returns the field names that will be used when generating the 'More Like This' query.
* The default field names that will be used is {@link #DEFAULT_FIELD_NAMES}.
*
* @return the field names that will be used when generating the 'More Like This' query.
*/
public String[] getFieldNames() {
return fieldNames;
}
/**
* Sets the field names that will be used when generating the 'More Like This' query.
* Set this to null for the field names to be determined at runtime from the IndexReader
* provided in the constructor.
*
* @param fieldNames the field names that will be used when generating the 'More Like This'
* query.
*/
public void setFieldNames(String[] fieldNames) {
this.fieldNames = fieldNames;
}
/**
* Returns the minimum word length below which words will be ignored. Set this to 0 for no
* minimum word length. The default is {@link #DEFAULT_MIN_WORD_LENGTH}.
*
* @return the minimum word length below which words will be ignored.
*/
public int getMinWordLen() {
return minWordLen;
}
/**
* Sets the minimum word length below which words will be ignored.
*
* @param minWordLen the minimum word length below which words will be ignored.
*/
public void setMinWordLen(int minWordLen) {
this.minWordLen = minWordLen;
}
/**
* Returns the maximum word length above which words will be ignored. Set this to 0 for no
* maximum word length. The default is {@link #DEFAULT_MAX_WORD_LENGTH}.
*
* @return the maximum word length above which words will be ignored.
*/
public int getMaxWordLen() {
return maxWordLen;
}
/**
* Sets the maximum word length above which words will be ignored.
*
* @param maxWordLen the maximum word length above which words will be ignored.
*/
public void setMaxWordLen(int maxWordLen) {
this.maxWordLen = maxWordLen;
}
/**
* Set the set of stopwords.
* Any word in this set is considered "uninteresting" and ignored.
* Even if your Analyzer allows stopwords, you might want to tell the MoreLikeThis code to ignore them, as
* for the purposes of document similarity it seems reasonable to assume that "a stop word is never interesting".
*
* @param stopWords set of stopwords, if null it means to allow stop words
* @see #getStopWords
*/
public void setStopWords(Set<?> stopWords) {
this.stopWords = stopWords;
}
/**
* Get the current stop words being used.
*
* @see #setStopWords
*/
public Set<?> getStopWords() {
return stopWords;
}
/**
* Returns the maximum number of query terms that will be included in any generated query.
* The default is {@link #DEFAULT_MAX_QUERY_TERMS}.
*
* @return the maximum number of query terms that will be included in any generated query.
*/
public int getMaxQueryTerms() {
return maxQueryTerms;
}
/**
* Sets the maximum number of query terms that will be included in any generated query.
*
* @param maxQueryTerms the maximum number of query terms that will be included in any
* generated query.
*/
public void setMaxQueryTerms(int maxQueryTerms) {
this.maxQueryTerms = maxQueryTerms;
}
/**
* @return The maximum number of tokens to parse in each example doc field that is not stored with TermVector support
* @see #DEFAULT_MAX_NUM_TOKENS_PARSED
*/
public int getMaxNumTokensParsed() {
return maxNumTokensParsed;
}
/**
* @param i The maximum number of tokens to parse in each example doc field that is not stored with TermVector support
*/
public void setMaxNumTokensParsed(int i) {
maxNumTokensParsed = i;
}
/**
* Return a query that will return docs like the passed lucene document ID.
*
* @param docNum the documentID of the lucene doc to generate the 'More Like This' query for.
* @return a query that will return docs like the passed lucene document ID.
*/
public Query like(int docNum) throws IOException {
if (fieldNames == null) {
// gather list of valid fields from lucene
Collection<String> fields = MultiFields.getIndexedFields(ir);
fieldNames = fields.toArray(new String[fields.size()]);
}
return createQuery(retrieveTerms(docNum));
}
/**
* Return a query that will return docs like the passed Reader.
*
* @return a query that will return docs like the passed Reader.
*/
@Deprecated
public Query like(Reader r, String fieldName) throws IOException {
return like(fieldName, r);
}
/**
* Return a query that will return docs like the passed Readers.
* This was added in order to treat multi-value fields.
*
* @return a query that will return docs like the passed Readers.
*/
public Query like(String fieldName, Reader... readers) throws IOException {
Map<String, Int> words = new HashMap<>();
for (Reader r : readers) {
addTermFrequencies(r, words, fieldName);
}
return createQuery(createQueue(words));
}
/**
* Create the More like query from a PriorityQueue
*/
private Query createQuery(PriorityQueue<Object[]> q) {
BooleanQuery query = new BooleanQuery();
Object cur;
int qterms = 0;
float bestScore = 0;
while ((cur = q.pop()) != null) {
Object[] ar = (Object[]) cur;
TermQuery tq = new TermQuery(new Term((String) ar[1], (String) ar[0]));
if (boost) {
if (qterms == 0) {
bestScore = ((Float) ar[2]);
}
float myScore = ((Float) ar[2]);
tq.setBoost(boostFactor * myScore / bestScore);
}
try {
query.add(tq, BooleanClause.Occur.SHOULD);
}
catch (BooleanQuery.TooManyClauses ignore) {
break;
}
qterms++;
if (maxQueryTerms > 0 && qterms >= maxQueryTerms) {
break;
}
}
return query;
}
/**
* Create a PriorityQueue from a word->tf map.
*
* @param words a map of words keyed on the word(String) with Int objects as the values.
*/
private PriorityQueue<Object[]> createQueue(Map<String, Int> words) throws IOException {
// have collected all words in doc and their freqs
int numDocs = ir.numDocs();
FreqQ res = new FreqQ(words.size()); // will order words by score
for (String word : words.keySet()) { // for every word
int tf = words.get(word).x; // term freq in the source doc
if (minTermFreq > 0 && tf < minTermFreq) {
continue; // filter out words that don't occur enough times in the source
}
// go through all the fields and find the largest document frequency
String topField = fieldNames[0];
int docFreq = 0;
for (String fieldName : fieldNames) {
int freq = ir.docFreq(new Term(fieldName, word));
topField = (freq > docFreq) ? fieldName : topField;
docFreq = (freq > docFreq) ? freq : docFreq;
}
if (minDocFreq > 0 && docFreq < minDocFreq) {
continue; // filter out words that don't occur in enough docs
}
if (docFreq > maxDocFreq) {
continue; // filter out words that occur in too many docs
}
if (docFreq == 0) {
continue; // index update problem?
}
float idf = similarity.idf(docFreq, numDocs);
float score = tf * idf;
// only really need 1st 3 entries, other ones are for troubleshooting
res.insertWithOverflow(new Object[]{word, // the word
topField, // the top field
score, // overall score
idf, // idf
docFreq, // freq in all docs
tf
});
}
return res;
}
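// Worked example (illustrative): DefaultSimilarity computes idf(docFreq, numDocs)
// = ln(numDocs / (docFreq + 1)) + 1, so with tf = 3, docFreq = 10 and
// numDocs = 1000 the idf is ln(1000 / 11) + 1 ~= 5.51 and the queued score is
// roughly 3 * 5.51 ~= 16.5.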
/**
* Describe the parameters that control how the "more like this" query is formed.
*/
public String describeParams() {
StringBuilder sb = new StringBuilder();
sb.append("\t").append("maxQueryTerms : ").append(maxQueryTerms).append("\n");
sb.append("\t").append("minWordLen : ").append(minWordLen).append("\n");
sb.append("\t").append("maxWordLen : ").append(maxWordLen).append("\n");
sb.append("\t").append("fieldNames : ");
String delim = "";
for (String fieldName : fieldNames) {
sb.append(delim).append(fieldName);
delim = ", ";
}
sb.append("\n");
sb.append("\t").append("boost : ").append(boost).append("\n");
sb.append("\t").append("minTermFreq : ").append(minTermFreq).append("\n");
sb.append("\t").append("minDocFreq : ").append(minDocFreq).append("\n");
return sb.toString();
}
/**
* Find words for a more-like-this query former.
*
* @param docNum the id of the lucene document from which to find terms
*/
public PriorityQueue<Object[]> retrieveTerms(int docNum) throws IOException {
Map<String, Int> termFreqMap = new HashMap<>();
for (String fieldName : fieldNames) {
final Fields vectors = ir.getTermVectors(docNum);
final Terms vector;
if (vectors != null) {
vector = vectors.terms(fieldName);
} else {
vector = null;
}
// field does not store term vector info
if (vector == null) {
Document d = ir.document(docNum);
IndexableField fields[] = d.getFields(fieldName);
for (IndexableField field : fields) {
final String stringValue = field.stringValue();
if (stringValue != null) {
addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
}
}
} else {
addTermFrequencies(termFreqMap, vector);
}
}
return createQueue(termFreqMap);
}
/**
* Adds terms and frequencies found in vector into the Map termFreqMap
*
* @param termFreqMap a Map of terms and their frequencies
* @param vector List of terms and their frequencies for a doc/field
*/
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
final TermsEnum termsEnum = vector.iterator(null);
final CharsRef spare = new CharsRef();
BytesRef text;
while((text = termsEnum.next()) != null) {
UnicodeUtil.UTF8toUTF16(text, spare);
final String term = spare.toString();
if (isNoiseWord(term)) {
continue;
}
final int freq = (int) termsEnum.totalTermFreq();
// increment frequency
Int cnt = termFreqMap.get(term);
if (cnt == null) {
cnt = new Int();
termFreqMap.put(term, cnt);
cnt.x = freq;
} else {
cnt.x += freq;
}
}
}
/**
* Adds term frequencies found by tokenizing text from reader into the Map words
*
* @param r a source of text to be tokenized
* @param termFreqMap a Map of terms and their frequencies
* @param fieldName Used by analyzer for any special per-field analysis
*/
private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName)
throws IOException {
if (analyzer == null) {
throw new UnsupportedOperationException("To use MoreLikeThis without " +
"term vectors, you must provide an Analyzer");
}
TokenStream ts = analyzer.tokenStream(fieldName, r);
try {
int tokenCount = 0;
// for every token
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
String word = termAtt.toString();
tokenCount++;
if (tokenCount > maxNumTokensParsed) {
break;
}
if (isNoiseWord(word)) {
continue;
}
// increment frequency
Int cnt = termFreqMap.get(word);
if (cnt == null) {
termFreqMap.put(word, new Int());
} else {
cnt.x++;
}
}
ts.end();
} finally {
IOUtils.closeWhileHandlingException(ts);
}
}
/**
* determines if the passed term is likely to be of interest in "more like" comparisons
*
* @param term The word being considered
* @return true if should be ignored, false if should be used in further analysis
*/
private boolean isNoiseWord(String term) {
int len = term.length();
if (minWordLen > 0 && len < minWordLen) {
return true;
}
if (maxWordLen > 0 && len > maxWordLen) {
return true;
}
return stopWords != null && stopWords.contains(term);
}
/**
* Find words for a more-like-this query former.
* The result is a priority queue of arrays with one entry for <b>every word</b> in the document.
* Each array has 6 elements.
* The elements are:
* <ol>
* <li> The word (String)
* <li> The top field that this word comes from (String)
* <li> The score for this word (Float)
* <li> The IDF value (Float)
* <li> The frequency of this word in the index (Integer)
* <li> The frequency of this word in the source document (Integer)
* </ol>
* This is a somewhat "advanced" routine, and in general only the 1st entry in the array is of interest.
* This method is exposed so that you can identify the "interesting words" in a document.
* For an easier method to call see {@link #retrieveInterestingTerms retrieveInterestingTerms()}.
*
* @param r the reader that has the content of the document
* @param fieldName field passed to the analyzer to use when analyzing the content
* @return the most interesting words in the document ordered by score, with the highest scoring, or best entry, first
* @see #retrieveInterestingTerms
*/
public PriorityQueue<Object[]> retrieveTerms(Reader r, String fieldName) throws IOException {
Map<String, Int> words = new HashMap<>();
addTermFrequencies(r, words, fieldName);
return createQueue(words);
}
/**
* @see #retrieveInterestingTerms(java.io.Reader, String)
*/
public String[] retrieveInterestingTerms(int docNum) throws IOException {
ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
PriorityQueue<Object[]> pq = retrieveTerms(docNum);
Object cur;
int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
// we just want to return the top words
while (((cur = pq.pop()) != null) && lim-- > 0) {
Object[] ar = (Object[]) cur;
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
return al.toArray(res);
}
/**
* Convenience routine to make it easy to return the most interesting words in a document.
* More advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly.
*
* @param r the source document
* @param fieldName field passed to analyzer to use when analyzing the content
* @return the most interesting words in the document
* @see #retrieveTerms(java.io.Reader, String)
* @see #setMaxQueryTerms
*/
public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
PriorityQueue<Object[]> pq = retrieveTerms(r, fieldName);
Object cur;
int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
// we just want to return the top words
while (((cur = pq.pop()) != null) && lim-- > 0) {
Object[] ar = (Object[]) cur;
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
return al.toArray(res);
}
/**
* PriorityQueue that orders words by score.
*/
private static class FreqQ extends PriorityQueue<Object[]> {
FreqQ(int s) {
super(s);
}
@Override
protected boolean lessThan(Object[] aa, Object[] bb) {
Float fa = (Float) aa[2];
Float fb = (Float) bb[2];
return fa > fb;
}
}
/**
* Use for frequencies and to avoid renewing Integers.
*/
private static class Int {
int x;
Int() {
x = 1;
}
}
}

View File

@ -33,7 +33,7 @@ abstract class AbstractArray implements BigArray {
@Override
public final void close() {
bigArrays.ramBytesUsed.addAndGet(-sizeInBytes());
bigArrays.ramBytesUsed.addAndGet(-ramBytesUsed());
assert !released : "double release";
released = true;
doClose();

View File

@ -82,7 +82,7 @@ abstract class AbstractBigArray extends AbstractArray {
protected abstract int numBytesPerElement();
public final long sizeInBytes() {
public final long ramBytesUsed() {
// rough approximation: we only take into account the size of the values, not the overhead of the array objects
return ((long) pageIndex(size - 1) + 1) * pageSize() * numBytesPerElement();
}
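A hedged worked example, assuming the default 16 KB page that BigArrays uses (2048 long elements per page): a LongArray of 1,000,000 elements spans pageIndex(999999) + 1 = 489 pages, so ramBytesUsed() reports 489 * 2048 * 8 = 8,011,776 bytes, slightly more than the raw 8,000,000 bytes of values because the last page is only partially filled.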

View File

@ -19,17 +19,13 @@
package org.elasticsearch.common.util;
import org.apache.lucene.util.Accountable;
import org.elasticsearch.common.lease.Releasable;
/** Base abstraction of an array. */
public interface BigArray extends Releasable {
public interface BigArray extends Releasable, Accountable {
/** Return the length of this array. */
public long size();
/**
* Return an estimated memory usage of this instance.
*/
public long sizeInBytes();
}

View File

@ -118,7 +118,7 @@ public class BigArrays extends AbstractComponent {
}
@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}
@ -169,7 +169,7 @@ public class BigArrays extends AbstractComponent {
}
@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}
@ -212,7 +212,7 @@ public class BigArrays extends AbstractComponent {
}
@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}
@ -254,7 +254,7 @@ public class BigArrays extends AbstractComponent {
}
@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}
@ -297,7 +297,7 @@ public class BigArrays extends AbstractComponent {
}
@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}
@ -340,7 +340,7 @@ public class BigArrays extends AbstractComponent {
}
@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF * size());
}
@ -386,16 +386,16 @@ public class BigArrays extends AbstractComponent {
}
private <T extends AbstractBigArray> T resizeInPlace(T array, long newSize) {
final long oldMemSize = array.sizeInBytes();
final long oldMemSize = array.ramBytesUsed();
array.resize(newSize);
validate(array.sizeInBytes() - oldMemSize);
validate(array.ramBytesUsed() - oldMemSize);
return array;
}
private <T extends BigArray> T validate(T array) {
boolean success = false;
try {
validate(array.sizeInBytes());
validate(array.ramBytesUsed());
success = true;
} finally {
if (!success) {

View File

@ -22,7 +22,7 @@ package org.elasticsearch.env;
import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.XNativeFSLockFactory;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.cluster.node.DiscoveryNode;
@ -78,7 +78,7 @@ public class NodeEnvironment extends AbstractComponent {
}
logger.trace("obtaining node lock on {} ...", dir.getAbsolutePath());
try {
XNativeFSLockFactory lockFactory = new XNativeFSLockFactory(dir);
NativeFSLockFactory lockFactory = new NativeFSLockFactory(dir);
Lock tmpLock = lockFactory.makeLock("node.lock");
boolean obtained = tmpLock.obtain();
if (obtained) {

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.elasticsearch.common.io.Streams;

View File

@ -88,7 +88,7 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
@Override
public TokenStream create(TokenStream tokenStream) {
if (version.onOrAfter(Version.LUCENE_48)) {
if (version.onOrAfter(Version.LUCENE_4_8)) {
return new WordDelimiterFilter(version, tokenStream,
charTypeTable,
flags,

View File

@ -21,7 +21,7 @@ package org.elasticsearch.index.codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.codecs.lucene49.Lucene49Codec;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
@ -37,7 +37,7 @@ import org.elasticsearch.index.mapper.MapperService;
* configured for a specific field the default postings format is used.
*/
// LUCENE UPGRADE: make sure to move to a new codec depending on the lucene version
public class PerFieldMappingPostingFormatCodec extends Lucene46Codec {
public class PerFieldMappingPostingFormatCodec extends Lucene49Codec {
private final ESLogger logger;
private final MapperService mapperService;
private final PostingsFormat defaultPostingFormat;

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.codec.docvaluesformat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -35,7 +35,8 @@ public class DiskDocValuesFormatProvider extends AbstractDocValuesFormatProvider
@Inject
public DiskDocValuesFormatProvider(@Assisted String name, @Assisted Settings docValuesFormatSettings) {
super(name);
this.docValuesFormat = new DiskDocValuesFormat();
// TODO: log a warning if someone chooses this? just remove this altogether and map it to the 4.9 provider?
this.docValuesFormat = new Lucene49DocValuesFormat();
}
@Override

View File

@ -38,9 +38,10 @@ public class DocValuesFormats {
builtInDocValuesFormatsX.put(name, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormat.forName(name)));
}
// LUCENE UPGRADE: update those DVF if necessary
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene45")));
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormatsX.put("memory", new PreBuiltDocValuesFormatProvider.Factory("memory", DocValuesFormat.forName("Memory")));
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Disk")));
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormatsX.put("Disk", new PreBuiltDocValuesFormatProvider.Factory("Disk", DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormats = builtInDocValuesFormatsX.immutableMap();
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.codec.postingsformat;
import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.codec.postingsformat;
import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;

View File

@ -19,13 +19,14 @@
package org.elasticsearch.index.fielddata;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.ScriptDocValues.Strings;
/**
* The thread safe {@link org.apache.lucene.index.AtomicReader} level cache of the data.
*/
public interface AtomicFieldData<Script extends ScriptDocValues> extends RamUsage {
public interface AtomicFieldData<Script extends ScriptDocValues> extends Accountable {
/**
* Use a non thread safe (lightweight) view of the values as bytes.
@ -56,7 +57,7 @@ public interface AtomicFieldData<Script extends ScriptDocValues> extends RamUsag
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return 0;
}

View File

@ -26,6 +26,7 @@ import com.google.common.cache.RemovalNotification;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.util.Accountable;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.lucene.SegmentReaderUtils;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
@ -63,7 +64,7 @@ public interface IndexFieldDataCache {
interface Listener {
void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, RamUsage ramUsage);
void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, Accountable ramUsage);
void onUnload(FieldMapper.Names fieldNames, FieldDataType fieldDataType, boolean wasEvicted, long sizeInBytes);
}
@ -71,11 +72,11 @@ public interface IndexFieldDataCache {
/**
* The resident field data cache is a *per field* cache that keeps all the values in memory.
*/
static abstract class FieldBased implements IndexFieldDataCache, SegmentReader.CoreClosedListener, RemovalListener<FieldBased.Key, RamUsage>, IndexReader.ReaderClosedListener {
static abstract class FieldBased implements IndexFieldDataCache, SegmentReader.CoreClosedListener, RemovalListener<FieldBased.Key, Accountable>, IndexReader.ReaderClosedListener {
private final IndexService indexService;
private final FieldMapper.Names fieldNames;
private final FieldDataType fieldDataType;
private final Cache<Key, RamUsage> cache;
private final Cache<Key, Accountable> cache;
private final IndicesFieldDataCacheListener indicesFieldDataCacheListener;
private final ESLogger logger;
@ -92,15 +93,15 @@ public interface IndexFieldDataCache {
}
@Override
public void onRemoval(RemovalNotification<Key, RamUsage> notification) {
public void onRemoval(RemovalNotification<Key, Accountable> notification) {
final Key key = notification.getKey();
assert key != null && key.listeners != null;
final RamUsage value = notification.getValue();
final Accountable value = notification.getValue();
long sizeInBytes = key.sizeInBytes;
assert sizeInBytes >= 0 || value != null : "Expected size [" + sizeInBytes + "] to be positive or value [" + value + "] to be non-null";
if (sizeInBytes == -1 && value != null) {
sizeInBytes = value.getMemorySizeInBytes();
sizeInBytes = value.ramBytesUsed();
}
for (Listener listener : key.listeners) {
try {
@ -129,7 +130,7 @@ public interface IndexFieldDataCache {
}
}
final AtomicFieldData fieldData = indexFieldData.loadDirect(context);
key.sizeInBytes = fieldData.getMemorySizeInBytes();
key.sizeInBytes = fieldData.ramBytesUsed();
for (Listener listener : key.listeners) {
try {
listener.onLoad(fieldNames, fieldDataType, fieldData);
@ -146,7 +147,7 @@ public interface IndexFieldDataCache {
public <IFD extends IndexFieldData.WithOrdinals<?>> IFD load(final IndexReader indexReader, final IFD indexFieldData) throws Exception {
final Key key = new Key(indexReader.getCoreCacheKey());
//noinspection unchecked
return (IFD) cache.get(key, new Callable<RamUsage>() {
return (IFD) cache.get(key, new Callable<Accountable>() {
@Override
public GlobalOrdinalsIndexFieldData call() throws Exception {
indexReader.addReaderClosedListener(FieldBased.this);
@ -160,7 +161,7 @@ public interface IndexFieldDataCache {
}
}
GlobalOrdinalsIndexFieldData ifd = (GlobalOrdinalsIndexFieldData) indexFieldData.localGlobalDirect(indexReader);
key.sizeInBytes = ifd.getMemorySizeInBytes();
key.sizeInBytes = ifd.ramBytesUsed();
for (Listener listener : key.listeners) {
try {
listener.onLoad(fieldNames, fieldDataType, ifd);

View File

@ -1,30 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
/**
*/
public interface RamUsage {
/**
* Size (in bytes) of memory used by this particular instance.
*/
long getMemorySizeInBytes();
}

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.fielddata;
import com.carrotsearch.hppc.ObjectLongOpenHashMap;
import org.apache.lucene.util.Accountable;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.metrics.CounterMetric;
import org.elasticsearch.common.regex.Regex;
@ -69,18 +70,18 @@ public class ShardFieldData extends AbstractIndexShardComponent implements Index
}
@Override
public void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, RamUsage ramUsage) {
totalMetric.inc(ramUsage.getMemorySizeInBytes());
public void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, Accountable ramUsage) {
totalMetric.inc(ramUsage.ramBytesUsed());
String keyFieldName = fieldNames.indexName();
CounterMetric total = perFieldTotals.get(keyFieldName);
if (total != null) {
total.inc(ramUsage.getMemorySizeInBytes());
total.inc(ramUsage.ramBytesUsed());
} else {
total = new CounterMetric();
total.inc(ramUsage.getMemorySizeInBytes());
total.inc(ramUsage.ramBytesUsed());
CounterMetric prev = perFieldTotals.putIfAbsent(keyFieldName, total);
if (prev != null) {
prev.inc(ramUsage.getMemorySizeInBytes());
prev.inc(ramUsage.ramBytesUsed());
}
}
}

View File

@ -19,8 +19,9 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.XOrdinalMap;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.elasticsearch.index.fielddata.BytesValues;
/**
@ -29,32 +30,38 @@ import org.elasticsearch.index.fielddata.BytesValues;
public class GlobalOrdinalMapping extends BytesValues.WithOrdinals {
private final BytesValues.WithOrdinals values;
private final OrdinalMap ordinalMap;
private final XOrdinalMap ordinalMap;
private final LongValues mapping;
private final BytesValues.WithOrdinals[] bytesValues;
private final int segmentIndex;
GlobalOrdinalMapping(OrdinalMap ordinalMap, BytesValues.WithOrdinals[] bytesValues, int segmentIndex) {
GlobalOrdinalMapping(XOrdinalMap ordinalMap, BytesValues.WithOrdinals[] bytesValues, int segmentIndex) {
super(bytesValues[segmentIndex].isMultiValued());
this.values = bytesValues[segmentIndex];
this.segmentIndex = segmentIndex;
this.bytesValues = bytesValues;
this.ordinalMap = ordinalMap;
this.mapping = ordinalMap.getGlobalOrds(segmentIndex);
}
int readerIndex;
@Override
public long getMaxOrd() {
return ordinalMap.getValueCount();
}
// NOTE: careful if we change the API here: unnecessary branch for < 0 here hurts a lot.
// so if we already know the count (from setDocument), it's bad to do it redundantly.
public long getGlobalOrd(long segmentOrd) {
return segmentOrd == MISSING_ORDINAL ? MISSING_ORDINAL : ordinalMap.getGlobalOrd(segmentIndex, segmentOrd);
return mapping.get(segmentOrd);
}
@Override
public long getOrd(int docId) {
return getGlobalOrd(values.getOrd(docId));
long v = values.getOrd(docId);
if (v < 0) {
return v;
} else {
return getGlobalOrd(v);
}
}
@Override
@ -70,7 +77,7 @@ public class GlobalOrdinalMapping extends BytesValues.WithOrdinals {
@Override
public BytesRef getValueByOrd(long globalOrd) {
final long segmentOrd = ordinalMap.getFirstSegmentOrd(globalOrd);
readerIndex = ordinalMap.getFirstSegmentNumber(globalOrd);
int readerIndex = ordinalMap.getFirstSegmentNumber(globalOrd);
return bytesValues[readerIndex].getValueByOrd(segmentOrd);
}
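A small worked example of the mapping above (illustrative, under assumed term dictionaries): if segment 0 holds the terms [a, c] and segment 1 holds [b, c], the global ordinal space is a=0, b=1, c=2. On the segment 0 view, getGlobalOrd(1) returns 2 (the global ordinal of c), and getValueByOrd(2) asks XOrdinalMap for the first segment that contains c and resolves the bytes from that segment's values.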

View File

@ -20,6 +20,7 @@ package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.Accountable;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
@ -27,14 +28,13 @@ import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.RamUsage;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.MultiValueMode;
/**
* {@link IndexFieldData} base class for concrete global ordinals implementations.
*/
public abstract class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexFieldData.WithOrdinals, RamUsage {
public abstract class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexFieldData.WithOrdinals, Accountable {
private final FieldMapper.Names fieldNames;
private final FieldDataType fieldDataType;
@ -93,7 +93,7 @@ public abstract class GlobalOrdinalsIndexFieldData extends AbstractIndexComponen
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return memorySizeInBytes;
}

View File

@ -20,12 +20,14 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.XOrdinalMap;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
@ -47,11 +49,14 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
final AtomicFieldData.WithOrdinals<?>[] atomicFD = new AtomicFieldData.WithOrdinals[indexReader.leaves().size()];
final TermsEnum[] subs = new TermsEnum[indexReader.leaves().size()];
final long[] weights = new long[subs.length];
for (int i = 0; i < indexReader.leaves().size(); ++i) {
atomicFD[i] = indexFieldData.load(indexReader.leaves().get(i));
subs[i] = atomicFD[i].getBytesValues().getTermsEnum();
BytesValues.WithOrdinals v = atomicFD[i].getBytesValues();
subs[i] = v.getTermsEnum();
weights[i] = v.getMaxOrd();
}
final OrdinalMap ordinalMap = new OrdinalMap(null, subs);
final XOrdinalMap ordinalMap = XOrdinalMap.build(null, subs, weights, PackedInts.DEFAULT);
final long memorySizeInBytes = ordinalMap.ramBytesUsed();
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.XOrdinalMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.AtomicFieldData;
@ -35,7 +35,7 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
private final Atomic[] atomicReaders;
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicFieldData.WithOrdinals[] segmentAfd, OrdinalMap ordinalMap, long memorySizeInBytes) {
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicFieldData.WithOrdinals[] segmentAfd, XOrdinalMap ordinalMap, long memorySizeInBytes) {
super(index, settings, fieldNames, fieldDataType, memorySizeInBytes);
this.atomicReaders = new Atomic[segmentAfd.length];
for (int i = 0; i < segmentAfd.length; i++) {
@ -51,10 +51,10 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
private final class Atomic implements AtomicFieldData.WithOrdinals {
private final WithOrdinals afd;
private final OrdinalMap ordinalMap;
private final XOrdinalMap ordinalMap;
private final int segmentIndex;
private Atomic(WithOrdinals afd, OrdinalMap ordinalMap, int segmentIndex) {
private Atomic(WithOrdinals afd, XOrdinalMap ordinalMap, int segmentIndex) {
this.afd = afd;
this.ordinalMap = ordinalMap;
this.segmentIndex = segmentIndex;
@ -75,8 +75,8 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
}
@Override
public long getMemorySizeInBytes() {
return afd.getMemorySizeInBytes();
public long ramBytesUsed() {
return afd.ramBytesUsed();
}
@Override

View File

@ -77,7 +77,8 @@ public class MultiOrdinals extends Ordinals {
assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds();
}
public long getMemorySizeInBytes() {
@Override
public long ramBytesUsed() {
return endOffsets.ramBytesUsed() + ords.ramBytesUsed();
}

View File

@ -19,15 +19,14 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.BytesValues;
/**
* A thread safe ordinals abstraction. Ordinals can only be positive integers.
*/
public abstract class Ordinals {
public abstract class Ordinals implements Accountable {
public static final ValuesHolder NO_VALUES = new ValuesHolder() {
@Override
@ -39,7 +38,7 @@ public abstract class Ordinals {
/**
* The memory size these ordinals take.
*/
public abstract long getMemorySizeInBytes();
public abstract long ramBytesUsed();
public abstract BytesValues.WithOrdinals ordinals(ValuesHolder values);

View File

@ -45,7 +45,7 @@ public class SinglePackedOrdinals extends Ordinals {
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_OBJECT_REF + reader.ramBytesUsed();
}

View File

@ -39,7 +39,7 @@ abstract class AbstractGeoPointIndexFieldData extends AbstractIndexFieldData<Ato
protected static class Empty extends AtomicGeoPointFieldData<ScriptDocValues> {
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return 0;
}

View File

@ -44,47 +44,33 @@ public class BinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocValues.
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
// TODO: Lucene doesn't expose it right now
return -1;
}
@Override
public BytesValues getBytesValues() {
final BinaryDocValues values;
final Bits docsWithField;
try {
final BinaryDocValues v = reader.getBinaryDocValues(field);
if (v == null) {
// segment has no value
values = DocValues.EMPTY_BINARY;
docsWithField = new Bits.MatchNoBits(reader.maxDoc());
} else {
values = v;
final Bits b = reader.getDocsWithField(field);
docsWithField = b == null ? new Bits.MatchAllBits(reader.maxDoc()) : b;
}
final BinaryDocValues values = DocValues.getBinary(reader, field);
final Bits docsWithField = DocValues.getDocsWithField(reader, field);
return new BytesValues(false) {
int docId;
@Override
public int setDocument(int docId) {
this.docId = docId;
return docsWithField.get(docId) ? 1 : 0;
}
@Override
public BytesRef nextValue() {
return values.get(docId);
}
};
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
}
return new BytesValues(false) {
final BytesRef scratch = new BytesRef();
int docId;
@Override
public int setDocument(int docId) {
this.docId = docId;
return docsWithField.get(docId) ? 1 : 0;
}
@Override
public BytesRef nextValue() {
values.get(docId, scratch);
return scratch;
}
};
}
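For context, Lucene 4.9 adds static helpers on DocValues that return empty instances when a segment has no values for a field, which is what makes the explicit null handling above unnecessary, and BinaryDocValues.get(docID) now returns the BytesRef directly instead of filling a caller-provided scratch. A hedged sketch of reading a binary doc-values field with those helpers; the class, method and field names are illustrative:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

final class BinaryDocValuesSketch {
    // Prints the stored bytes for every document that has a value in 'field'.
    static void dumpBinaryField(AtomicReader reader, String field) throws IOException {
        BinaryDocValues values = DocValues.getBinary(reader, field);    // never null in 4.9
        Bits docsWithField = DocValues.getDocsWithField(reader, field); // never null either
        for (int doc = 0; doc < reader.maxDoc(); doc++) {
            if (docsWithField.get(doc)) {
                BytesRef value = values.get(doc); // returns the bytes, no scratch argument
                System.out.println(doc + " -> " + value.utf8ToString());
            }
        }
    }
}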
@Override

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -37,7 +36,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
BinaryDVNumericAtomicFieldData(BinaryDocValues values, NumericType numericType) {
super(numericType.isFloatingPoint());
this.values = values == null ? DocValues.EMPTY_BINARY : values;
this.values = values;
this.numericType = numericType;
}
@ -48,7 +47,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
}
return new LongValues(true) {
final BytesRef bytes = new BytesRef();
BytesRef bytes;
final ByteArrayDataInput in = new ByteArrayDataInput();
long[] longs = new long[8];
int i = Integer.MAX_VALUE;
@ -56,7 +55,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
@Override
public int setDocument(int docId) {
values.get(docId, bytes);
bytes = values.get(docId);
in.reset(bytes.bytes, bytes.offset, bytes.length);
if (!in.eof()) {
// first value uses vLong on top of zig-zag encoding, then deltas are encoded using vLong
@ -91,13 +90,13 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
case FLOAT:
return new DoubleValues(true) {
final BytesRef bytes = new BytesRef();
BytesRef bytes;
int i = Integer.MAX_VALUE;
int valueCount = 0;
@Override
public int setDocument(int docId) {
values.get(docId, bytes);
bytes = values.get(docId);
assert bytes.length % 4 == 0;
i = 0;
return valueCount = bytes.length / 4;
@ -113,13 +112,13 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
case DOUBLE:
return new DoubleValues(true) {
final BytesRef bytes = new BytesRef();
BytesRef bytes;
int i = Integer.MAX_VALUE;
int valueCount = 0;
@Override
public int setDocument(int docId) {
values.get(docId, bytes);
bytes = values.get(docId);
assert bytes.length % 8 == 0;
i = 0;
return valueCount = bytes.length / 8;
@ -138,7 +137,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return -1; // Lucene doesn't expose it
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
import com.google.common.base.Preconditions;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.FieldDataType;
@ -63,7 +64,7 @@ public class BinaryDVNumericIndexFieldData extends DocValuesIndexFieldData imple
@Override
public BinaryDVNumericAtomicFieldData load(AtomicReaderContext context) {
try {
return new BinaryDVNumericAtomicFieldData(context.reader().getBinaryDocValues(fieldNames.indexName()), numericType);
return new BinaryDVNumericAtomicFieldData(DocValues.getBinary(context.reader(), fieldNames.indexName()), numericType);
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
}

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.AtomicFieldData;
@ -33,11 +32,11 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocVal
BytesBinaryDVAtomicFieldData(BinaryDocValues values) {
super();
this.values = values == null ? DocValues.EMPTY_BINARY : values;
this.values = values;
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return -1; // not exposed by Lucene
}
@ -45,13 +44,13 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocVal
public BytesValues getBytesValues() {
return new BytesValues(true) {
final BytesRef bytes = new BytesRef();
BytesRef bytes;
final BytesRef scratch = new BytesRef();
final ByteArrayDataInput in = new ByteArrayDataInput();
@Override
public int setDocument(int docId) {
values.get(docId, bytes);
bytes = values.get(docId);
in.reset(bytes.bytes, bytes.offset, bytes.length);
if (bytes.length == 0) {
return 0;

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.Nullable;
@ -56,7 +57,7 @@ public class BytesBinaryDVIndexFieldData extends DocValuesIndexFieldData impleme
@Override
public BytesBinaryDVAtomicFieldData load(AtomicReaderContext context) {
try {
return new BytesBinaryDVAtomicFieldData(context.reader().getBinaryDocValues(fieldNames.indexName()));
return new BytesBinaryDVAtomicFieldData(DocValues.getBinary(context.reader(), fieldNames.indexName()));
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
}

View File

@ -60,7 +60,7 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return 0;
}
@ -87,9 +87,9 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.ramBytesUsed();
}
return size;
}
@ -155,9 +155,9 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + RamUsageEstimator.sizeOf(set.getBits());
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
}
return size;
}
@ -236,9 +236,9 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed();
}
return size;
}

View File

@ -82,7 +82,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
if (terms == null) {
data = DoubleArrayAtomicFieldData.empty();
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
return data;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
@ -108,8 +108,8 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
// there's a sweet spot where, due to low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsageEstimator.NUM_BYTES_DOUBLE + (set == null ? 0 : RamUsageEstimator.sizeOf(set.getBits()) + RamUsageEstimator.NUM_BYTES_INT);
long uniqueValuesArraySize = values.sizeInBytes();
long ordinalsSize = build.getMemorySizeInBytes();
long uniqueValuesArraySize = values.ramBytesUsed();
long ordinalsSize = build.ramBytesUsed();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
data = new DoubleArrayAtomicFieldData.WithOrdinals(values, build);
success = true;
@ -135,7 +135,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
return data;
} finally {
if (success) {
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
}
}

View File

@ -52,11 +52,11 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
long size = ordinals.getMemorySizeInBytes();
long size = ordinals.ramBytesUsed();
// FST
size += fst == null ? 0 : fst.sizeInBytes();
size += fst == null ? 0 : fst.ramBytesUsed();
this.size = size;
}
return size;

View File

@ -67,7 +67,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<AtomicFi
// TODO: Use an actual estimator to estimate before loading.
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
if (terms == null) {
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.getMemorySizeInBytes());
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.ramBytesUsed());
return AtomicFieldData.WithOrdinals.EMPTY;
}
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -106,7 +106,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<AtomicFi
return data;
} finally {
if (success) {
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
}
}

View File

@ -59,7 +59,7 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return 0;
}
@ -86,9 +86,9 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.ramBytesUsed();
}
return size;
}
@ -151,9 +151,9 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + RamUsageEstimator.sizeOf(set.getBits());
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
}
return size;
}
@ -234,9 +234,9 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed();
}
return size;
}

View File

@ -80,7 +80,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
if (terms == null) {
data = FloatArrayAtomicFieldData.empty();
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
return data;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
@ -106,8 +106,8 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
// there's a sweet spot where, due to low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsageEstimator.NUM_BYTES_FLOAT + (set == null ? 0 : RamUsageEstimator.sizeOf(set.getBits()) + RamUsageEstimator.NUM_BYTES_INT);
long uniqueValuesArraySize = values.sizeInBytes();
long ordinalsSize = build.getMemorySizeInBytes();
long uniqueValuesArraySize = values.ramBytesUsed();
long ordinalsSize = build.ramBytesUsed();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
data = new FloatArrayAtomicFieldData.WithOrdinals(values, build);
success = true;
@ -133,7 +133,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
return data;
} finally {
if (success) {
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
}
}

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.util.ByteUtils;
@ -34,11 +33,11 @@ final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData<Scri
GeoPointBinaryDVAtomicFieldData(BinaryDocValues values) {
super();
this.values = values == null ? DocValues.EMPTY_BINARY : values;
this.values = values;
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return -1; // not exposed by Lucene
}
@ -56,14 +55,14 @@ final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData<Scri
public GeoPointValues getGeoPointValues() {
return new GeoPointValues(true) {
final BytesRef bytes = new BytesRef();
BytesRef bytes;
int i = Integer.MAX_VALUE;
int valueCount = 0;
final GeoPoint point = new GeoPoint();
@Override
public int setDocument(int docId) {
values.get(docId, bytes);
bytes = values.get(docId);
assert bytes.length % 16 == 0;
i = 0;
return valueCount = (bytes.length >>> 4);

View File

@ -20,18 +20,19 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldMapper.Names;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
import org.elasticsearch.search.MultiValueMode;
import java.io.IOException;
@ -54,7 +55,7 @@ public class GeoPointBinaryDVIndexFieldData extends DocValuesIndexFieldData impl
@Override
public AtomicGeoPointFieldData<ScriptDocValues> load(AtomicReaderContext context) {
try {
return new GeoPointBinaryDVAtomicFieldData(context.reader().getBinaryDocValues(fieldNames.indexName()));
return new GeoPointBinaryDVAtomicFieldData(DocValues.getBinary(context.reader(), fieldNames.indexName()));
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
}

View File

@ -60,7 +60,7 @@ public abstract class GeoPointCompressedAtomicFieldData extends AtomicGeoPointFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed();
}
@ -120,7 +120,7 @@ public abstract class GeoPointCompressedAtomicFieldData extends AtomicGeoPointFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
}
@ -178,7 +178,7 @@ public abstract class GeoPointCompressedAtomicFieldData extends AtomicGeoPointFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + (lon.ramBytesUsed() + lat.ramBytesUsed());
}

View File

@ -85,7 +85,7 @@ public class GeoPointCompressedIndexFieldData extends AbstractGeoPointIndexField
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
if (terms == null) {
data = new Empty();
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
return data;
}
final long initialSize;
@ -147,7 +147,7 @@ public class GeoPointCompressedIndexFieldData extends AbstractGeoPointIndexField
return data;
} finally {
if (success) {
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
}
}

View File

@ -56,9 +56,9 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.sizeInBytes() + lat.sizeInBytes();
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed();
}
return size;
}
@ -112,9 +112,9 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.sizeInBytes() + lat.sizeInBytes() + RamUsageEstimator.sizeOf(set.getBits());
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
}
return size;
}
@ -167,9 +167,9 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + RamUsageEstimator.NUM_BYTES_INT/*numDocs*/ + (lon.sizeInBytes() + lat.sizeInBytes());
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + RamUsageEstimator.NUM_BYTES_INT/*numDocs*/ + (lon.ramBytesUsed() + lat.ramBytesUsed());
}
return size;
}

View File

@ -67,7 +67,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractGeoPointIndexFiel
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
if (terms == null) {
data = new Empty();
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
return data;
}
DoubleArray lat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
@ -114,7 +114,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractGeoPointIndexFiel
return data;
} finally {
if (success) {
estimator.afterLoad(null, data.getMemorySizeInBytes());
estimator.afterLoad(null, data.ramBytesUsed());
}
}

View File

@ -90,7 +90,7 @@ public class IndexIndexFieldData implements IndexFieldData.WithOrdinals<AtomicFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return 0;
}

View File

@ -49,7 +49,7 @@ public class NumericDVAtomicFieldData extends AbstractAtomicNumericFieldData {
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
// TODO: cannot be computed from Lucene
return -1;
}
@ -66,23 +66,13 @@ public class NumericDVAtomicFieldData extends AbstractAtomicNumericFieldData {
}
private DocValuesAndBits getDocValues() {
final NumericDocValues values;
final Bits docsWithField;
try {
final NumericDocValues v = reader.getNumericDocValues(field);
if (v == null) {
// segment has no value
values = DocValues.EMPTY_NUMERIC;
docsWithField = new Bits.MatchNoBits(reader.maxDoc());
} else {
values = v;
final Bits b = reader.getDocsWithField(field);
docsWithField = b == null ? new Bits.MatchAllBits(reader.maxDoc()) : b;
}
final NumericDocValues values = DocValues.getNumeric(reader, field);
final Bits docsWithField = DocValues.getDocsWithField(reader, field);
return new DocValuesAndBits(values, docsWithField);
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
}
return new DocValuesAndBits(values, docsWithField);
}
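The same 4.9 helpers exist for numeric doc values. A small illustrative sketch (names invented here) of distinguishing a missing value from a genuine 0 via getDocsWithField:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;

final class NumericDocValuesSketch {
    // Returns the value for 'doc', or the supplied default when the document has no value.
    static long valueOrDefault(AtomicReader reader, String field, int doc, long defaultValue) throws IOException {
        NumericDocValues values = DocValues.getNumeric(reader, field);
        Bits docsWithField = DocValues.getDocsWithField(reader, field);
        return docsWithField.get(doc) ? values.get(doc) : defaultValue;
    }
}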
@Override

View File

@ -59,7 +59,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return 0;
}
@ -86,9 +86,9 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.getMemorySizeInBytes();
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.ramBytesUsed();
}
return size;
}
@ -156,7 +156,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = values.ramBytesUsed() + 2 * RamUsageEstimator.NUM_BYTES_LONG;
}
@ -243,7 +243,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = values.ramBytesUsed();
}
@ -322,7 +322,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = values.ramBytesUsed();
}
@ -398,7 +398,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
size = values.ramBytesUsed() + 2 * RamUsageEstimator.NUM_BYTES_LONG;
}

View File

@ -101,7 +101,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType(), getFieldNames().fullName());
if (terms == null) {
data = PackedArrayAtomicFieldData.empty();
estimator.adjustForNoTerms(data.getMemorySizeInBytes());
estimator.adjustForNoTerms(data.ramBytesUsed());
return data;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
@ -231,7 +231,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
estimator.afterLoad(termsEnum, 0);
} else {
// Adjust as usual, based on the actual size of the field data
estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
estimator.afterLoad(termsEnum, data.ramBytesUsed());
}
}
@ -251,7 +251,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
// ordinal memory usage
final long ordinalsSize = build.getMemorySizeInBytes() + values.ramBytesUsed();
final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed();
// estimate the memory signature of paged packing
long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages

View File

@ -49,9 +49,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
long size = ordinals.getMemorySizeInBytes();
long size = ordinals.ramBytesUsed();
// PackedBytes
size += readerBytesSize;
// PackedInts

View File

@ -18,7 +18,8 @@
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.codecs.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
@ -65,7 +66,7 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<Atomic
PagedBytesEstimator estimator = new PagedBytesEstimator(context, breakerService.getBreaker(), getFieldNames().fullName());
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.getMemorySizeInBytes());
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.ramBytesUsed());
return AtomicFieldData.WithOrdinals.EMPTY;
}
@ -115,7 +116,7 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<Atomic
estimator.afterLoad(termsEnum, 0);
} else {
// Call .afterLoad() to adjust the breaker now that we have an exact size
estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
estimator.afterLoad(termsEnum, data.ramBytesUsed());
}
}
@ -165,8 +166,8 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<Atomic
Fields fields = reader.fields();
final Terms fieldTerms = fields.terms(getFieldNames().indexName());
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
if (fieldTerms instanceof FieldReader) {
final Stats stats = ((FieldReader) fieldTerms).computeStats();
long totalTermBytes = stats.totalTermBytes;
if (logger.isTraceEnabled()) {
logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",

View File

@ -37,13 +37,13 @@ public class ParentChildAtomicFieldData implements AtomicFieldData {
this.typeToIds = typeToIds;
long size = 0;
for (ObjectCursor<PagedBytesAtomicFieldData> cursor : typeToIds.values()) {
size += cursor.value.getMemorySizeInBytes();
size += cursor.value.ramBytesUsed();
}
this.memorySizeInBytes = size;
}
@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return memorySizeInBytes;
}

View File

@ -152,7 +152,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<ParentChil
return data;
} finally {
if (success) {
estimator.afterLoad(estimatedTermsEnum, data.getMemorySizeInBytes());
estimator.afterLoad(estimatedTermsEnum, data.ramBytesUsed());
} else {
estimator.afterLoad(estimatedTermsEnum, 0);
}
@ -309,7 +309,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<ParentChil
PerType perType = new PerType(parentType.utf8ToString());
GlobalOrdinalsIndexFieldData globalIfd = (GlobalOrdinalsIndexFieldData) globalOrdinalsBuilder.build(indexReader, perType, indexSettings, breakerService);
globalIfdPerType.put(perType.type, globalIfd);
memorySizeInBytes += globalIfd.getMemorySizeInBytes();
memorySizeInBytes += globalIfd.ramBytesUsed();
}
return new ParentChildGlobalOrdinalsIndexFieldData(globalIfdPerType.build(), memorySizeInBytes);
}

View File

@ -19,10 +19,7 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalStateException;
@ -40,24 +37,26 @@ abstract class SortedSetDVAtomicFieldData {
private final AtomicReader reader;
private final String field;
private final boolean multiValued;
private final long valueCount;
SortedSetDVAtomicFieldData(AtomicReader reader, String field) {
this.reader = reader;
this.field = field;
SortedSetDocValues dv = getValuesNoException(reader, field);
this.multiValued = DocValues.unwrapSingleton(dv) == null;
this.valueCount = dv.getValueCount();
}
public boolean isMultiValued() {
// we could compute it when loading the values for the first time and then cache it but it would defeat the point of
// doc values which is to make loading faster
return true;
return multiValued;
}
public long getNumberUniqueValues() {
final SortedSetDocValues values = getValuesNoException(reader, field);
return values.getValueCount();
return valueCount;
}
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
// There is no API to access memory usage per-field and RamUsageEstimator can't help since there are often references
// from a per-field instance to all other instances handled by the same format
return -1L;
@ -69,7 +68,11 @@ abstract class SortedSetDVAtomicFieldData {
public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getBytesValues() {
final SortedSetDocValues values = getValuesNoException(reader, field);
return new SortedSetValues(values);
if (values instanceof RandomAccessOrds) {
return new RandomAccessSortedSetValues((RandomAccessOrds)values, multiValued);
} else {
return new SortedSetValues(values, multiValued);
}
}
public TermsEnum getTermsEnum() {
@ -78,27 +81,58 @@ abstract class SortedSetDVAtomicFieldData {
private static SortedSetDocValues getValuesNoException(AtomicReader reader, String field) {
try {
SortedSetDocValues values = reader.getSortedSetDocValues(field);
if (values == null) {
// This field has not been populated
assert reader.getFieldInfos().fieldInfo(field) == null;
values = DocValues.EMPTY_SORTED_SET;
}
return values;
return DocValues.getSortedSet(reader, field);
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Couldn't load doc values", e);
}
}
private final static class RandomAccessSortedSetValues extends BytesValues.WithOrdinals {
private final RandomAccessOrds values;
private int index = 0;
RandomAccessSortedSetValues(RandomAccessOrds values, boolean multiValued) {
super(multiValued);
this.values = values;
}
static class SortedSetValues extends BytesValues.WithOrdinals {
@Override
public long getMaxOrd() {
return values.getValueCount();
}
@Override
public long getOrd(int docId) {
values.setDocument(docId);
return values.nextOrd();
}
@Override
public long nextOrd() {
return values.ordAt(index++);
}
@Override
public BytesRef getValueByOrd(long ord) {
return values.lookupOrd(ord);
}
@Override
public int setDocument(int docId) {
values.setDocument(docId);
index = 0;
return values.cardinality();
}
}
private final static class SortedSetValues extends BytesValues.WithOrdinals {
private final BytesRef scratch = new BytesRef();
private final SortedSetDocValues values;
private long[] ords;
private int ordIndex = Integer.MAX_VALUE;
SortedSetValues(SortedSetDocValues values) {
super(DocValues.unwrapSingleton(values) == null);
SortedSetValues(SortedSetDocValues values, boolean multiValued) {
super(multiValued);
this.values = values;
ords = new long[0];
}
@ -136,8 +170,7 @@ abstract class SortedSetDVAtomicFieldData {
@Override
public BytesRef getValueByOrd(long ord) {
values.lookupOrd(ord, scratch);
return scratch;
return values.lookupOrd(ord);
}
}
}
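RandomAccessOrds is new in Lucene 4.9: after setDocument(), the current document's ordinals are addressable by position through cardinality() and ordAt(i), instead of being streamed one by one with nextOrd(). A rough sketch of consuming a field that way; the reader, field and doc parameters are illustrative, and the instanceof check mirrors the fallback above:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

final class RandomAccessOrdsSketch {
    static void printTerms(AtomicReader reader, String field, int doc) throws IOException {
        SortedSetDocValues dv = DocValues.getSortedSet(reader, field);
        if (dv instanceof RandomAccessOrds) {
            RandomAccessOrds ords = (RandomAccessOrds) dv;
            ords.setDocument(doc);
            // Every ordinal of the current document is addressable by index.
            for (int i = 0; i < ords.cardinality(); i++) {
                BytesRef term = ords.lookupOrd(ords.ordAt(i));
                System.out.println(term.utf8ToString());
            }
        }
        // Otherwise fall back to the nextOrd()-style iteration shown in SortedSetValues above.
    }
}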

View File

@ -21,7 +21,6 @@ package org.elasticsearch.index.mapper.core;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.Filter;
@ -222,7 +221,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper<Boolean> {
if (value == null) {
return;
}
fields.add(new XStringField(names.indexName(), value ? "T" : "F", fieldType));
fields.add(new Field(names.indexName(), value ? "T" : "F", fieldType));
}
@Override

View File

@ -372,7 +372,7 @@ public class ByteFieldMapper extends NumberFieldMapper<Byte> {
}
@Override
public TokenStream tokenStream(Analyzer analyzer) {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
if (fieldType().indexed()) {
return mapper.popCachedStream().setIntValue(number);
}
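The signature change here comes from Lucene 4.9, where Field.tokenStream receives the previously used TokenStream so implementations can reuse it instead of allocating a new one per field; that built-in reuse is presumably also why this commit drops the Elasticsearch-specific XStringField wrapper in favour of plain Field in the mappers. A minimal, hypothetical subclass showing the new shape:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

// Hypothetical field type; the two-argument tokenStream override is the point.
final class ReusingField extends Field {
    ReusingField(String name, String value, FieldType type) {
        super(name, value, type);
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
        // Field reuses 'reuse' when possible instead of creating a new stream each time.
        return super.tokenStream(analyzer, reuse);
    }
}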

View File

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
@ -47,7 +46,6 @@ import org.elasticsearch.search.suggest.context.ContextMapping;
import org.elasticsearch.search.suggest.context.ContextMapping.ContextConfig;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
import static org.elasticsearch.index.mapper.MapperBuilders.completionField;
@ -388,7 +386,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
surfaceForm, weight, payload);
}
private static final class SuggestField extends XStringField {
private static final class SuggestField extends Field {
private final BytesRef payload;
private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
private final ContextMapping.Context ctx;
@ -401,8 +399,8 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer));
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer, previous));
return new CompletionTokenStream(ts, payload, toFiniteStrings);
}
}

View File

@ -375,7 +375,7 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setDoubleValue(number);
}

View File

@ -381,7 +381,7 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setFloatValue(number);
}

View File

@ -376,7 +376,7 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setIntValue(number);
}

View File

@ -357,7 +357,7 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setLongValue(number);
}

View File

@ -459,7 +459,7 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
return null;
}

View File

@ -374,7 +374,7 @@ public class ShortFieldMapper extends NumberFieldMapper<Short> {
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setIntValue(number);
}

View File

@ -20,13 +20,9 @@
package org.elasticsearch.index.mapper.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
@ -287,7 +283,7 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
}
if (fieldType.indexed() || fieldType.stored()) {
Field field = new XStringField(names.indexName(), valueAndBoost.value(), fieldType);
Field field = new Field(names.indexName(), valueAndBoost.value(), fieldType);
field.setBoost(valueAndBoost.boost());
fields.add(field);
}

View File

@ -24,7 +24,6 @@ import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.base.Objects;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
@ -571,7 +570,7 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
}
if (fieldType.indexed() || fieldType.stored()) {
Field field = new XStringField(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
Field field = new Field(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
context.doc().add(field);
}
if (enableGeoHash) {

View File

@ -23,7 +23,6 @@ import com.google.common.collect.UnmodifiableIterator;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
@ -221,7 +220,7 @@ public class FieldNamesFieldMapper extends AbstractFieldMapper<String> implement
for (String path : paths) {
for (String fieldName : extractFieldNames(path)) {
if (fieldType.indexed() || fieldType.stored()) {
document.add(new XStringField(names().indexName(), fieldName, fieldType));
document.add(new Field(names().indexName(), fieldName, fieldType));
}
if (hasDocValues()) {
document.add(new SortedSetDocValuesField(names().indexName(), new BytesRef(fieldName)));

View File

@ -23,7 +23,6 @@ import com.google.common.collect.Iterables;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
@ -310,7 +309,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
} // else we are in the pre/post parse phase
if (fieldType.indexed() || fieldType.stored()) {
fields.add(new XStringField(names.indexName(), context.id(), fieldType));
fields.add(new Field(names.indexName(), context.id(), fieldType));
}
if (hasDocValues()) {
fields.add(new BinaryDocValuesField(names.indexName(), new BytesRef(context.id())));

View File

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
@ -182,7 +181,7 @@ public class IndexFieldMapper extends AbstractFieldMapper<String> implements Int
if (!enabledState.enabled) {
return;
}
fields.add(new XStringField(names.indexName(), context.index(), fieldType));
fields.add(new Field(names.indexName(), context.index(), fieldType));
}
@Override

View File

@ -20,7 +20,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
@ -185,7 +184,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
// we are in the parsing of _parent phase
String parentId = context.parser().text();
context.sourceToParse().parent(parentId);
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else {
// otherwise, we are running it post processing of the xcontent
String parsedParentId = context.doc().get(Defaults.NAME);
@ -196,7 +195,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
throw new MapperParsingException("No parent id provided, not within the document, and not externally");
}
// we did not add it in the parsing phase, add it now
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else if (parentId != null && !parsedParentId.equals(Uid.createUid(context.stringBuilder(), type, parentId))) {
throw new MapperParsingException("Parent id mismatch, document value is [" + Uid.createUid(parsedParentId).id() + "], while external value is [" + parentId + "]");
}

View File

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
@ -201,7 +200,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper<String> implements I
context.ignoredValue(names.indexName(), routing);
return;
}
fields.add(new XStringField(names.indexName(), routing, fieldType));
fields.add(new Field(names.indexName(), routing, fieldType));
}
}
}

View File

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
@ -178,7 +177,7 @@ public class TypeFieldMapper extends AbstractFieldMapper<String> implements Inte
if (!fieldType.indexed() && !fieldType.stored()) {
return;
}
fields.add(new XStringField(names.indexName(), context.type(), fieldType));
fields.add(new Field(names.indexName(), context.type(), fieldType));
if (hasDocValues()) {
fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(context.type())));
}

View File

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
@ -154,7 +153,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal
// we need to go over the docs and add it...
for (int i = 1; i < context.docs().size(); i++) {
final Document doc = context.docs().get(i);
doc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
doc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
}
}
}
@ -172,7 +171,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal
@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
Field uid = new XStringField(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
Field uid = new Field(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
context.uid(uid);
fields.add(uid);
if (hasDocValues()) {

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.mapper.object;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
@ -453,12 +453,12 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll {
// we also rely on this for UidField#loadVersion
// this is a deeply nested field
nestedDoc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
}
// the type of the nested doc starts with __, so we can identify that it's a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new XStringField(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
restoreDoc = context.switchDoc(nestedDoc);
context.addDoc(nestedDoc);
}

View File

@ -51,7 +51,6 @@ import java.util.Map;
* For now, this {@link MergePolicy} takes care of moving versions that used to
* be stored as payloads to numeric doc values.
*/
@SuppressWarnings("PMD.ProperCloneImplementation")
public final class ElasticsearchMergePolicy extends MergePolicy {
private final MergePolicy delegate;
@ -105,11 +104,11 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
fieldNumber = Math.max(fieldNumber, fi.number + 1);
}
newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, false, fieldNumber, false, true, false,
IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC, Collections.<String, String>emptyMap());
IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC, -1, Collections.<String, String>emptyMap());
} else {
newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, versionInfo.isIndexed(), versionInfo.number,
versionInfo.hasVectors(), versionInfo.omitsNorms(), versionInfo.hasPayloads(),
versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(), versionInfo.attributes());
versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(), versionInfo.getDocValuesGen(), versionInfo.attributes());
}
final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>();
for (FieldInfo info : fieldInfos) {
@ -189,13 +188,13 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger,
SegmentInfos segmentInfos) throws IOException {
return upgradedMergeSpecification(delegate.findMerges(mergeTrigger, segmentInfos));
SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
return upgradedMergeSpecification(delegate.findMerges(mergeTrigger, segmentInfos, writer));
}
@Override
public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge)
int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
throws IOException {
if (force) {
List<SegmentCommitInfo> segments = Lists.newArrayList();
@ -210,18 +209,13 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
return spec;
}
}
return upgradedMergeSpecification(delegate.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge));
return upgradedMergeSpecification(delegate.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer));
}
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos)
public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
throws IOException {
return upgradedMergeSpecification(delegate.findForcedDeletesMerges(segmentInfos));
}
@Override
public MergePolicy clone() {
return new ElasticsearchMergePolicy(delegate.clone());
return upgradedMergeSpecification(delegate.findForcedDeletesMerges(segmentInfos, writer));
}
@Override
@ -230,14 +224,8 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
}
@Override
public boolean useCompoundFile(SegmentInfos segments,
SegmentCommitInfo newSegment) throws IOException {
return delegate.useCompoundFile(segments, newSegment);
}
@Override
public void setIndexWriter(IndexWriter writer) {
delegate.setIndexWriter(writer);
public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, IndexWriter writer) throws IOException {
return delegate.useCompoundFile(segments, newSegment, writer);
}
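For context, Lucene 4.9 reworked MergePolicy: the IndexWriter is passed to every find* and useCompoundFile call, and setIndexWriter() as well as the cloning contract are gone (which is also why the merge-policy providers further down drop their identity clone() overrides). A small hypothetical subclass of TieredMergePolicy written against the new signatures; the logging is invented for illustration:

import java.io.IOException;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.TieredMergePolicy;

// Hypothetical policy: logs what the base policy decides; only the method shape matters here.
final class LoggingTieredMergePolicy extends TieredMergePolicy {
    @Override
    public MergeSpecification findMerges(MergeTrigger trigger, SegmentInfos infos, IndexWriter writer) throws IOException {
        // The writer is now an argument rather than state injected via setIndexWriter().
        MergeSpecification spec = super.findMerges(trigger, infos, writer);
        if (spec != null) {
            System.out.println(spec.merges.size() + " merge(s) proposed over " + infos.size() + " segment(s)");
        }
        return spec;
    }
}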
/**

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.merge.policy;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Preconditions;
import org.elasticsearch.common.inject.Inject;
@ -157,13 +156,6 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
super.close();
provider.policies.remove(this);
}
@Override
public MergePolicy clone() {
// Lucene IW makes a clone internally but since we hold on to this instance
// the clone will just be the identity.
return this;
}
}
}

View File

@ -19,7 +19,6 @@
package org.elasticsearch.index.merge.policy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.inject.Inject;
@ -205,12 +204,5 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
super.close();
provider.policies.remove(this);
}
@Override
public MergePolicy clone() {
// Lucene IW makes a clone internally but since we hold on to this instance
// the clone will just be the identity.
return this;
}
}
}

View File

@ -173,11 +173,6 @@ public class ScriptFilterParser implements FilterParser {
this.searchScript = searchScript;
}
@Override
public boolean isCacheable() {
return true;
}
@Override
protected boolean matchDoc(int doc) {
searchScript.setNextDocId(doc);

View File

@ -315,11 +315,6 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
this.inclusiveUpperPoint = inclusiveUpperPoint;
this.values = values;
}
@Override
public boolean isCacheable() {
return true;
}
@Override
protected boolean matchDoc(int doc) {
@ -346,11 +341,6 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
this.inclusiveUpperPoint = inclusiveUpperPoint;
this.values = values;
}
@Override
public boolean isCacheable() {
return true;
}
@Override
protected boolean matchDoc(int doc) {

View File

@ -172,11 +172,6 @@ public class GeoDistanceFilter extends Filter {
this.distance = distance;
}
@Override
public boolean isCacheable() {
return true;
}
@Override
protected boolean matchDoc(int doc) {

View File

@ -188,11 +188,6 @@ public class GeoDistanceRangeFilter extends Filter {
this.inclusiveUpperPoint = inclusiveUpperPoint;
}
@Override
public boolean isCacheable() {
return true;
}
@Override
protected boolean matchDoc(int doc) {
final int length = values.setDocument(doc);

Some files were not shown because too many files have changed in this diff.