Upgrade to Lucene 4.9 (closes #6623)
This commit is contained in:
parent b43b56a6a8
commit b55ad98d73
@@ -18,8 +18,6 @@ java.util.Collections#sort(java.util.List,java.util.Comparator)
java.io.StringReader#<init>(java.lang.String) @ Use FastStringReader instead
org.apache.lucene.util.RamUsageEstimator#sizeOf(java.lang.Object) @ This can be a perfromance trap

@defaultMessage Reference management is tricky, leave it to SearcherManager
org.apache.lucene.index.IndexReader#decRef()
org.apache.lucene.index.IndexReader#incRef()
@@ -55,9 +53,3 @@ java.lang.Math#abs(long)
@defaultMessage Use Long.compare instead we are on Java7
com.google.common.primitives.Longs#compare(long,long)

@defaultMessage we have an optimized XStringField to reduce analysis creation overhead
org.apache.lucene.document.Field#<init>(java.lang.String,java.lang.String,org.apache.lucene.document.FieldType)

@defaultMessage Use XNativeFSLockFactory instead of the buggy NativeFSLockFactory see LUCENE-5738 - remove once Lucene 4.9 is released
org.apache.lucene.store.NativeFSLockFactory
pom.xml (2 changed lines)
@@ -31,7 +31,7 @@
    </parent>

    <properties>
        <lucene.version>4.8.1</lucene.version>
        <lucene.version>4.9.0</lucene.version>
        <tests.jvms>auto</tests.jvms>
        <tests.shuffle>true</tests.shuffle>
        <tests.output>onerror</tests.output>
src/main/java/org/apache/lucene/document/XStringField.java (deleted, 62 lines)
@@ -1,62 +0,0 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lucene.document;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.CloseableThreadLocal;

import java.io.IOException;

/**
 * A string/text field that optimizes the case for non analyzed fields to reuse a thread local token
 * stream (instead of creating it each time). This reduces analysis chain overhead and object creation
 * (which is significant, yay Attributes).
 * <p/>
 * Not to be confused with Lucene StringField, this handles analyzed text as well, and relies on providing
 * the FieldType. Couldn't come up with a good name for this that is different from Text/String...
 */
public class XStringField extends Field {

    private static final CloseableThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new CloseableThreadLocal<StringTokenStream>() {
        @Override
        protected StringTokenStream initialValue() {
            return new StringTokenStream();
        }
    };

    public XStringField(String name, String value, FieldType fieldType) {
        super(name, fieldType);
        fieldsData = value;
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer) throws IOException {
        if (!fieldType().indexed()) {
            return null;
        }
        // Only use the cached TokenStream if the value is indexed and not-tokenized
        if (fieldType().tokenized()) {
            return super.tokenStream(analyzer);
        }
        StringTokenStream nonAnalyzedTokenStream = NOT_ANALYZED_TOKENSTREAM.get();
        nonAnalyzedTokenStream.setValue((String) fieldsData);
        return nonAnalyzedTokenStream;
    }
}
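For context, a minimal sketch of how the now-removed XStringField was typically used (the field name, value, and the keywordType parameter are illustrative, not taken from this commit): it is a drop-in for Field that skips building a fresh analysis chain when the FieldType is indexed but not tokenized.

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.XStringField;

    // Sketch only: add a non-analyzed string value through XStringField so the
    // cached, thread-local TokenStream path in tokenStream() is taken.
    class XStringFieldUsageSketch {
        static Document makeDoc(FieldType keywordType) { // keywordType: indexed, not tokenized
            Document doc = new Document();
            doc.add(new XStringField("id", "doc-42", keywordType));
            return doc;
        }
    }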
src/main/java/org/apache/lucene/index/XOrdinalMap.java (new file, 306 lines)
@@ -0,0 +1,306 @@
package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;

/** maps per-segment ordinals to/from global ordinal space */
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
// TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
public class XOrdinalMap implements Accountable {

    static {
        assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5780, LUCENE-5782)";
    }

    private static class SegmentMap implements Accountable {
        private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);

        /** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
        private static int[] map(final long[] weights) {
            final int[] newToOld = new int[weights.length];
            for (int i = 0; i < weights.length; ++i) {
                newToOld[i] = i;
            }
            new InPlaceMergeSorter() {
                @Override
                protected void swap(int i, int j) {
                    final int tmp = newToOld[i];
                    newToOld[i] = newToOld[j];
                    newToOld[j] = tmp;
                }
                @Override
                protected int compare(int i, int j) {
                    // j first since we actually want higher weights first
                    return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
                }
            }.sort(0, weights.length);
            return newToOld;
        }

        /** Inverse the map. */
        private static int[] inverse(int[] map) {
            final int[] inverse = new int[map.length];
            for (int i = 0; i < map.length; ++i) {
                inverse[map[i]] = i;
            }
            return inverse;
        }

        private final int[] newToOld, oldToNew;

        SegmentMap(long[] weights) {
            newToOld = map(weights);
            oldToNew = inverse(newToOld);
            assert Arrays.equals(newToOld, inverse(oldToNew));
        }

        int newToOld(int segment) {
            return newToOld[segment];
        }

        int oldToNew(int segment) {
            return oldToNew[segment];
        }

        @Override
        public long ramBytesUsed() {
            return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
        }

    }

    /**
     * Create an ordinal map that uses the number of unique values of each
     * {@link SortedDocValues} instance as a weight.
     * @see #build(Object, TermsEnum[], long[], float)
     */
    public static XOrdinalMap build(Object owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
        final TermsEnum[] subs = new TermsEnum[values.length];
        final long[] weights = new long[values.length];
        for (int i = 0; i < values.length; ++i) {
            subs[i] = values[i].termsEnum();
            weights[i] = values[i].getValueCount();
        }
        return build(owner, subs, weights, acceptableOverheadRatio);
    }

    /**
     * Create an ordinal map that uses the number of unique values of each
     * {@link SortedSetDocValues} instance as a weight.
     * @see #build(Object, TermsEnum[], long[], float)
     */
    public static XOrdinalMap build(Object owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
        final TermsEnum[] subs = new TermsEnum[values.length];
        final long[] weights = new long[values.length];
        for (int i = 0; i < values.length; ++i) {
            subs[i] = values[i].termsEnum();
            weights[i] = values[i].getValueCount();
        }
        return build(owner, subs, weights, acceptableOverheadRatio);
    }

    /**
     * Creates an ordinal map that allows mapping ords to/from a merged
     * space from <code>subs</code>.
     * @param owner a cache key
     * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
     *             not be dense (e.g. can be FilteredTermsEnums}.
     * @param weights a weight for each sub. This is ideally correlated with
     *             the number of unique terms that each sub introduces compared
     *             to the other subs
     * @throws IOException if an I/O error occurred.
     */
    public static XOrdinalMap build(Object owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
        if (subs.length != weights.length) {
            throw new IllegalArgumentException("subs and weights must have the same length");
        }

        // enums are not sorted, so let's sort to save memory
        final SegmentMap segmentMap = new SegmentMap(weights);
        return new XOrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
    }

    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(XOrdinalMap.class);

    // cache key of whoever asked for this awful thing
    final Object owner;
    // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the the ordinal in the first segment that contains this term
    final MonotonicAppendingLongBuffer globalOrdDeltas;
    // globalOrd -> first segment container
    final AppendingPackedLongBuffer firstSegments;
    // for every segment, segmentOrd -> globalOrd
    final LongValues segmentToGlobalOrds[];
    // the map from/to segment ids
    final SegmentMap segmentMap;
    // ram usage
    final long ramBytesUsed;

    XOrdinalMap(Object owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
        // create the ordinal mappings by pulling a termsenum over each sub's
        // unique terms, and walking a multitermsenum over those
        this.owner = owner;
        this.segmentMap = segmentMap;
        // even though we accept an overhead ratio, we keep these ones with COMPACT
        // since they are only used to resolve values given a global ord, which is
        // slow anyway
        globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
        firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
        final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
        for (int i = 0; i < ordDeltas.length; i++) {
            ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
        }
        long[] ordDeltaBits = new long[subs.length];
        long segmentOrds[] = new long[subs.length];
        ReaderSlice slices[] = new ReaderSlice[subs.length];
        TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
        for (int i = 0; i < slices.length; i++) {
            slices[i] = new ReaderSlice(0, 0, i);
            indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
        }
        MultiTermsEnum mte = new MultiTermsEnum(slices);
        mte.reset(indexes);
        long globalOrd = 0;
        while (mte.next() != null) {
            TermsEnumWithSlice matches[] = mte.getMatchArray();
            int firstSegmentIndex = Integer.MAX_VALUE;
            long globalOrdDelta = Long.MAX_VALUE;
            for (int i = 0; i < mte.getMatchCount(); i++) {
                int segmentIndex = matches[i].index;
                long segmentOrd = matches[i].terms.ord();
                long delta = globalOrd - segmentOrd;
                // We compute the least segment where the term occurs. In case the
                // first segment contains most (or better all) values, this will
                // help save significant memory
                if (segmentIndex < firstSegmentIndex) {
                    firstSegmentIndex = segmentIndex;
                    globalOrdDelta = delta;
                }
                // for each per-segment ord, map it back to the global term.
                while (segmentOrds[segmentIndex] <= segmentOrd) {
                    ordDeltaBits[segmentIndex] |= delta;
                    ordDeltas[segmentIndex].add(delta);
                    segmentOrds[segmentIndex]++;
                }
            }
            // for each unique term, just mark the first segment index/delta where it occurs
            assert firstSegmentIndex < segmentOrds.length;
            firstSegments.add(firstSegmentIndex);
            globalOrdDeltas.add(globalOrdDelta);
            globalOrd++;
        }
        firstSegments.freeze();
        globalOrdDeltas.freeze();
        for (int i = 0; i < ordDeltas.length; ++i) {
            ordDeltas[i].freeze();
        }
        // ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
        segmentToGlobalOrds = new LongValues[subs.length];
        long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed()
                + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
                + segmentMap.ramBytesUsed();
        for (int i = 0; i < ordDeltas.length; ++i) {
            final MonotonicAppendingLongBuffer deltas = ordDeltas[i];
            if (ordDeltaBits[i] == 0L) {
                // segment ords perfectly match global ordinals
                // likely in case of low cardinalities and large segments
                segmentToGlobalOrds[i] = LongValues.IDENTITY;
            } else {
                final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
                final long monotonicBits = deltas.ramBytesUsed() * 8;
                final long packedBits = bitsRequired * deltas.size();
                if (deltas.size() <= Integer.MAX_VALUE
                        && packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
                    // monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
                    final int size = (int) deltas.size();
                    final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
                    final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator();
                    for (int ord = 0; ord < size; ++ord) {
                        newDeltas.set(ord, it.next());
                    }
                    assert !it.hasNext();
                    segmentToGlobalOrds[i] = new LongValues() {
                        @Override
                        public long get(long ord) {
                            return ord + newDeltas.get((int) ord);
                        }
                    };
                    ramBytesUsed += newDeltas.ramBytesUsed();
                } else {
                    segmentToGlobalOrds[i] = new LongValues() {
                        @Override
                        public long get(long ord) {
                            return ord + deltas.get(ord);
                        }
                    };
                    ramBytesUsed += deltas.ramBytesUsed();
                }
                ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
            }
        }
        this.ramBytesUsed = ramBytesUsed;
    }

    /**
     * Given a segment number, return a {@link LongValues} instance that maps
     * segment ordinals to global ordinals.
     */
    public LongValues getGlobalOrds(int segmentIndex) {
        return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
    }

    /**
     * Given global ordinal, returns the ordinal of the first segment which contains
     * this ordinal (the corresponding to the segment return {@link #getFirstSegmentNumber}).
     */
    public long getFirstSegmentOrd(long globalOrd) {
        return globalOrd - globalOrdDeltas.get(globalOrd);
    }

    /**
     * Given a global ordinal, returns the index of the first
     * segment that contains this term.
     */
    public int getFirstSegmentNumber(long globalOrd) {
        return segmentMap.newToOld((int) firstSegments.get(globalOrd));
    }

    /**
     * Returns the total number of unique terms in global ord space.
     */
    public long getValueCount() {
        return globalOrdDeltas.size();
    }

    @Override
    public long ramBytesUsed() {
        return ramBytesUsed;
    }
}
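For context, a minimal usage sketch of the class added above (variable names are illustrative, not from this commit): the map is built once over the per-segment SortedDocValues and then used to translate a segment-local ordinal into the merged, global ordinal space.

    import java.io.IOException;

    import org.apache.lucene.index.SortedDocValues;
    import org.apache.lucene.index.XOrdinalMap;
    import org.apache.lucene.util.LongValues;
    import org.apache.lucene.util.packed.PackedInts;

    // Sketch only: resolve a segment-local ordinal to its global ordinal.
    class XOrdinalMapUsageSketch {
        static long toGlobalOrd(SortedDocValues[] perSegmentValues, int segmentIndex, long segmentOrd) throws IOException {
            // The owner object is just a cache key; PackedInts.DEFAULT is one valid overhead ratio.
            XOrdinalMap map = XOrdinalMap.build(new Object(), perSegmentValues, PackedInts.DEFAULT);
            LongValues segmentToGlobal = map.getGlobalOrds(segmentIndex);
            return segmentToGlobal.get(segmentOrd);
        }
    }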
@@ -250,8 +250,8 @@ public class XAnalyzingSuggester extends Lookup {
    }

    /** Returns byte size of the underlying FST. */
    public long sizeInBytes() {
        return fst == null ? 0 : fst.sizeInBytes();
    public long ramBytesUsed() {
        return fst == null ? 0 : fst.ramBytesUsed();
    }

    private static void copyDestTransitions(State from, State to, List<Transition> transitions) {
@@ -910,7 +910,7 @@ public class XAnalyzingSuggester extends Lookup {
        // TODO: we could walk & add simultaneously, so we
        // don't have to alloc [possibly biggish]
        // intermediate HashSet in RAM:
        return XSpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
        return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
    }

    final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
@@ -219,7 +219,7 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
    }

    Automaton toLevenshteinAutomata(Automaton automaton) {
        final Set<IntsRef> ref = XSpecialOperations.getFiniteStrings(automaton, -1);
        final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
        Automaton subs[] = new Automaton[ref.size()];
        int upto = 0;
        for (IntsRef path : ref) {
src/main/java/org/apache/lucene/search/suggest/analyzing/XSpecialOperations.java (deleted, 200 lines)
@@ -1,200 +0,0 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lucene.search.suggest.analyzing;

import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Set;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.State;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.Util;
import org.elasticsearch.Version;

class XSpecialOperations {

    // TODO Lucene 4.9: remove this once we upgrade; see
    // LUCENE-5628

    static {
        assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Remove this code once we upgrade to Lucene 4.9 where LUCENE-5628 is fixed";
    }

    private static class PathNode {

        /** Which state the path node ends on, whose
         * transitions we are enumerating. */
        public State state;

        /** Which state the current transition leads to. */
        public State to;

        /** Which transition we are on. */
        public int transition;

        /** Which label we are on, in the min-max range of the
         * current Transition */
        public int label;

        public void resetState(State state) {
            assert state.numTransitions() != 0;
            this.state = state;
            transition = 0;
            Transition t = state.transitionsArray[transition];
            label = t.getMin();
            to = t.getDest();
        }

        /** Returns next label of current transition, or
         * advances to next transition and returns its first
         * label, if current one is exhausted. If there are
         * no more transitions, returns -1. */
        public int nextLabel() {
            if (label > state.transitionsArray[transition].getMax()) {
                // We've exhaused the current transition's labels;
                // move to next transitions:
                transition++;
                if (transition >= state.numTransitions()) {
                    // We're done iterating transitions leaving this state
                    return -1;
                }
                Transition t = state.transitionsArray[transition];
                label = t.getMin();
                to = t.getDest();
            }
            return label++;
        }
    }

    private static PathNode getNode(PathNode[] nodes, int index) {
        assert index < nodes.length;
        if (nodes[index] == null) {
            nodes[index] = new PathNode();
        }
        return nodes[index];
    }

    // TODO: this is a dangerous method ... Automaton could be
    // huge ... and it's better in general for caller to
    // enumerate & process in a single walk:

    /** Returns the set of accepted strings, up to at most
     *  <code>limit</code> strings. If more than <code>limit</code>
     *  strings are accepted, the first limit strings found are returned. If <code>limit</code> == -1, then
     *  the limit is infinite. If the {@link Automaton} has
     *  cycles then this method might throw {@code
     *  IllegalArgumentException} but that is not guaranteed
     *  when the limit is set. */
    public static Set<IntsRef> getFiniteStrings(Automaton a, int limit) {
        Set<IntsRef> results = new HashSet<>();

        if (limit == -1 || limit > 0) {
            // OK
        } else {
            throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit);
        }

        if (a.getSingleton() != null) {
            // Easy case: automaton accepts only 1 string
            results.add(Util.toUTF32(a.getSingleton(), new IntsRef()));
        } else {

            if (a.getInitialState().isAccept()) {
                // Special case the empty string, as usual:
                results.add(new IntsRef());
            }

            if (a.getInitialState().numTransitions() > 0 && (limit == -1 || results.size() < limit)) {

                // TODO: we could use state numbers here and just
                // alloc array, but asking for states array can be
                // costly (it's lazily computed):

                // Tracks which states are in the current path, for
                // cycle detection:
                Set<State> pathStates = Collections.newSetFromMap(new IdentityHashMap<State,Boolean>());

                // Stack to hold our current state in the
                // recursion/iteration:
                PathNode[] nodes = new PathNode[4];

                pathStates.add(a.getInitialState());
                PathNode root = getNode(nodes, 0);
                root.resetState(a.getInitialState());

                IntsRef string = new IntsRef(1);
                string.length = 1;

                while (string.length > 0) {

                    PathNode node = nodes[string.length-1];

                    // Get next label leaving the current node:
                    int label = node.nextLabel();

                    if (label != -1) {
                        string.ints[string.length-1] = label;

                        if (node.to.isAccept()) {
                            // This transition leads to an accept state,
                            // so we save the current string:
                            results.add(IntsRef.deepCopyOf(string));
                            if (results.size() == limit) {
                                break;
                            }
                        }

                        if (node.to.numTransitions() != 0) {
                            // Now recurse: the destination of this transition has
                            // outgoing transitions:
                            if (pathStates.contains(node.to)) {
                                throw new IllegalArgumentException("automaton has cycles");
                            }
                            pathStates.add(node.to);

                            // Push node onto stack:
                            if (nodes.length == string.length) {
                                PathNode[] newNodes = new PathNode[ArrayUtil.oversize(nodes.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                                System.arraycopy(nodes, 0, newNodes, 0, nodes.length);
                                nodes = newNodes;
                            }
                            getNode(nodes, string.length).resetState(node.to);
                            string.length++;
                            string.grow(string.length);
                        }
                    } else {
                        // No more transitions leaving this state,
                        // pop/return back to previous state:
                        assert pathStates.contains(node.state);
                        pathStates.remove(node.state);
                        string.length--;
                    }
                }
            }
        }

        return results;
    }
}
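For reference, a minimal sketch of how the now-removed helper was used (the sketch class and its names are illustrative; it must sit in the same package because XSpecialOperations is package-private): enumerate every string accepted by a small, acyclic automaton.

    package org.apache.lucene.search.suggest.analyzing;

    import java.util.Set;

    import org.apache.lucene.util.IntsRef;
    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.BasicAutomata;
    import org.apache.lucene.util.automaton.BasicOperations;

    // Sketch only: collect the strings accepted by an acyclic automaton.
    class FiniteStringsSketch {
        static int countAcceptedStrings() {
            Automaton a = BasicOperations.union(
                    BasicAutomata.makeString("search"),
                    BasicAutomata.makeString("suggest"));
            // -1 means no limit; cyclic automata may trigger an IllegalArgumentException.
            Set<IntsRef> accepted = XSpecialOperations.getFiniteStrings(a, -1);
            return accepted.size(); // 2
        }
    }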
@@ -89,25 +89,11 @@ public class BufferedChecksumIndexOutput extends BufferedIndexOutput {
        }
    }

    @Override
    public void seek(long pos) throws IOException {
        // seek might be called on files, which means that the checksum is not file checksum
        // but a checksum of the bytes written to this stream, which is the same for each
        // type of file in lucene
        super.seek(pos);
        delegate.seek(pos);
    }

    @Override
    public long length() throws IOException {
        return delegate.length();
    }

    @Override
    public void setLength(long length) throws IOException {
        delegate.setLength(length);
    }

    @Override
    public String toString() {
        return delegate.toString();
src/main/java/org/apache/lucene/store/BufferedIndexOutput.java (new file, 155 lines)
@@ -0,0 +1,155 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lucene.store;

import java.io.IOException;
import java.util.zip.CRC32;

/** Base implementation class for buffered {@link IndexOutput}. */
public abstract class BufferedIndexOutput extends IndexOutput {
    /** The default buffer size in bytes ({@value #DEFAULT_BUFFER_SIZE}). */
    public static final int DEFAULT_BUFFER_SIZE = 16384;

    private final int bufferSize;
    private final byte[] buffer;
    private long bufferStart = 0; // position in file of buffer
    private int bufferPosition = 0; // position in buffer
    private final CRC32 crc = new CRC32();

    /**
     * Creates a new {@link BufferedIndexOutput} with the default buffer size
     * ({@value #DEFAULT_BUFFER_SIZE} bytes see {@link #DEFAULT_BUFFER_SIZE})
     */
    public BufferedIndexOutput() {
        this(DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates a new {@link BufferedIndexOutput} with the given buffer size.
     * @param bufferSize the buffer size in bytes used to buffer writes internally.
     * @throws IllegalArgumentException if the given buffer size is less or equal to <tt>0</tt>
     */
    public BufferedIndexOutput(int bufferSize) {
        if (bufferSize <= 0) {
            throw new IllegalArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
        }
        this.bufferSize = bufferSize;
        buffer = new byte[bufferSize];
    }

    @Override
    public void writeByte(byte b) throws IOException {
        if (bufferPosition >= bufferSize)
            flush();
        buffer[bufferPosition++] = b;
    }

    @Override
    public void writeBytes(byte[] b, int offset, int length) throws IOException {
        int bytesLeft = bufferSize - bufferPosition;
        // is there enough space in the buffer?
        if (bytesLeft >= length) {
            // we add the data to the end of the buffer
            System.arraycopy(b, offset, buffer, bufferPosition, length);
            bufferPosition += length;
            // if the buffer is full, flush it
            if (bufferSize - bufferPosition == 0)
                flush();
        } else {
            // is data larger then buffer?
            if (length > bufferSize) {
                // we flush the buffer
                if (bufferPosition > 0)
                    flush();
                // and write data at once
                crc.update(b, offset, length);
                flushBuffer(b, offset, length);
                bufferStart += length;
            } else {
                // we fill/flush the buffer (until the input is written)
                int pos = 0; // position in the input data
                int pieceLength;
                while (pos < length) {
                    pieceLength = (length - pos < bytesLeft) ? length - pos : bytesLeft;
                    System.arraycopy(b, pos + offset, buffer, bufferPosition, pieceLength);
                    pos += pieceLength;
                    bufferPosition += pieceLength;
                    // if the buffer is full, flush it
                    bytesLeft = bufferSize - bufferPosition;
                    if (bytesLeft == 0) {
                        flush();
                        bytesLeft = bufferSize;
                    }
                }
            }
        }
    }

    @Override
    public void flush() throws IOException {
        crc.update(buffer, 0, bufferPosition);
        flushBuffer(buffer, bufferPosition);
        bufferStart += bufferPosition;
        bufferPosition = 0;
    }

    /** Expert: implements buffer write.  Writes bytes at the current position in
     * the output.
     * @param b the bytes to write
     * @param len the number of bytes to write
     */
    private void flushBuffer(byte[] b, int len) throws IOException {
        flushBuffer(b, 0, len);
    }

    /** Expert: implements buffer write.  Writes bytes at the current position in
     * the output.
     * @param b the bytes to write
     * @param offset the offset in the byte array
     * @param len the number of bytes to write
     */
    protected abstract void flushBuffer(byte[] b, int offset, int len) throws IOException;

    @Override
    public void close() throws IOException {
        flush();
    }

    @Override
    public long getFilePointer() {
        return bufferStart + bufferPosition;
    }

    @Override
    public abstract long length() throws IOException;

    /**
     * Returns size of the used output buffer in bytes.
     * */
    public final int getBufferSize() {
        return bufferSize;
    }

    @Override
    public long getChecksum() throws IOException {
        flush();
        return crc.getValue();
    }
}
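For context, a minimal sketch of extending the class added above (the in-memory subclass and its names are illustrative, assuming the Lucene 4.9 IndexOutput API this class targets): only flushBuffer() and length() need to be supplied; buffering, file-pointer tracking, and CRC32 checksumming come from the base class.

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.apache.lucene.store.BufferedIndexOutput;

    // Sketch only: an in-memory BufferedIndexOutput subclass.
    class InMemoryIndexOutput extends BufferedIndexOutput {
        private final ByteArrayOutputStream out = new ByteArrayOutputStream();

        @Override
        protected void flushBuffer(byte[] b, int offset, int len) throws IOException {
            out.write(b, offset, len); // receives the buffered bytes when the base class flushes
        }

        @Override
        public long length() throws IOException {
            return getFilePointer(); // total bytes written so far, flushed or still buffered
        }
    }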
@@ -117,12 +117,6 @@ public final class RateLimitedFSDirectory extends FilterDirectory{
        return delegate.length();
    }

    @Override
    public void seek(long pos) throws IOException {
        flush();
        delegate.seek(pos);
    }

    @Override
    public void flush() throws IOException {
        try {
@@ -132,11 +126,6 @@ public final class RateLimitedFSDirectory extends FilterDirectory{
        }
    }

    @Override
    public void setLength(long length) throws IOException {
        delegate.setLength(length);
    }

    @Override
    public void close() throws IOException {
        try {
src/main/java/org/apache/lucene/store/XNativeFSLockFactory.java (deleted, 246 lines)
@@ -1,246 +0,0 @@
package org.apache.lucene.store;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;
import java.nio.file.StandardOpenOption;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.util.IOUtils;
import org.elasticsearch.Version;

/**
 * <p>Implements {@link LockFactory} using native OS file
 * locks.  Note that because this LockFactory relies on
 * java.nio.* APIs for locking, any problems with those APIs
 * will cause locking to fail.  Specifically, on certain NFS
 * environments the java.nio.* locks will fail (the lock can
 * incorrectly be double acquired) whereas {@link
 * SimpleFSLockFactory} worked perfectly in those same
 * environments.  For NFS based access to an index, it's
 * recommended that you try {@link SimpleFSLockFactory}
 * first and work around the one limitation that a lock file
 * could be left when the JVM exits abnormally.</p>
 *
 * <p>The primary benefit of {@link XNativeFSLockFactory} is
 * that locks (not the lock file itsself) will be properly
 * removed (by the OS) if the JVM has an abnormal exit.</p>
 *
 * <p>Note that, unlike {@link SimpleFSLockFactory}, the existence of
 * leftover lock files in the filesystem is fine because the OS
 * will free the locks held against these files even though the
 * files still remain.  Lucene will never actively remove the lock
 * files, so although you see them, the index may not be locked.</p>
 *
 * <p>Special care needs to be taken if you change the locking
 * implementation: First be certain that no writer is in fact
 * writing to the index otherwise you can easily corrupt
 * your index. Be sure to do the LockFactory change on all Lucene
 * instances and clean up all leftover lock files before starting
 * the new configuration for the first time.  Different implementations
 * can not work together!</p>
 *
 * <p>If you suspect that this or any other LockFactory is
 * not working properly in your environment, you can easily
 * test it by using {@link VerifyingLockFactory}, {@link
 * LockVerifyServer} and {@link LockStressTest}.</p>
 *
 * @see LockFactory
 */

public class XNativeFSLockFactory extends FSLockFactory {

    static {
        assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48 : "Remove this class in Lucene 4.9";
    }

    /**
     * Create a XNativeFSLockFactory instance, with null (unset)
     * lock directory. When you pass this factory to a {@link FSDirectory}
     * subclass, the lock directory is automatically set to the
     * directory itself. Be sure to create one instance for each directory
     * your create!
     */
    public XNativeFSLockFactory() {
        this((File) null);
    }

    /**
     * Create a XNativeFSLockFactory instance, storing lock
     * files into the specified lockDirName:
     *
     * @param lockDirName where lock files are created.
     */
    public XNativeFSLockFactory(String lockDirName) {
        this(new File(lockDirName));
    }

    /**
     * Create a XNativeFSLockFactory instance, storing lock
     * files into the specified lockDir:
     *
     * @param lockDir where lock files are created.
     */
    public XNativeFSLockFactory(File lockDir) {
        setLockDir(lockDir);
    }

    @Override
    public synchronized Lock makeLock(String lockName) {
        if (lockPrefix != null)
            lockName = lockPrefix + "-" + lockName;
        return new NativeFSLock(lockDir, lockName);
    }

    @Override
    public void clearLock(String lockName) throws IOException {
        makeLock(lockName).close();
    }
}

class NativeFSLock extends Lock {

    private FileChannel channel;
    private FileLock lock;
    private File path;
    private File lockDir;
    private static final Set<String> LOCK_HELD = Collections.synchronizedSet(new HashSet<String>());

    public NativeFSLock(File lockDir, String lockFileName) {
        this.lockDir = lockDir;
        path = new File(lockDir, lockFileName);
    }

    @Override
    public synchronized boolean obtain() throws IOException {

        if (lock != null) {
            // Our instance is already locked:
            return false;
        }

        // Ensure that lockDir exists and is a directory.
        if (!lockDir.exists()) {
            if (!lockDir.mkdirs())
                throw new IOException("Cannot create directory: " +
                        lockDir.getAbsolutePath());
        } else if (!lockDir.isDirectory()) {
            // TODO: NoSuchDirectoryException instead?
            throw new IOException("Found regular file where directory expected: " +
                    lockDir.getAbsolutePath());
        }
        final String canonicalPath = path.getCanonicalPath();
        // Make sure nobody else in-process has this lock held
        // already, and, mark it held if not:
        // This is a pretty crazy workaround for some documented
        // but yet awkward JVM behavior:
        //
        // On some systems, closing a channel releases all locks held by the Java virtual machine on the underlying file
        // regardless of whether the locks were acquired via that channel or via another channel open on the same file.
        // It is strongly recommended that, within a program, a unique channel be used to acquire all locks on any given
        // file.
        //
        // This essentially means if we close "A" channel for a given file all locks might be released... the odd part
        // is that we can't re-obtain the lock in the same JVM but from a different process if that happens. Nevertheless
        // this is super trappy. See LUCENE-5738
        boolean obtained = false;
        if (LOCK_HELD.add(canonicalPath)) {
            try {
                channel = FileChannel.open(path.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
                try {
                    lock = channel.tryLock();
                    obtained = lock != null;
                } catch (IOException | OverlappingFileLockException e) {
                    // At least on OS X, we will sometimes get an
                    // intermittent "Permission Denied" IOException,
                    // which seems to simply mean "you failed to get
                    // the lock".  But other IOExceptions could be
                    // "permanent" (eg, locking is not supported via
                    // the filesystem).  So, we record the failure
                    // reason here; the timeout obtain (usually the
                    // one calling us) will use this as "root cause"
                    // if it fails to get the lock.
                    failureReason = e;
                }
            } finally {
                if (obtained == false) { // not successful - clear up and move out
                    clearLockHeld(path);
                    final FileChannel toClose = channel;
                    channel = null;
                    IOUtils.closeWhileHandlingException(toClose);
                }
            }
        }
        return obtained;
    }

    @Override
    public synchronized void close() throws IOException {
        try {
            if (lock != null) {
                try {
                    lock.release();
                    lock = null;
                } finally {
                    clearLockHeld(path);
                }
            }
        } finally {
            IOUtils.close(channel);
            channel = null;
        }
    }

    private static final void clearLockHeld(File path) throws IOException {
        boolean remove = LOCK_HELD.remove(path.getCanonicalPath());
        assert remove : "Lock was cleared but never marked as held";
    }

    @Override
    public synchronized boolean isLocked() {
        // The test for is isLocked is not directly possible with native file locks:

        // First a shortcut, if a lock reference in this instance is available
        if (lock != null) return true;

        // Look if lock file is present; if not, there can definitely be no lock!
        if (!path.exists()) return false;

        // Try to obtain and release (if was locked) the lock
        try {
            boolean obtained = obtain();
            if (obtained) close();
            return !obtained;
        } catch (IOException ioe) {
            return false;
        }
    }

    @Override
    public String toString() {
        return "NativeFSLock@" + path;
    }
}
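For reference, a minimal sketch of how the now-removed factory was typically wired in before this upgrade (the directory path and lock name below are illustrative): it stood in for NativeFSLockFactory, per the forbidden-apis rule this commit also deletes.

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.store.Lock;
    import org.apache.lucene.store.NIOFSDirectory;
    import org.apache.lucene.store.XNativeFSLockFactory;

    // Sketch only: obtain the index write lock through XNativeFSLockFactory.
    class LockFactorySketch {
        static boolean tryWriteLock(File indexDir) throws IOException {
            NIOFSDirectory dir = new NIOFSDirectory(indexDir, new XNativeFSLockFactory());
            Lock lock = dir.makeLock("write.lock");
            boolean obtained = lock.obtain(); // native OS lock, released by close()
            if (obtained) {
                lock.close();
            }
            dir.close();
            return obtained;
        }
    }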
@@ -42,152 +42,152 @@ public class Version implements Serializable {
    // the (internal) format of the id is there so we can easily do after/before checks on the id

    public static final int V_0_18_0_ID = /*00*/180099;
    public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_1_ID = /*00*/180199;
    public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_2_ID = /*00*/180299;
    public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_3_ID = /*00*/180399;
    public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_4_ID = /*00*/180499;
    public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_5_ID = /*00*/180599;
    public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_6_ID = /*00*/180699;
    public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_7_ID = /*00*/180799;
    public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_18_8_ID = /*00*/180899;
    public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);

    public static final int V_0_19_0_RC1_ID = /*00*/190051;
    public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);

    public static final int V_0_19_0_RC2_ID = /*00*/190052;
    public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);

    public static final int V_0_19_0_RC3_ID = /*00*/190053;
    public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);

    public static final int V_0_19_0_ID = /*00*/190099;
    public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_1_ID = /*00*/190199;
    public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_2_ID = /*00*/190299;
    public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_3_ID = /*00*/190399;
    public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_4_ID = /*00*/190499;
    public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_5_ID = /*00*/190599;
    public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_6_ID = /*00*/190699;
    public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_7_ID = /*00*/190799;
    public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_8_ID = /*00*/190899;
    public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_9_ID = /*00*/190999;
    public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_10_ID = /*00*/191099;
    public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_11_ID = /*00*/191199;
    public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_12_ID = /*00*/191299;
    public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_19_13_ID = /*00*/191399;
    public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);

    public static final int V_0_20_0_RC1_ID = /*00*/200051;
    public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_0_ID = /*00*/200099;
    public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_1_ID = /*00*/200199;
    public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_2_ID = /*00*/200299;
    public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_3_ID = /*00*/200399;
    public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_4_ID = /*00*/200499;
    public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_5_ID = /*00*/200599;
    public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_6_ID = /*00*/200699;
    public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
    public static final int V_0_20_7_ID = /*00*/200799;
    public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
    public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);

    public static final int V_0_90_0_Beta1_ID = /*00*/900001;
    public static final Version V_0_90_0_Beta1 = new Version(V_0_90_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_41);
    public static final Version V_0_90_0_Beta1 = new Version(V_0_90_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_4_1);
    public static final int V_0_90_0_RC1_ID = /*00*/900051;
    public static final Version V_0_90_0_RC1 = new Version(V_0_90_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_41);
    public static final Version V_0_90_0_RC1 = new Version(V_0_90_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_4_1);
    public static final int V_0_90_0_RC2_ID = /*00*/900052;
    public static final Version V_0_90_0_RC2 = new Version(V_0_90_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_42);
    public static final Version V_0_90_0_RC2 = new Version(V_0_90_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_4_2);
    public static final int V_0_90_0_ID = /*00*/900099;
    public static final Version V_0_90_0 = new Version(V_0_90_0_ID, false, org.apache.lucene.util.Version.LUCENE_42);
    public static final Version V_0_90_0 = new Version(V_0_90_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_2);
    public static final int V_0_90_1_ID = /*00*/900199;
    public static final Version V_0_90_1 = new Version(V_0_90_1_ID, false, org.apache.lucene.util.Version.LUCENE_43);
    public static final Version V_0_90_1 = new Version(V_0_90_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_3);
    public static final int V_0_90_2_ID = /*00*/900299;
    public static final Version V_0_90_2 = new Version(V_0_90_2_ID, false, org.apache.lucene.util.Version.LUCENE_43);
    public static final Version V_0_90_2 = new Version(V_0_90_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_3);
    public static final int V_0_90_3_ID = /*00*/900399;
    public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_44);
    public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_4);
    public static final int V_0_90_4_ID = /*00*/900499;
    public static final Version V_0_90_4 = new Version(V_0_90_4_ID, false, org.apache.lucene.util.Version.LUCENE_44);
    public static final Version V_0_90_4 = new Version(V_0_90_4_ID, false, org.apache.lucene.util.Version.LUCENE_4_4);
    public static final int V_0_90_5_ID = /*00*/900599;
    public static final Version V_0_90_5 = new Version(V_0_90_5_ID, false, org.apache.lucene.util.Version.LUCENE_44);
    public static final Version V_0_90_5 = new Version(V_0_90_5_ID, false, org.apache.lucene.util.Version.LUCENE_4_4);
    public static final int V_0_90_6_ID = /*00*/900699;
    public static final Version V_0_90_6 = new Version(V_0_90_6_ID, false, org.apache.lucene.util.Version.LUCENE_45);
    public static final Version V_0_90_6 = new Version(V_0_90_6_ID, false, org.apache.lucene.util.Version.LUCENE_4_5);
    public static final int V_0_90_7_ID = /*00*/900799;
    public static final Version V_0_90_7 = new Version(V_0_90_7_ID, false, org.apache.lucene.util.Version.LUCENE_45);
    public static final Version V_0_90_7 = new Version(V_0_90_7_ID, false, org.apache.lucene.util.Version.LUCENE_4_5);
    public static final int V_0_90_8_ID = /*00*/900899;
    public static final Version V_0_90_8 = new Version(V_0_90_8_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_0_90_8 = new Version(V_0_90_8_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_0_90_9_ID = /*00*/900999;
    public static final Version V_0_90_9 = new Version(V_0_90_9_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_0_90_9 = new Version(V_0_90_9_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_0_90_10_ID = /*00*/901099;
    public static final Version V_0_90_10 = new Version(V_0_90_10_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_0_90_10 = new Version(V_0_90_10_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_0_90_11_ID = /*00*/901199;
    public static final Version V_0_90_11 = new Version(V_0_90_11_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_0_90_11 = new Version(V_0_90_11_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_0_90_12_ID = /*00*/901299;
    public static final Version V_0_90_12 = new Version(V_0_90_12_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_0_90_12 = new Version(V_0_90_12_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_0_90_13_ID = /*00*/901399;
    public static final Version V_0_90_13 = new Version(V_0_90_13_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_0_90_13 = new Version(V_0_90_13_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_0_90_14_ID = /*00*/901499;
    public static final Version V_0_90_14 = new Version(V_0_90_14_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_0_90_14 = new Version(V_0_90_14_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);

    public static final int V_1_0_0_Beta1_ID = /*00*/1000001;
    public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_45);
    public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_4_5);
    public static final int V_1_0_0_Beta2_ID = /*00*/1000002;
    public static final Version V_1_0_0_Beta2 = new Version(V_1_0_0_Beta2_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_1_0_0_Beta2 = new Version(V_1_0_0_Beta2_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_1_0_0_RC1_ID = /*00*/1000051;
    public static final Version V_1_0_0_RC1 = new Version(V_1_0_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_46);
    public static final Version V_1_0_0_RC1 = new Version(V_1_0_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
    public static final int V_1_0_0_RC2_ID = /*00*/1000052;
|
||||
public static final Version V_1_0_0_RC2 = new Version(V_1_0_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_46);
|
||||
public static final Version V_1_0_0_RC2 = new Version(V_1_0_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
|
||||
public static final int V_1_0_0_ID = /*00*/1000099;
|
||||
public static final Version V_1_0_0 = new Version(V_1_0_0_ID, false, org.apache.lucene.util.Version.LUCENE_46);
|
||||
public static final Version V_1_0_0 = new Version(V_1_0_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
|
||||
public static final int V_1_0_1_ID = /*00*/1000199;
|
||||
public static final Version V_1_0_1 = new Version(V_1_0_1_ID, false, org.apache.lucene.util.Version.LUCENE_46);
|
||||
public static final Version V_1_0_1 = new Version(V_1_0_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
|
||||
public static final int V_1_0_2_ID = /*00*/1000299;
|
||||
public static final Version V_1_0_2 = new Version(V_1_0_2_ID, false, org.apache.lucene.util.Version.LUCENE_46);
|
||||
public static final Version V_1_0_2 = new Version(V_1_0_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
|
||||
public static final int V_1_0_3_ID = /*00*/1000399;
|
||||
public static final Version V_1_0_3 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_46);
|
||||
public static final Version V_1_0_3 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
|
||||
public static final int V_1_0_4_ID = /*00*/1000499;
|
||||
public static final Version V_1_0_4 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_46);
|
||||
public static final Version V_1_0_4 = new Version(V_1_0_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_6);
|
||||
public static final int V_1_1_0_ID = /*00*/1010099;
|
||||
public static final Version V_1_1_0 = new Version(V_1_1_0_ID, false, org.apache.lucene.util.Version.LUCENE_47);
|
||||
public static final Version V_1_1_0 = new Version(V_1_1_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_7);
|
||||
public static final int V_1_1_1_ID = /*00*/1010199;
|
||||
public static final Version V_1_1_1 = new Version(V_1_1_1_ID, false, org.apache.lucene.util.Version.LUCENE_47);
|
||||
public static final Version V_1_1_1 = new Version(V_1_1_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_7);
|
||||
public static final int V_1_1_2_ID = /*00*/1010299;
|
||||
public static final Version V_1_1_2 = new Version(V_1_1_2_ID, false, org.apache.lucene.util.Version.LUCENE_47);
|
||||
public static final Version V_1_1_2 = new Version(V_1_1_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_7);
|
||||
public static final int V_1_2_0_ID = /*00*/1020099;
|
||||
public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_48);
|
||||
public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_8);
|
||||
public static final int V_1_2_1_ID = /*00*/1020199;
|
||||
public static final Version V_1_2_1 = new Version(V_1_2_1_ID, false, org.apache.lucene.util.Version.LUCENE_48);
|
||||
public static final Version V_1_2_1 = new Version(V_1_2_1_ID, false, org.apache.lucene.util.Version.LUCENE_4_8);
|
||||
public static final int V_1_2_2_ID = /*00*/1020299;
|
||||
public static final Version V_1_2_2 = new Version(V_1_2_2_ID, false, org.apache.lucene.util.Version.LUCENE_48);
|
||||
public static final Version V_1_2_2 = new Version(V_1_2_2_ID, false, org.apache.lucene.util.Version.LUCENE_4_8);
|
||||
public static final int V_1_3_0_ID = /*00*/1030099;
|
||||
public static final Version V_1_3_0 = new Version(V_1_3_0_ID, false, org.apache.lucene.util.Version.LUCENE_48);
|
||||
public static final Version V_1_3_0 = new Version(V_1_3_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_9);
|
||||
public static final int V_2_0_0_ID = /*00*/2000099;
|
||||
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_48);
|
||||
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_9);
|
||||
|
||||
public static final Version CURRENT = V_2_0_0;
|
||||
|
||||
|
@ -21,6 +21,7 @@ package org.elasticsearch.common.compress.lzf;
|
||||
|
||||
import com.ning.compress.lzf.ChunkDecoder;
|
||||
import com.ning.compress.lzf.LZFChunk;
|
||||
import org.apache.lucene.store.BufferedIndexInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.elasticsearch.common.compress.CompressedIndexInput;
|
||||
import org.elasticsearch.common.lucene.store.InputStreamIndexInput;
|
||||
@ -71,4 +72,9 @@ public class LZFCompressedIndexInput extends CompressedIndexInput<LZFCompressorC
|
||||
cloned.inputBuffer = new byte[LZFChunk.MAX_CHUNK_LEN];
|
||||
return cloned;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexInput slice(String description, long offset, long length) throws IOException {
|
||||
return BufferedIndexInput.wrap(description, this, offset, length);
|
||||
}
|
||||
}
|
||||
|
@ -45,7 +45,7 @@ import java.io.IOException;
|
||||
*/
|
||||
public class Lucene {
|
||||
|
||||
public static final Version VERSION = Version.LUCENE_48;
|
||||
public static final Version VERSION = Version.LUCENE_4_9;
|
||||
public static final Version ANALYZER_VERSION = VERSION;
|
||||
public static final Version QUERYPARSER_VERSION = VERSION;
|
||||
|
||||
@ -63,56 +63,28 @@ public class Lucene {
|
||||
if (version == null) {
|
||||
return defaultVersion;
|
||||
}
|
||||
if ("4.8".equals(version)) {
|
||||
return VERSION.LUCENE_48;
|
||||
switch(version) {
|
||||
case "4.9": return VERSION.LUCENE_4_9;
|
||||
case "4.8": return VERSION.LUCENE_4_8;
|
||||
case "4.7": return VERSION.LUCENE_4_7;
|
||||
case "4.6": return VERSION.LUCENE_4_6;
|
||||
case "4.5": return VERSION.LUCENE_4_5;
|
||||
case "4.4": return VERSION.LUCENE_4_4;
|
||||
case "4.3": return VERSION.LUCENE_4_3;
|
||||
case "4.2": return VERSION.LUCENE_4_2;
|
||||
case "4.1": return VERSION.LUCENE_4_1;
|
||||
case "4.0": return VERSION.LUCENE_4_0;
|
||||
case "3.6": return VERSION.LUCENE_3_6;
|
||||
case "3.5": return VERSION.LUCENE_3_5;
|
||||
case "3.4": return VERSION.LUCENE_3_4;
|
||||
case "3.3": return VERSION.LUCENE_3_3;
|
||||
case "3.2": return VERSION.LUCENE_3_2;
|
||||
case "3.1": return VERSION.LUCENE_3_1;
|
||||
case "3.0": return VERSION.LUCENE_3_0;
|
||||
default:
|
||||
logger.warn("no version match {}, default to {}", version, defaultVersion);
|
||||
return defaultVersion;
|
||||
}
|
||||
if ("4.7".equals(version)) {
|
||||
return VERSION.LUCENE_47;
|
||||
}
|
||||
if ("4.6".equals(version)) {
|
||||
return VERSION.LUCENE_46;
|
||||
}
|
||||
if ("4.5".equals(version)) {
|
||||
return VERSION.LUCENE_45;
|
||||
}
|
||||
if ("4.4".equals(version)) {
|
||||
return VERSION.LUCENE_44;
|
||||
}
|
||||
if ("4.3".equals(version)) {
|
||||
return Version.LUCENE_43;
|
||||
}
|
||||
if ("4.2".equals(version)) {
|
||||
return Version.LUCENE_42;
|
||||
}
|
||||
if ("4.1".equals(version)) {
|
||||
return Version.LUCENE_41;
|
||||
}
|
||||
if ("4.0".equals(version)) {
|
||||
return Version.LUCENE_40;
|
||||
}
|
||||
if ("3.6".equals(version)) {
|
||||
return Version.LUCENE_36;
|
||||
}
|
||||
if ("3.5".equals(version)) {
|
||||
return Version.LUCENE_35;
|
||||
}
|
||||
if ("3.4".equals(version)) {
|
||||
return Version.LUCENE_34;
|
||||
}
|
||||
if ("3.3".equals(version)) {
|
||||
return Version.LUCENE_33;
|
||||
}
|
||||
if ("3.2".equals(version)) {
|
||||
return Version.LUCENE_32;
|
||||
}
|
||||
if ("3.1".equals(version)) {
|
||||
return Version.LUCENE_31;
|
||||
}
|
||||
if ("3.0".equals(version)) {
|
||||
return Version.LUCENE_30;
|
||||
}
|
||||
logger.warn("no version match {}, default to {}", version, defaultVersion);
|
||||
return defaultVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -21,7 +21,6 @@ package org.elasticsearch.common.lucene;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.FilterAtomicReader;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
||||
@ -45,17 +44,9 @@ public class SegmentReaderUtils {
|
||||
return internalSegmentReader(reader, false);
|
||||
}
|
||||
|
||||
static {
|
||||
assert Version.LUCENE_48.onOrAfter(Lucene.VERSION) : "Use AtomicReader.addCoreClosedListener instead of trying to unwrap the atomic reader: https://issues.apache.org/jira/browse/LUCENE-5701";
|
||||
}
|
||||
|
||||
public static boolean registerCoreListener(AtomicReader reader, SegmentReader.CoreClosedListener listener) {
|
||||
SegmentReader segReader = SegmentReaderUtils.segmentReaderOrNull(reader);
|
||||
if (segReader != null) {
|
||||
segReader.addCoreClosedListener(listener);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
reader.addCoreClosedListener(listener);
|
||||
return true;
|
||||
}
|
||||
|
||||
private static SegmentReader internalSegmentReader(AtomicReader reader, boolean fail) {
|
||||
|
@ -25,7 +25,6 @@ import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
@ -63,15 +62,14 @@ public class AllField extends Field {
|
||||
return allEntries;
|
||||
}
|
||||
|
||||
static {
|
||||
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Re-use the incoming AllTokenStream once we upgrade to Lucene 4.9";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
|
||||
try {
|
||||
allEntries.reset(); // reset the all entries, just in case it was read already
|
||||
if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
|
||||
// TODO: we should be able to reuse "previous" if its instanceof AllTokenStream?
|
||||
// but we need to be careful this optimization is safe (and tested)...
|
||||
|
||||
// AllTokenStream maps boost to 4-byte payloads, so we only need to use it any field had non-default (!= 1.0f) boost and if
|
||||
// positions are indexed:
|
||||
return AllTokenStream.allTokenStream(name, allEntries, analyzer);
|
||||
|
@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@ -44,6 +45,11 @@ public class AllDocIdSet extends DocIdSet {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_INT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
return new Iterator(maxDoc);
|
||||
|
@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@ -48,6 +49,15 @@ public class AndDocIdSet extends DocIdSet {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long ramBytesUsed = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
|
||||
for (DocIdSet set : sets) {
|
||||
ramBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF + set.ramBytesUsed();
|
||||
}
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
Bits[] bits = new Bits[sets.length];
|
||||
|
@ -24,6 +24,7 @@ import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -32,19 +33,18 @@ import java.io.IOException;
|
||||
*/
|
||||
public class DocIdSets {
|
||||
|
||||
/**
|
||||
* Return the size of the doc id set, plus a reference to it.
|
||||
*/
|
||||
public static long sizeInBytes(DocIdSet docIdSet) {
|
||||
if (docIdSet instanceof FixedBitSet) {
|
||||
return ((FixedBitSet) docIdSet).getBits().length * 8 + 16;
|
||||
}
|
||||
// only for empty ones and unknowns...
|
||||
return 1;
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + docIdSet.ramBytesUsed();
|
||||
}
|
||||
|
||||
/**
|
||||
* Is it an empty {@link DocIdSet}?
|
||||
*/
|
||||
public static boolean isEmpty(@Nullable DocIdSet set) {
|
||||
return set == null || set == EMPTY_DOCIDSET;
|
||||
return set == null || set == DocIdSet.EMPTY;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -63,16 +63,16 @@ public class DocIdSets {
|
||||
* always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
|
||||
*/
|
||||
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
|
||||
if (set == null || set == EMPTY_DOCIDSET) {
|
||||
return EMPTY_DOCIDSET;
|
||||
if (set == null || set == DocIdSet.EMPTY) {
|
||||
return DocIdSet.EMPTY;
|
||||
}
|
||||
DocIdSetIterator it = set.iterator();
|
||||
if (it == null) {
|
||||
return EMPTY_DOCIDSET;
|
||||
return DocIdSet.EMPTY;
|
||||
}
|
||||
int doc = it.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return EMPTY_DOCIDSET;
|
||||
return DocIdSet.EMPTY;
|
||||
}
|
||||
if (set instanceof FixedBitSet) {
|
||||
return set;
|
||||
@ -85,26 +85,6 @@ public class DocIdSets {
|
||||
} while (doc != DocIdSetIterator.NO_MORE_DOCS);
|
||||
return fixedBitSet;
|
||||
}
|
||||
|
||||
/** An empty {@code DocIdSet} instance */
|
||||
protected static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() {
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return DocIdSetIterator.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// we explicitly provide no random access, as this filter is 100% sparse and iterator exits faster
|
||||
@Override
|
||||
public Bits bits() {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Gets a set to bits.
|
||||
|
@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@ -43,6 +44,11 @@ public class NotDocIdSet extends DocIdSet {
|
||||
return set.isCacheable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + set.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
Bits bits = set.bits();
|
||||
|
@ -22,6 +22,7 @@ package org.elasticsearch.common.lucene.docset;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@ -46,6 +47,15 @@ public class OrDocIdSet extends DocIdSet {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long ramBytesUsed = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
|
||||
for (DocIdSet set : sets) {
|
||||
ramBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF + set.ramBytesUsed();
|
||||
}
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
Bits[] bits = new Bits[sets.length];
|
||||
|
@ -23,6 +23,7 @@ import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -90,6 +91,11 @@ public class ApplyAcceptedDocsFilter extends Filter {
|
||||
return innerSet.isCacheable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + innerSet.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
Bits bits = innerSet.bits();
|
||||
@ -202,5 +208,10 @@ public class ApplyAcceptedDocsFilter extends Filter {
|
||||
public boolean isCacheable() {
|
||||
return delegate.isCacheable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + delegate.ramBytesUsed();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ package org.elasticsearch.common.lucene.search;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.queries.mlt.MoreLikeThis;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
@ -49,14 +50,14 @@ public class MoreLikeThisQuery extends Query {
|
||||
private String[] moreLikeFields;
|
||||
private Analyzer analyzer;
|
||||
private float percentTermsToMatch = DEFAULT_PERCENT_TERMS_TO_MATCH;
|
||||
private int minTermFrequency = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
|
||||
private int maxQueryTerms = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
|
||||
private Set<?> stopWords = XMoreLikeThis.DEFAULT_STOP_WORDS;
|
||||
private int minDocFreq = XMoreLikeThis.DEFAULT_MIN_DOC_FREQ;
|
||||
private int maxDocFreq = XMoreLikeThis.DEFAULT_MAX_DOC_FREQ;
|
||||
private int minWordLen = XMoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
|
||||
private int maxWordLen = XMoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
|
||||
private boolean boostTerms = XMoreLikeThis.DEFAULT_BOOST;
|
||||
private int minTermFrequency = MoreLikeThis.DEFAULT_MIN_TERM_FREQ;
|
||||
private int maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
|
||||
private Set<?> stopWords = MoreLikeThis.DEFAULT_STOP_WORDS;
|
||||
private int minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ;
|
||||
private int maxDocFreq = MoreLikeThis.DEFAULT_MAX_DOC_FREQ;
|
||||
private int minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
|
||||
private int maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
|
||||
private boolean boostTerms = MoreLikeThis.DEFAULT_BOOST;
|
||||
private float boostTermsFactor = 1;
|
||||
|
||||
|
||||
@ -134,7 +135,7 @@ public class MoreLikeThisQuery extends Query {
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
|
||||
MoreLikeThis mlt = new MoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
|
||||
|
||||
mlt.setFieldNames(moreLikeFields);
|
||||
mlt.setAnalyzer(analyzer);
|
||||
|
@ -1,964 +0,0 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Copyright 2004-2005 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.common.lucene.search;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.io.FastStringReader;
|
||||
|
||||
/**
|
||||
* Generate "more like this" similarity queries.
|
||||
* Based on this mail:
|
||||
* <code><pre>
|
||||
* Lucene does let you access the document frequency of terms, with IndexReader.docFreq().
|
||||
* Term frequencies can be computed by re-tokenizing the text, which, for a single document,
|
||||
* is usually fast enough. But looking up the docFreq() of every term in the document is
|
||||
* probably too slow.
|
||||
* <p/>
|
||||
* You can use some heuristics to prune the set of terms, to avoid calling docFreq() too much,
|
||||
* or at all. Since you're trying to maximize a tf*idf score, you're probably most interested
|
||||
* in terms with a high tf. Choosing a tf threshold even as low as two or three will radically
|
||||
* reduce the number of terms under consideration. Another heuristic is that terms with a
|
||||
* high idf (i.e., a low df) tend to be longer. So you could threshold the terms by the
|
||||
* number of characters, not selecting anything less than, e.g., six or seven characters.
|
||||
* With these sorts of heuristics you can usually find small set of, e.g., ten or fewer terms
|
||||
* that do a pretty good job of characterizing a document.
|
||||
* <p/>
|
||||
* It all depends on what you're trying to do. If you're trying to eek out that last percent
|
||||
* of precision and recall regardless of computational difficulty so that you can win a TREC
|
||||
* competition, then the techniques I mention above are useless. But if you're trying to
|
||||
* provide a "more like this" button on a search results page that does a decent job and has
|
||||
* good performance, such techniques might be useful.
|
||||
* <p/>
|
||||
* An efficient, effective "more-like-this" query generator would be a great contribution, if
|
||||
* anyone's interested. I'd imagine that it would take a Reader or a String (the document's
|
||||
* text), analyzer Analyzer, and return a set of representative terms using heuristics like those
|
||||
* above. The frequency and length thresholds could be parameters, etc.
|
||||
* <p/>
|
||||
* Doug
|
||||
* </pre></code>
|
||||
* <p/>
|
||||
* <p/>
|
||||
* <p/>
|
||||
* <h3>Initial Usage</h3>
|
||||
* <p/>
|
||||
* This class has lots of options to try to make it efficient and flexible.
|
||||
* The simplest possible usage is as follows. The bold
|
||||
* fragment is specific to this class.
|
||||
* <p/>
|
||||
* <pre class="prettyprint">
|
||||
* <p/>
|
||||
* IndexReader ir = ...
|
||||
* IndexSearcher is = ...
|
||||
* <p/>
|
||||
* MoreLikeThis mlt = new MoreLikeThis(ir);
|
||||
* Reader target = ... // orig source of doc you want to find similarities to
|
||||
* Query query = mlt.like( target);
|
||||
* <p/>
|
||||
* Hits hits = is.search(query);
|
||||
* // now the usual iteration thru 'hits' - the only thing to watch for is to make sure
|
||||
* //you ignore the doc if it matches your 'target' document, as it should be similar to itself
|
||||
* <p/>
|
||||
* </pre>
|
||||
* <p/>
|
||||
* Thus you:
|
||||
* <ol>
|
||||
* <li> do your normal, Lucene setup for searching,
|
||||
* <li> create a MoreLikeThis,
|
||||
* <li> get the text of the doc you want to find similarities to
|
||||
* <li> then call one of the like() calls to generate a similarity query
|
||||
* <li> call the searcher to find the similar docs
|
||||
* </ol>
|
||||
* <p/>
|
||||
* <h3>More Advanced Usage</h3>
|
||||
* <p/>
|
||||
* You may want to use {@link #setFieldNames setFieldNames(...)} so you can examine
|
||||
* multiple fields (e.g. body and title) for similarity.
|
||||
* <p/>
|
||||
* <p/>
|
||||
* Depending on the size of your index and the size and makeup of your documents you
|
||||
* may want to call the other set methods to control how the similarity queries are
|
||||
* generated:
|
||||
* <ul>
|
||||
* <li> {@link #setMinTermFreq setMinTermFreq(...)}
|
||||
* <li> {@link #setMinDocFreq setMinDocFreq(...)}
|
||||
* <li> {@link #setMaxDocFreq setMaxDocFreq(...)}
|
||||
* <li> {@link #setMaxDocFreqPct setMaxDocFreqPct(...)}
|
||||
* <li> {@link #setMinWordLen setMinWordLen(...)}
|
||||
* <li> {@link #setMaxWordLen setMaxWordLen(...)}
|
||||
* <li> {@link #setMaxQueryTerms setMaxQueryTerms(...)}
|
||||
* <li> {@link #setMaxNumTokensParsed setMaxNumTokensParsed(...)}
|
||||
* <li> {@link #setStopWords setStopWord(...)}
|
||||
* </ul>
|
||||
* <p/>
|
||||
* <hr>
|
||||
* <pre>
|
||||
* Changes: Mark Harwood 29/02/04
|
||||
* Some bugfixing, some refactoring, some optimisation.
|
||||
* - bugfix: retrieveTerms(int docNum) was not working for indexes without a termvector -added missing code
|
||||
* - bugfix: No significant terms being created for fields with a termvector - because
|
||||
* was only counting one occurrence per term/field pair in calculations(ie not including frequency info from TermVector)
|
||||
* - refactor: moved common code into isNoiseWord()
|
||||
* - optimise: when no termvector support available - used maxNumTermsParsed to limit amount of tokenization
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
public final class XMoreLikeThis {
|
||||
|
||||
static {
|
||||
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Remove this class once we upgrade to Lucene 4.9";
|
||||
}
|
||||
|
||||
/**
|
||||
* Default maximum number of tokens to parse in each example doc field that is not stored with TermVector support.
|
||||
*
|
||||
* @see #getMaxNumTokensParsed
|
||||
*/
|
||||
public static final int DEFAULT_MAX_NUM_TOKENS_PARSED = 5000;
|
||||
|
||||
/**
|
||||
* Ignore terms with less than this frequency in the source doc.
|
||||
*
|
||||
* @see #getMinTermFreq
|
||||
* @see #setMinTermFreq
|
||||
*/
|
||||
public static final int DEFAULT_MIN_TERM_FREQ = 2;
|
||||
|
||||
/**
|
||||
* Ignore words which do not occur in at least this many docs.
|
||||
*
|
||||
* @see #getMinDocFreq
|
||||
* @see #setMinDocFreq
|
||||
*/
|
||||
public static final int DEFAULT_MIN_DOC_FREQ = 5;
|
||||
|
||||
/**
|
||||
* Ignore words which occur in more than this many docs.
|
||||
*
|
||||
* @see #getMaxDocFreq
|
||||
* @see #setMaxDocFreq
|
||||
* @see #setMaxDocFreqPct
|
||||
*/
|
||||
public static final int DEFAULT_MAX_DOC_FREQ = Integer.MAX_VALUE;
|
||||
|
||||
/**
|
||||
* Boost terms in query based on score.
|
||||
*
|
||||
* @see #isBoost
|
||||
* @see #setBoost
|
||||
*/
|
||||
public static final boolean DEFAULT_BOOST = false;
|
||||
|
||||
/**
|
||||
* Default field names. Null is used to specify that the field names should be looked
|
||||
* up at runtime from the provided reader.
|
||||
*/
|
||||
public static final String[] DEFAULT_FIELD_NAMES = new String[]{"contents"};
|
||||
|
||||
/**
|
||||
* Ignore words less than this length or if 0 then this has no effect.
|
||||
*
|
||||
* @see #getMinWordLen
|
||||
* @see #setMinWordLen
|
||||
*/
|
||||
public static final int DEFAULT_MIN_WORD_LENGTH = 0;
|
||||
|
||||
/**
|
||||
* Ignore words greater than this length or if 0 then this has no effect.
|
||||
*
|
||||
* @see #getMaxWordLen
|
||||
* @see #setMaxWordLen
|
||||
*/
|
||||
public static final int DEFAULT_MAX_WORD_LENGTH = 0;
|
||||
|
||||
/**
|
||||
* Default set of stopwords.
|
||||
* If null means to allow stop words.
|
||||
*
|
||||
* @see #setStopWords
|
||||
* @see #getStopWords
|
||||
*/
|
||||
public static final Set<?> DEFAULT_STOP_WORDS = null;
|
||||
|
||||
/**
|
||||
* Current set of stop words.
|
||||
*/
|
||||
private Set<?> stopWords = DEFAULT_STOP_WORDS;
|
||||
|
||||
/**
|
||||
* Return a Query with no more than this many terms.
|
||||
*
|
||||
* @see BooleanQuery#getMaxClauseCount
|
||||
* @see #getMaxQueryTerms
|
||||
* @see #setMaxQueryTerms
|
||||
*/
|
||||
public static final int DEFAULT_MAX_QUERY_TERMS = 25;
|
||||
|
||||
/**
|
||||
* Analyzer that will be used to parse the doc.
|
||||
*/
|
||||
private Analyzer analyzer = null;
|
||||
|
||||
/**
|
||||
* Ignore words less frequent that this.
|
||||
*/
|
||||
private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
|
||||
|
||||
/**
|
||||
* Ignore words which do not occur in at least this many docs.
|
||||
*/
|
||||
private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
|
||||
|
||||
/**
|
||||
* Ignore words which occur in more than this many docs.
|
||||
*/
|
||||
private int maxDocFreq = DEFAULT_MAX_DOC_FREQ;
|
||||
|
||||
/**
|
||||
* Should we apply a boost to the Query based on the scores?
|
||||
*/
|
||||
private boolean boost = DEFAULT_BOOST;
|
||||
|
||||
/**
|
||||
* Field name we'll analyze.
|
||||
*/
|
||||
private String[] fieldNames = DEFAULT_FIELD_NAMES;
|
||||
|
||||
/**
|
||||
* The maximum number of tokens to parse in each example doc field that is not stored with TermVector support
|
||||
*/
|
||||
private int maxNumTokensParsed = DEFAULT_MAX_NUM_TOKENS_PARSED;
|
||||
|
||||
/**
|
||||
* Ignore words if less than this len.
|
||||
*/
|
||||
private int minWordLen = DEFAULT_MIN_WORD_LENGTH;
|
||||
|
||||
/**
|
||||
* Ignore words if greater than this len.
|
||||
*/
|
||||
private int maxWordLen = DEFAULT_MAX_WORD_LENGTH;
|
||||
|
||||
/**
|
||||
* Don't return a query longer than this.
|
||||
*/
|
||||
private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
|
||||
|
||||
/**
|
||||
* For idf() calculations.
|
||||
*/
|
||||
private TFIDFSimilarity similarity;// = new DefaultSimilarity();
|
||||
|
||||
/**
|
||||
* IndexReader to use
|
||||
*/
|
||||
private final IndexReader ir;
|
||||
|
||||
/**
|
||||
* Boost factor to use when boosting the terms
|
||||
*/
|
||||
private float boostFactor = 1;
|
||||
|
||||
/**
|
||||
* Returns the boost factor used when boosting terms
|
||||
*
|
||||
* @return the boost factor used when boosting terms
|
||||
* @see #setBoostFactor(float)
|
||||
*/
|
||||
public float getBoostFactor() {
|
||||
return boostFactor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the boost factor to use when boosting terms
|
||||
*
|
||||
* @see #getBoostFactor()
|
||||
*/
|
||||
public void setBoostFactor(float boostFactor) {
|
||||
this.boostFactor = boostFactor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor requiring an IndexReader.
|
||||
*/
|
||||
public XMoreLikeThis(IndexReader ir) {
|
||||
this(ir, new DefaultSimilarity());
|
||||
}
|
||||
|
||||
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
|
||||
this.ir = ir;
|
||||
this.similarity = sim;
|
||||
}
|
||||
|
||||
|
||||
public TFIDFSimilarity getSimilarity() {
|
||||
return similarity;
|
||||
}
|
||||
|
||||
public void setSimilarity(TFIDFSimilarity similarity) {
|
||||
this.similarity = similarity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an analyzer that will be used to parse source doc with. The default analyzer
|
||||
* is not set.
|
||||
*
|
||||
* @return the analyzer that will be used to parse source doc with.
|
||||
*/
|
||||
public Analyzer getAnalyzer() {
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the analyzer to use. An analyzer is not required for generating a query with the
|
||||
* {@link #like(int)} method, all other 'like' methods require an analyzer.
|
||||
*
|
||||
* @param analyzer the analyzer to use to tokenize text.
|
||||
*/
|
||||
public void setAnalyzer(Analyzer analyzer) {
|
||||
this.analyzer = analyzer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the frequency below which terms will be ignored in the source doc. The default
|
||||
* frequency is the {@link #DEFAULT_MIN_TERM_FREQ}.
|
||||
*
|
||||
* @return the frequency below which terms will be ignored in the source doc.
|
||||
*/
|
||||
public int getMinTermFreq() {
|
||||
return minTermFreq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the frequency below which terms will be ignored in the source doc.
|
||||
*
|
||||
* @param minTermFreq the frequency below which terms will be ignored in the source doc.
|
||||
*/
|
||||
public void setMinTermFreq(int minTermFreq) {
|
||||
this.minTermFreq = minTermFreq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the frequency at which words will be ignored which do not occur in at least this
|
||||
* many docs. The default frequency is {@link #DEFAULT_MIN_DOC_FREQ}.
|
||||
*
|
||||
* @return the frequency at which words will be ignored which do not occur in at least this
|
||||
* many docs.
|
||||
*/
|
||||
public int getMinDocFreq() {
|
||||
return minDocFreq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the frequency at which words will be ignored which do not occur in at least this
|
||||
* many docs.
|
||||
*
|
||||
* @param minDocFreq the frequency at which words will be ignored which do not occur in at
|
||||
* least this many docs.
|
||||
*/
|
||||
public void setMinDocFreq(int minDocFreq) {
|
||||
this.minDocFreq = minDocFreq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum frequency in which words may still appear.
|
||||
* Words that appear in more than this many docs will be ignored. The default frequency is
|
||||
* {@link #DEFAULT_MAX_DOC_FREQ}.
|
||||
*
|
||||
* @return get the maximum frequency at which words are still allowed,
|
||||
* words which occur in more docs than this are ignored.
|
||||
*/
|
||||
public int getMaxDocFreq() {
|
||||
return maxDocFreq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the maximum frequency in which words may still appear. Words that appear
|
||||
* in more than this many docs will be ignored.
|
||||
*
|
||||
* @param maxFreq the maximum count of documents that a term may appear
|
||||
* in to be still considered relevant
|
||||
*/
|
||||
public void setMaxDocFreq(int maxFreq) {
|
||||
this.maxDocFreq = maxFreq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the maximum percentage in which words may still appear. Words that appear
|
||||
* in more than this many percent of all docs will be ignored.
|
||||
*
|
||||
* @param maxPercentage the maximum percentage of documents (0-100) that a term may appear
|
||||
* in to be still considered relevant
|
||||
*/
|
||||
public void setMaxDocFreqPct(int maxPercentage) {
|
||||
this.maxDocFreq = maxPercentage * ir.numDocs() / 100;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns whether to boost terms in query based on "score" or not. The default is
|
||||
* {@link #DEFAULT_BOOST}.
|
||||
*
|
||||
* @return whether to boost terms in query based on "score" or not.
|
||||
* @see #setBoost
|
||||
*/
|
||||
public boolean isBoost() {
|
||||
return boost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets whether to boost terms in query based on "score" or not.
|
||||
*
|
||||
* @param boost true to boost terms in query based on "score", false otherwise.
|
||||
* @see #isBoost
|
||||
*/
|
||||
public void setBoost(boolean boost) {
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the field names that will be used when generating the 'More Like This' query.
|
||||
* The default field names that will be used is {@link #DEFAULT_FIELD_NAMES}.
|
||||
*
|
||||
* @return the field names that will be used when generating the 'More Like This' query.
|
||||
*/
|
||||
public String[] getFieldNames() {
|
||||
return fieldNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the field names that will be used when generating the 'More Like This' query.
|
||||
* Set this to null for the field names to be determined at runtime from the IndexReader
|
||||
* provided in the constructor.
|
||||
*
|
||||
* @param fieldNames the field names that will be used when generating the 'More Like This'
|
||||
* query.
|
||||
*/
|
||||
public void setFieldNames(String[] fieldNames) {
|
||||
this.fieldNames = fieldNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the minimum word length below which words will be ignored. Set this to 0 for no
|
||||
* minimum word length. The default is {@link #DEFAULT_MIN_WORD_LENGTH}.
|
||||
*
|
||||
* @return the minimum word length below which words will be ignored.
|
||||
*/
|
||||
public int getMinWordLen() {
|
||||
return minWordLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the minimum word length below which words will be ignored.
|
||||
*
|
||||
* @param minWordLen the minimum word length below which words will be ignored.
|
||||
*/
|
||||
public void setMinWordLen(int minWordLen) {
|
||||
this.minWordLen = minWordLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum word length above which words will be ignored. Set this to 0 for no
|
||||
* maximum word length. The default is {@link #DEFAULT_MAX_WORD_LENGTH}.
|
||||
*
|
||||
* @return the maximum word length above which words will be ignored.
|
||||
*/
|
||||
public int getMaxWordLen() {
|
||||
return maxWordLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum word length above which words will be ignored.
|
||||
*
|
||||
* @param maxWordLen the maximum word length above which words will be ignored.
|
||||
*/
|
||||
public void setMaxWordLen(int maxWordLen) {
|
||||
this.maxWordLen = maxWordLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the set of stopwords.
|
||||
* Any word in this set is considered "uninteresting" and ignored.
|
||||
* Even if your Analyzer allows stopwords, you might want to tell the MoreLikeThis code to ignore them, as
|
||||
* for the purposes of document similarity it seems reasonable to assume that "a stop word is never interesting".
|
||||
*
|
||||
* @param stopWords set of stopwords, if null it means to allow stop words
|
||||
* @see #getStopWords
|
||||
*/
|
||||
public void setStopWords(Set<?> stopWords) {
|
||||
this.stopWords = stopWords;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current stop words being used.
|
||||
*
|
||||
* @see #setStopWords
|
||||
*/
|
||||
public Set<?> getStopWords() {
|
||||
return stopWords;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the maximum number of query terms that will be included in any generated query.
|
||||
* The default is {@link #DEFAULT_MAX_QUERY_TERMS}.
|
||||
*
|
||||
* @return the maximum number of query terms that will be included in any generated query.
|
||||
*/
|
||||
public int getMaxQueryTerms() {
|
||||
return maxQueryTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum number of query terms that will be included in any generated query.
|
||||
*
|
||||
* @param maxQueryTerms the maximum number of query terms that will be included in any
|
||||
* generated query.
|
||||
*/
|
||||
public void setMaxQueryTerms(int maxQueryTerms) {
|
||||
this.maxQueryTerms = maxQueryTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The maximum number of tokens to parse in each example doc field that is not stored with TermVector support
|
||||
* @see #DEFAULT_MAX_NUM_TOKENS_PARSED
|
||||
*/
|
||||
public int getMaxNumTokensParsed() {
|
||||
return maxNumTokensParsed;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param i The maximum number of tokens to parse in each example doc field that is not stored with TermVector support
|
||||
*/
|
||||
public void setMaxNumTokensParsed(int i) {
|
||||
maxNumTokensParsed = i;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a query that will return docs like the passed lucene document ID.
|
||||
*
|
||||
* @param docNum the documentID of the lucene doc to generate the 'More Like This" query for.
|
||||
* @return a query that will return docs like the passed lucene document ID.
|
||||
*/
|
||||
public Query like(int docNum) throws IOException {
|
||||
if (fieldNames == null) {
|
||||
// gather list of valid fields from lucene
|
||||
Collection<String> fields = MultiFields.getIndexedFields(ir);
|
||||
fieldNames = fields.toArray(new String[fields.size()]);
|
||||
}
|
||||
|
||||
return createQuery(retrieveTerms(docNum));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a query that will return docs like the passed Reader.
|
||||
*
|
||||
* @return a query that will return docs like the passed Reader.
|
||||
*/
|
||||
@Deprecated
|
||||
public Query like(Reader r, String fieldName) throws IOException {
|
||||
return like(fieldName, r);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a query that will return docs like the passed Readers.
|
||||
* This was added in order to treat multi-value fields.
|
||||
*
|
||||
* @return a query that will return docs like the passed Readers.
|
||||
*/
|
||||
public Query like(String fieldName, Reader... readers) throws IOException {
|
||||
Map<String, Int> words = new HashMap<>();
|
||||
for (Reader r : readers) {
|
||||
addTermFrequencies(r, words, fieldName);
|
||||
}
|
||||
return createQuery(createQueue(words));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the More like query from a PriorityQueue
|
||||
*/
|
||||
private Query createQuery(PriorityQueue<Object[]> q) {
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
Object cur;
|
||||
int qterms = 0;
|
||||
float bestScore = 0;
|
||||
|
||||
while ((cur = q.pop()) != null) {
|
||||
Object[] ar = (Object[]) cur;
|
||||
TermQuery tq = new TermQuery(new Term((String) ar[1], (String) ar[0]));
|
||||
|
||||
if (boost) {
|
||||
if (qterms == 0) {
|
||||
bestScore = ((Float) ar[2]);
|
||||
}
|
||||
float myScore = ((Float) ar[2]);
|
||||
|
||||
tq.setBoost(boostFactor * myScore / bestScore);
|
||||
}
|
||||
|
||||
try {
|
||||
query.add(tq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
catch (BooleanQuery.TooManyClauses ignore) {
|
||||
break;
|
||||
}
|
||||
|
||||
qterms++;
|
||||
if (maxQueryTerms > 0 && qterms >= maxQueryTerms) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a PriorityQueue from a word->tf map.
|
||||
*
|
||||
* @param words a map of words keyed on the word(String) with Int objects as the values.
|
||||
*/
|
||||
private PriorityQueue<Object[]> createQueue(Map<String, Int> words) throws IOException {
|
||||
// have collected all words in doc and their freqs
|
||||
int numDocs = ir.numDocs();
|
||||
FreqQ res = new FreqQ(words.size()); // will order words by score
|
||||
|
||||
for (String word : words.keySet()) { // for every word
|
||||
int tf = words.get(word).x; // term freq in the source doc
|
||||
if (minTermFreq > 0 && tf < minTermFreq) {
|
||||
continue; // filter out words that don't occur enough times in the source
|
||||
}
|
||||
|
||||
// go through all the fields and find the largest document frequency
|
||||
String topField = fieldNames[0];
|
||||
int docFreq = 0;
|
||||
for (String fieldName : fieldNames) {
|
||||
int freq = ir.docFreq(new Term(fieldName, word));
|
||||
topField = (freq > docFreq) ? fieldName : topField;
|
||||
docFreq = (freq > docFreq) ? freq : docFreq;
|
||||
}
|
||||
|
||||
if (minDocFreq > 0 && docFreq < minDocFreq) {
|
||||
continue; // filter out words that don't occur in enough docs
|
||||
}
|
||||
|
||||
if (docFreq > maxDocFreq) {
|
||||
continue; // filter out words that occur in too many docs
|
||||
}
|
||||
|
||||
if (docFreq == 0) {
|
||||
continue; // index update problem?
|
||||
}
|
||||
|
||||
float idf = similarity.idf(docFreq, numDocs);
|
||||
float score = tf * idf;
|
||||
|
||||
// only really need 1st 3 entries, other ones are for troubleshooting
|
||||
res.insertWithOverflow(new Object[]{word, // the word
|
||||
topField, // the top field
|
||||
score, // overall score
|
||||
idf, // idf
|
||||
docFreq, // freq in all docs
|
||||
tf
|
||||
});
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Describe the parameters that control how the "more like this" query is formed.
|
||||
*/
|
||||
public String describeParams() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("\t").append("maxQueryTerms : ").append(maxQueryTerms).append("\n");
|
||||
sb.append("\t").append("minWordLen : ").append(minWordLen).append("\n");
|
||||
sb.append("\t").append("maxWordLen : ").append(maxWordLen).append("\n");
|
||||
sb.append("\t").append("fieldNames : ");
|
||||
String delim = "";
|
||||
for (String fieldName : fieldNames) {
|
||||
sb.append(delim).append(fieldName);
|
||||
delim = ", ";
|
||||
}
|
||||
sb.append("\n");
|
||||
sb.append("\t").append("boost : ").append(boost).append("\n");
|
||||
sb.append("\t").append("minTermFreq : ").append(minTermFreq).append("\n");
|
||||
sb.append("\t").append("minDocFreq : ").append(minDocFreq).append("\n");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Find words for a more-like-this query former.
|
||||
*
|
||||
* @param docNum the id of the lucene document from which to find terms
|
||||
*/
|
||||
public PriorityQueue<Object[]> retrieveTerms(int docNum) throws IOException {
|
||||
Map<String, Int> termFreqMap = new HashMap<>();
|
||||
for (String fieldName : fieldNames) {
|
||||
final Fields vectors = ir.getTermVectors(docNum);
|
||||
final Terms vector;
|
||||
if (vectors != null) {
|
||||
vector = vectors.terms(fieldName);
|
||||
} else {
|
||||
vector = null;
|
||||
}
|
||||
|
||||
// field does not store term vector info
|
||||
if (vector == null) {
|
||||
Document d = ir.document(docNum);
|
||||
IndexableField fields[] = d.getFields(fieldName);
|
||||
for (IndexableField field : fields) {
|
||||
final String stringValue = field.stringValue();
|
||||
if (stringValue != null) {
|
||||
addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
addTermFrequencies(termFreqMap, vector);
|
||||
}
|
||||
}
|
||||
|
||||
return createQueue(termFreqMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds terms and frequencies found in vector into the Map termFreqMap
|
||||
*
|
||||
* @param termFreqMap a Map of terms and their frequencies
|
||||
* @param vector List of terms and their frequencies for a doc/field
|
||||
*/
|
||||
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
|
||||
final TermsEnum termsEnum = vector.iterator(null);
|
||||
final CharsRef spare = new CharsRef();
|
||||
BytesRef text;
|
||||
while((text = termsEnum.next()) != null) {
|
||||
UnicodeUtil.UTF8toUTF16(text, spare);
|
||||
final String term = spare.toString();
|
||||
if (isNoiseWord(term)) {
|
||||
continue;
|
||||
}
|
||||
final int freq = (int) termsEnum.totalTermFreq();
|
||||
|
||||
// increment frequency
|
||||
Int cnt = termFreqMap.get(term);
|
||||
if (cnt == null) {
|
||||
cnt = new Int();
|
||||
termFreqMap.put(term, cnt);
|
||||
cnt.x = freq;
|
||||
} else {
|
||||
cnt.x += freq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds term frequencies found by tokenizing text from reader into the Map words
|
||||
*
|
||||
* @param r a source of text to be tokenized
|
||||
* @param termFreqMap a Map of terms and their frequencies
|
||||
* @param fieldName Used by analyzer for any special per-field analysis
|
||||
*/
|
||||
private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName)
|
||||
throws IOException {
|
||||
if (analyzer == null) {
|
||||
throw new UnsupportedOperationException("To use MoreLikeThis without " +
|
||||
"term vectors, you must provide an Analyzer");
|
||||
}
|
||||
TokenStream ts = analyzer.tokenStream(fieldName, r);
|
||||
try {
|
||||
int tokenCount = 0;
|
||||
// for every token
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
ts.reset();
|
||||
while (ts.incrementToken()) {
|
||||
String word = termAtt.toString();
|
||||
tokenCount++;
|
||||
if (tokenCount > maxNumTokensParsed) {
|
||||
break;
|
||||
}
|
||||
if (isNoiseWord(word)) {
continue;
}

// increment frequency
Int cnt = termFreqMap.get(word);
if (cnt == null) {
termFreqMap.put(word, new Int());
} else {
cnt.x++;
}
}
ts.end();
} finally {
IOUtils.closeWhileHandlingException(ts);
}
}


/**
* determines if the passed term is likely to be of interest in "more like" comparisons
*
* @param term The word being considered
* @return true if should be ignored, false if should be used in further analysis
*/
private boolean isNoiseWord(String term) {
int len = term.length();
if (minWordLen > 0 && len < minWordLen) {
return true;
}
if (maxWordLen > 0 && len > maxWordLen) {
return true;
}
return stopWords != null && stopWords.contains(term);
}


/**
* Find words for a more-like-this query former.
* The result is a priority queue of arrays with one entry for <b>every word</b> in the document.
* Each array has 6 elements.
* The elements are:
* <ol>
* <li> The word (String)
* <li> The top field that this word comes from (String)
* <li> The score for this word (Float)
* <li> The IDF value (Float)
* <li> The frequency of this word in the index (Integer)
* <li> The frequency of this word in the source document (Integer)
* </ol>
* This is a somewhat "advanced" routine, and in general only the 1st entry in the array is of interest.
* This method is exposed so that you can identify the "interesting words" in a document.
* For an easier method to call see {@link #retrieveInterestingTerms retrieveInterestingTerms()}.
*
* @param r the reader that has the content of the document
* @param fieldName field passed to the analyzer to use when analyzing the content
* @return the most interesting words in the document ordered by score, with the highest scoring, or best entry, first
* @see #retrieveInterestingTerms
*/
public PriorityQueue<Object[]> retrieveTerms(Reader r, String fieldName) throws IOException {
Map<String, Int> words = new HashMap<>();
addTermFrequencies(r, words, fieldName);
return createQueue(words);
}

/**
* @see #retrieveInterestingTerms(java.io.Reader, String)
*/
public String[] retrieveInterestingTerms(int docNum) throws IOException {
ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
PriorityQueue<Object[]> pq = retrieveTerms(docNum);
Object cur;
int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
// we just want to return the top words
while (((cur = pq.pop()) != null) && lim-- > 0) {
Object[] ar = (Object[]) cur;
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
return al.toArray(res);
}

/**
* Convenience routine to make it easy to return the most interesting words in a document.
* More advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly.
*
* @param r the source document
* @param fieldName field passed to analyzer to use when analyzing the content
* @return the most interesting words in the document
* @see #retrieveTerms(java.io.Reader, String)
* @see #setMaxQueryTerms
*/
public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
PriorityQueue<Object[]> pq = retrieveTerms(r, fieldName);
Object cur;
int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
// we just want to return the top words
while (((cur = pq.pop()) != null) && lim-- > 0) {
Object[] ar = (Object[]) cur;
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
return al.toArray(res);
}

/**
* PriorityQueue that orders words by score.
*/
private static class FreqQ extends PriorityQueue<Object[]> {
FreqQ(int s) {
super(s);
}

@Override
protected boolean lessThan(Object[] aa, Object[] bb) {
Float fa = (Float) aa[2];
Float fb = (Float) bb[2];
return fa > fb;
}
}

/**
* Use for frequencies and to avoid renewing Integers.
*/
private static class Int {
int x;

Int() {
x = 1;
}
}
}
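A minimal usage sketch of the interesting-terms API shown above; the instance name mlt, the reader, the field name "body", and the term cap are assumptions for illustration, not taken from this commit:

// Illustrative only: assumes `mlt` is a configured instance of the class above
// and `reader` wraps the text of the source document.
mlt.setMaxQueryTerms(25); // cap how many terms come back (see setMaxQueryTerms referenced above)
String[] interesting = mlt.retrieveInterestingTerms(reader, "body");
for (String term : interesting) {
    System.out.println(term); // highest scoring terms come first
}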
@ -33,7 +33,7 @@ abstract class AbstractArray implements BigArray {

@Override
public final void close() {
bigArrays.ramBytesUsed.addAndGet(-sizeInBytes());
bigArrays.ramBytesUsed.addAndGet(-ramBytesUsed());
assert !released : "double release";
released = true;
doClose();

@ -82,7 +82,7 @@ abstract class AbstractBigArray extends AbstractArray {

protected abstract int numBytesPerElement();

public final long sizeInBytes() {
public final long ramBytesUsed() {
// rough approximate, we only take into account the size of the values, not the overhead of the array objects
return ((long) pageIndex(size - 1) + 1) * pageSize() * numBytesPerElement();
}

@ -19,17 +19,13 @@

package org.elasticsearch.common.util;

import org.apache.lucene.util.Accountable;
import org.elasticsearch.common.lease.Releasable;

/** Base abstraction of an array. */
public interface BigArray extends Releasable {
public interface BigArray extends Releasable, Accountable {

/** Return the length of this array. */
public long size();

/**
* Return an estimated memory usage of this instance.
*/
public long sizeInBytes();

}
@ -118,7 +118,7 @@ public class BigArrays extends AbstractComponent {
}

@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}

@ -169,7 +169,7 @@ public class BigArrays extends AbstractComponent {
}

@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}

@ -212,7 +212,7 @@ public class BigArrays extends AbstractComponent {
}

@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}

@ -254,7 +254,7 @@ public class BigArrays extends AbstractComponent {
}

@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}

@ -297,7 +297,7 @@ public class BigArrays extends AbstractComponent {
}

@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
}

@ -340,7 +340,7 @@ public class BigArrays extends AbstractComponent {
}

@Override
public long sizeInBytes() {
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF * size());
}

@ -386,16 +386,16 @@ public class BigArrays extends AbstractComponent {
}

private <T extends AbstractBigArray> T resizeInPlace(T array, long newSize) {
final long oldMemSize = array.sizeInBytes();
final long oldMemSize = array.ramBytesUsed();
array.resize(newSize);
validate(array.sizeInBytes() - oldMemSize);
validate(array.ramBytesUsed() - oldMemSize);
return array;
}

private <T extends BigArray> T validate(T array) {
boolean success = false;
try {
validate(array.sizeInBytes());
validate(array.ramBytesUsed());
success = true;
} finally {
if (!success) {
@ -22,7 +22,7 @@ package org.elasticsearch.env;
import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.XNativeFSLockFactory;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.cluster.node.DiscoveryNode;
@ -78,7 +78,7 @@ public class NodeEnvironment extends AbstractComponent {
}
logger.trace("obtaining node lock on {} ...", dir.getAbsolutePath());
try {
XNativeFSLockFactory lockFactory = new XNativeFSLockFactory(dir);
NativeFSLockFactory lockFactory = new NativeFSLockFactory(dir);
Lock tmpLock = lockFactory.makeLock("node.lock");
boolean obtained = tmpLock.obtain();
if (obtained) {
@ -22,6 +22,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.elasticsearch.common.io.Streams;

@ -88,7 +88,7 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory

@Override
public TokenStream create(TokenStream tokenStream) {
if (version.onOrAfter(Version.LUCENE_48)) {
if (version.onOrAfter(Version.LUCENE_4_8)) {
return new WordDelimiterFilter(version, tokenStream,
charTypeTable,
flags,
@ -21,7 +21,7 @@ package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.codecs.lucene49.Lucene49Codec;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
@ -37,7 +37,7 @@ import org.elasticsearch.index.mapper.MapperService;
* configured for a specific field the default postings format is used.
*/
// LUCENE UPGRADE: make sure to move to a new codec depending on the lucene version
public class PerFieldMappingPostingFormatCodec extends Lucene46Codec {
public class PerFieldMappingPostingFormatCodec extends Lucene49Codec {
private final ESLogger logger;
private final MapperService mapperService;
private final PostingsFormat defaultPostingFormat;
@ -20,7 +20,7 @@
package org.elasticsearch.index.codec.docvaluesformat;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -35,7 +35,8 @@ public class DiskDocValuesFormatProvider extends AbstractDocValuesFormatProvider
@Inject
public DiskDocValuesFormatProvider(@Assisted String name, @Assisted Settings docValuesFormatSettings) {
super(name);
this.docValuesFormat = new DiskDocValuesFormat();
// TODO: log a warning if someone chooses this? just remove this together and map it to the 4.9 provider?
this.docValuesFormat = new Lucene49DocValuesFormat();
}

@Override
@ -38,9 +38,10 @@ public class DocValuesFormats {
builtInDocValuesFormatsX.put(name, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormat.forName(name)));
}
// LUCENE UPGRADE: update those DVF if necessary
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene45")));
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormatsX.put("memory", new PreBuiltDocValuesFormatProvider.Factory("memory", DocValuesFormat.forName("Memory")));
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Disk")));
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormatsX.put("Disk", new PreBuiltDocValuesFormatProvider.Factory("Disk", DocValuesFormat.forName("Lucene49")));
builtInDocValuesFormats = builtInDocValuesFormatsX.immutableMap();
}

@ -19,8 +19,8 @@

package org.elasticsearch.index.codec.postingsformat;

import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -19,8 +19,8 @@

package org.elasticsearch.index.codec.postingsformat;

import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -19,13 +19,14 @@

package org.elasticsearch.index.fielddata;

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.ScriptDocValues.Strings;

/**
* The thread safe {@link org.apache.lucene.index.AtomicReader} level cache of the data.
*/
public interface AtomicFieldData<Script extends ScriptDocValues> extends RamUsage {
public interface AtomicFieldData<Script extends ScriptDocValues> extends Accountable {

/**
* Use a non thread safe (lightweight) view of the values as bytes.
@ -56,7 +57,7 @@ public interface AtomicFieldData<Script extends ScriptDocValues> extends RamUsag
}

@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return 0;
}

@ -26,6 +26,7 @@ import com.google.common.cache.RemovalNotification;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.util.Accountable;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.lucene.SegmentReaderUtils;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
@ -63,7 +64,7 @@ public interface IndexFieldDataCache {

interface Listener {

void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, RamUsage ramUsage);
void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, Accountable ramUsage);

void onUnload(FieldMapper.Names fieldNames, FieldDataType fieldDataType, boolean wasEvicted, long sizeInBytes);
}
@ -71,11 +72,11 @@ public interface IndexFieldDataCache {
/**
* The resident field data cache is a *per field* cache that keeps all the values in memory.
*/
static abstract class FieldBased implements IndexFieldDataCache, SegmentReader.CoreClosedListener, RemovalListener<FieldBased.Key, RamUsage>, IndexReader.ReaderClosedListener {
static abstract class FieldBased implements IndexFieldDataCache, SegmentReader.CoreClosedListener, RemovalListener<FieldBased.Key, Accountable>, IndexReader.ReaderClosedListener {
private final IndexService indexService;
private final FieldMapper.Names fieldNames;
private final FieldDataType fieldDataType;
private final Cache<Key, RamUsage> cache;
private final Cache<Key, Accountable> cache;
private final IndicesFieldDataCacheListener indicesFieldDataCacheListener;
private final ESLogger logger;

@ -92,15 +93,15 @@ public interface IndexFieldDataCache {
}

@Override
public void onRemoval(RemovalNotification<Key, RamUsage> notification) {
public void onRemoval(RemovalNotification<Key, Accountable> notification) {
final Key key = notification.getKey();
assert key != null && key.listeners != null;

final RamUsage value = notification.getValue();
final Accountable value = notification.getValue();
long sizeInBytes = key.sizeInBytes;
assert sizeInBytes >= 0 || value != null : "Expected size [" + sizeInBytes + "] to be positive or value [" + value + "] to be non-null";
if (sizeInBytes == -1 && value != null) {
sizeInBytes = value.getMemorySizeInBytes();
sizeInBytes = value.ramBytesUsed();
}
for (Listener listener : key.listeners) {
try {
@ -129,7 +130,7 @@ public interface IndexFieldDataCache {
}
}
final AtomicFieldData fieldData = indexFieldData.loadDirect(context);
key.sizeInBytes = fieldData.getMemorySizeInBytes();
key.sizeInBytes = fieldData.ramBytesUsed();
for (Listener listener : key.listeners) {
try {
listener.onLoad(fieldNames, fieldDataType, fieldData);
@ -146,7 +147,7 @@ public interface IndexFieldDataCache {
public <IFD extends IndexFieldData.WithOrdinals<?>> IFD load(final IndexReader indexReader, final IFD indexFieldData) throws Exception {
final Key key = new Key(indexReader.getCoreCacheKey());
//noinspection unchecked
return (IFD) cache.get(key, new Callable<RamUsage>() {
return (IFD) cache.get(key, new Callable<Accountable>() {
@Override
public GlobalOrdinalsIndexFieldData call() throws Exception {
indexReader.addReaderClosedListener(FieldBased.this);
@ -160,7 +161,7 @@ public interface IndexFieldDataCache {
}
}
GlobalOrdinalsIndexFieldData ifd = (GlobalOrdinalsIndexFieldData) indexFieldData.localGlobalDirect(indexReader);
key.sizeInBytes = ifd.getMemorySizeInBytes();
key.sizeInBytes = ifd.ramBytesUsed();
for (Listener listener : key.listeners) {
try {
listener.onLoad(fieldNames, fieldDataType, ifd);
@ -1,30 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;

/**
*/
public interface RamUsage {

/**
* Size (in bytes) of memory used by this particular instance.
*/
long getMemorySizeInBytes();

}
@ -20,6 +20,7 @@
package org.elasticsearch.index.fielddata;

import com.carrotsearch.hppc.ObjectLongOpenHashMap;
import org.apache.lucene.util.Accountable;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.metrics.CounterMetric;
import org.elasticsearch.common.regex.Regex;
@ -69,18 +70,18 @@ public class ShardFieldData extends AbstractIndexShardComponent implements Index
}

@Override
public void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, RamUsage ramUsage) {
totalMetric.inc(ramUsage.getMemorySizeInBytes());
public void onLoad(FieldMapper.Names fieldNames, FieldDataType fieldDataType, Accountable ramUsage) {
totalMetric.inc(ramUsage.ramBytesUsed());
String keyFieldName = fieldNames.indexName();
CounterMetric total = perFieldTotals.get(keyFieldName);
if (total != null) {
total.inc(ramUsage.getMemorySizeInBytes());
total.inc(ramUsage.ramBytesUsed());
} else {
total = new CounterMetric();
total.inc(ramUsage.getMemorySizeInBytes());
total.inc(ramUsage.ramBytesUsed());
CounterMetric prev = perFieldTotals.putIfAbsent(keyFieldName, total);
if (prev != null) {
prev.inc(ramUsage.getMemorySizeInBytes());
prev.inc(ramUsage.ramBytesUsed());
}
}
}
@ -19,8 +19,9 @@
|
||||
|
||||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.index.XOrdinalMap;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.elasticsearch.index.fielddata.BytesValues;
|
||||
|
||||
/**
|
||||
@ -29,32 +30,38 @@ import org.elasticsearch.index.fielddata.BytesValues;
|
||||
public class GlobalOrdinalMapping extends BytesValues.WithOrdinals {
|
||||
|
||||
private final BytesValues.WithOrdinals values;
|
||||
private final OrdinalMap ordinalMap;
|
||||
private final XOrdinalMap ordinalMap;
|
||||
private final LongValues mapping;
|
||||
private final BytesValues.WithOrdinals[] bytesValues;
|
||||
private final int segmentIndex;
|
||||
|
||||
GlobalOrdinalMapping(OrdinalMap ordinalMap, BytesValues.WithOrdinals[] bytesValues, int segmentIndex) {
|
||||
GlobalOrdinalMapping(XOrdinalMap ordinalMap, BytesValues.WithOrdinals[] bytesValues, int segmentIndex) {
|
||||
super(bytesValues[segmentIndex].isMultiValued());
|
||||
this.values = bytesValues[segmentIndex];
|
||||
this.segmentIndex = segmentIndex;
|
||||
this.bytesValues = bytesValues;
|
||||
this.ordinalMap = ordinalMap;
|
||||
this.mapping = ordinalMap.getGlobalOrds(segmentIndex);
|
||||
}
|
||||
|
||||
int readerIndex;
|
||||
|
||||
@Override
|
||||
public long getMaxOrd() {
|
||||
return ordinalMap.getValueCount();
|
||||
}
|
||||
|
||||
// NOTE: careful if we change the API here: unnecessary branch for < 0 here hurts a lot.
|
||||
// so if we already know the count (from setDocument), its bad to do it redundantly.
|
||||
|
||||
public long getGlobalOrd(long segmentOrd) {
|
||||
return segmentOrd == MISSING_ORDINAL ? MISSING_ORDINAL : ordinalMap.getGlobalOrd(segmentIndex, segmentOrd);
|
||||
return mapping.get(segmentOrd);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getOrd(int docId) {
|
||||
return getGlobalOrd(values.getOrd(docId));
|
||||
long v = values.getOrd(docId);
|
||||
if (v < 0) {
|
||||
return v;
|
||||
} else {
|
||||
return getGlobalOrd(v);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -70,7 +77,7 @@ public class GlobalOrdinalMapping extends BytesValues.WithOrdinals {
|
||||
@Override
|
||||
public BytesRef getValueByOrd(long globalOrd) {
|
||||
final long segmentOrd = ordinalMap.getFirstSegmentOrd(globalOrd);
|
||||
readerIndex = ordinalMap.getFirstSegmentNumber(globalOrd);
|
||||
int readerIndex = ordinalMap.getFirstSegmentNumber(globalOrd);
|
||||
return bytesValues[readerIndex].getValueByOrd(segmentOrd);
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,7 @@ package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
@ -27,14 +28,13 @@ import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.FieldDataType;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.RamUsage;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.search.MultiValueMode;
|
||||
|
||||
/**
|
||||
* {@link IndexFieldData} base class for concrete global ordinals implementations.
|
||||
*/
|
||||
public abstract class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexFieldData.WithOrdinals, RamUsage {
|
||||
public abstract class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexFieldData.WithOrdinals, Accountable {
|
||||
|
||||
private final FieldMapper.Names fieldNames;
|
||||
private final FieldDataType fieldDataType;
|
||||
@ -93,7 +93,7 @@ public abstract class GlobalOrdinalsIndexFieldData extends AbstractIndexComponen
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return memorySizeInBytes;
|
||||
}
|
||||
|
||||
|
@ -20,12 +20,14 @@
|
||||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.index.XOrdinalMap;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.BytesValues;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
|
||||
@ -47,11 +49,14 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
||||
|
||||
final AtomicFieldData.WithOrdinals<?>[] atomicFD = new AtomicFieldData.WithOrdinals[indexReader.leaves().size()];
|
||||
final TermsEnum[] subs = new TermsEnum[indexReader.leaves().size()];
|
||||
final long[] weights = new long[subs.length];
|
||||
for (int i = 0; i < indexReader.leaves().size(); ++i) {
|
||||
atomicFD[i] = indexFieldData.load(indexReader.leaves().get(i));
|
||||
subs[i] = atomicFD[i].getBytesValues().getTermsEnum();
|
||||
BytesValues.WithOrdinals v = atomicFD[i].getBytesValues();
|
||||
subs[i] = v.getTermsEnum();
|
||||
weights[i] = v.getMaxOrd();
|
||||
}
|
||||
final OrdinalMap ordinalMap = new OrdinalMap(null, subs);
|
||||
final XOrdinalMap ordinalMap = XOrdinalMap.build(null, subs, weights, PackedInts.DEFAULT);
|
||||
final long memorySizeInBytes = ordinalMap.ramBytesUsed();
|
||||
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.index.XOrdinalMap;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
@ -35,7 +35,7 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
|
||||
|
||||
private final Atomic[] atomicReaders;
|
||||
|
||||
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicFieldData.WithOrdinals[] segmentAfd, OrdinalMap ordinalMap, long memorySizeInBytes) {
|
||||
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicFieldData.WithOrdinals[] segmentAfd, XOrdinalMap ordinalMap, long memorySizeInBytes) {
|
||||
super(index, settings, fieldNames, fieldDataType, memorySizeInBytes);
|
||||
this.atomicReaders = new Atomic[segmentAfd.length];
|
||||
for (int i = 0; i < segmentAfd.length; i++) {
|
||||
@ -51,10 +51,10 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
|
||||
private final class Atomic implements AtomicFieldData.WithOrdinals {
|
||||
|
||||
private final WithOrdinals afd;
|
||||
private final OrdinalMap ordinalMap;
|
||||
private final XOrdinalMap ordinalMap;
|
||||
private final int segmentIndex;
|
||||
|
||||
private Atomic(WithOrdinals afd, OrdinalMap ordinalMap, int segmentIndex) {
|
||||
private Atomic(WithOrdinals afd, XOrdinalMap ordinalMap, int segmentIndex) {
|
||||
this.afd = afd;
|
||||
this.ordinalMap = ordinalMap;
|
||||
this.segmentIndex = segmentIndex;
|
||||
@ -75,8 +75,8 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
return afd.getMemorySizeInBytes();
|
||||
public long ramBytesUsed() {
|
||||
return afd.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -77,7 +77,8 @@ public class MultiOrdinals extends Ordinals {
|
||||
assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds();
|
||||
}
|
||||
|
||||
public long getMemorySizeInBytes() {
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return endOffsets.ramBytesUsed() + ords.ramBytesUsed();
|
||||
}
|
||||
|
||||
|
@ -19,15 +19,14 @@
|
||||
|
||||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.index.fielddata.BytesValues;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* A thread safe ordinals abstraction. Ordinals can only be positive integers.
|
||||
*/
|
||||
public abstract class Ordinals {
|
||||
public abstract class Ordinals implements Accountable {
|
||||
|
||||
public static final ValuesHolder NO_VALUES = new ValuesHolder() {
|
||||
@Override
|
||||
@ -39,7 +38,7 @@ public abstract class Ordinals {
|
||||
/**
|
||||
* The memory size this ordinals take.
|
||||
*/
|
||||
public abstract long getMemorySizeInBytes();
|
||||
public abstract long ramBytesUsed();
|
||||
|
||||
public abstract BytesValues.WithOrdinals ordinals(ValuesHolder values);
|
||||
|
||||
|
@ -45,7 +45,7 @@ public class SinglePackedOrdinals extends Ordinals {
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_OBJECT_REF + reader.ramBytesUsed();
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ abstract class AbstractGeoPointIndexFieldData extends AbstractIndexFieldData<Ato
|
||||
protected static class Empty extends AtomicGeoPointFieldData<ScriptDocValues> {
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -44,47 +44,33 @@ public class BinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocValues.
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
// TODO: Lucene doesn't expose it right now
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesValues getBytesValues() {
|
||||
final BinaryDocValues values;
|
||||
final Bits docsWithField;
|
||||
try {
|
||||
final BinaryDocValues v = reader.getBinaryDocValues(field);
|
||||
if (v == null) {
|
||||
// segment has no value
|
||||
values = DocValues.EMPTY_BINARY;
|
||||
docsWithField = new Bits.MatchNoBits(reader.maxDoc());
|
||||
} else {
|
||||
values = v;
|
||||
final Bits b = reader.getDocsWithField(field);
|
||||
docsWithField = b == null ? new Bits.MatchAllBits(reader.maxDoc()) : b;
|
||||
}
|
||||
final BinaryDocValues values = DocValues.getBinary(reader, field);
|
||||
final Bits docsWithField = DocValues.getDocsWithField(reader, field);
|
||||
return new BytesValues(false) {
|
||||
int docId;
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
this.docId = docId;
|
||||
return docsWithField.get(docId) ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef nextValue() {
|
||||
return values.get(docId);
|
||||
}
|
||||
};
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||
}
|
||||
|
||||
return new BytesValues(false) {
|
||||
|
||||
final BytesRef scratch = new BytesRef();
|
||||
int docId;
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
this.docId = docId;
|
||||
return docsWithField.get(docId) ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef nextValue() {
|
||||
values.get(docId, scratch);
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -20,7 +20,6 @@
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
@ -37,7 +36,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
|
||||
|
||||
BinaryDVNumericAtomicFieldData(BinaryDocValues values, NumericType numericType) {
|
||||
super(numericType.isFloatingPoint());
|
||||
this.values = values == null ? DocValues.EMPTY_BINARY : values;
|
||||
this.values = values;
|
||||
this.numericType = numericType;
|
||||
}
|
||||
|
||||
@ -48,7 +47,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
|
||||
}
|
||||
return new LongValues(true) {
|
||||
|
||||
final BytesRef bytes = new BytesRef();
|
||||
BytesRef bytes;
|
||||
final ByteArrayDataInput in = new ByteArrayDataInput();
|
||||
long[] longs = new long[8];
|
||||
int i = Integer.MAX_VALUE;
|
||||
@ -56,7 +55,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
values.get(docId, bytes);
|
||||
bytes = values.get(docId);
|
||||
in.reset(bytes.bytes, bytes.offset, bytes.length);
|
||||
if (!in.eof()) {
|
||||
// first value uses vLong on top of zig-zag encoding, then deltas are encoded using vLong
|
||||
@ -91,13 +90,13 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
|
||||
case FLOAT:
|
||||
return new DoubleValues(true) {
|
||||
|
||||
final BytesRef bytes = new BytesRef();
|
||||
BytesRef bytes;
|
||||
int i = Integer.MAX_VALUE;
|
||||
int valueCount = 0;
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
values.get(docId, bytes);
|
||||
bytes = values.get(docId);
|
||||
assert bytes.length % 4 == 0;
|
||||
i = 0;
|
||||
return valueCount = bytes.length / 4;
|
||||
@ -113,13 +112,13 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
|
||||
case DOUBLE:
|
||||
return new DoubleValues(true) {
|
||||
|
||||
final BytesRef bytes = new BytesRef();
|
||||
BytesRef bytes;
|
||||
int i = Integer.MAX_VALUE;
|
||||
int valueCount = 0;
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
values.get(docId, bytes);
|
||||
bytes = values.get(docId);
|
||||
assert bytes.length % 8 == 0;
|
||||
i = 0;
|
||||
return valueCount = bytes.length / 8;
|
||||
@ -138,7 +137,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return -1; // Lucene doesn't expose it
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.FieldDataType;
|
||||
@ -63,7 +64,7 @@ public class BinaryDVNumericIndexFieldData extends DocValuesIndexFieldData imple
|
||||
@Override
|
||||
public BinaryDVNumericAtomicFieldData load(AtomicReaderContext context) {
|
||||
try {
|
||||
return new BinaryDVNumericAtomicFieldData(context.reader().getBinaryDocValues(fieldNames.indexName()), numericType);
|
||||
return new BinaryDVNumericAtomicFieldData(DocValues.getBinary(context.reader(), fieldNames.indexName()), numericType);
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||
}
|
||||
|
@ -20,7 +20,6 @@
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
@ -33,11 +32,11 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocVal
|
||||
|
||||
BytesBinaryDVAtomicFieldData(BinaryDocValues values) {
|
||||
super();
|
||||
this.values = values == null ? DocValues.EMPTY_BINARY : values;
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return -1; // not exposed by Lucene
|
||||
}
|
||||
|
||||
@ -45,13 +44,13 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocVal
|
||||
public BytesValues getBytesValues() {
|
||||
return new BytesValues(true) {
|
||||
|
||||
final BytesRef bytes = new BytesRef();
|
||||
BytesRef bytes;
|
||||
final BytesRef scratch = new BytesRef();
|
||||
final ByteArrayDataInput in = new ByteArrayDataInput();
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
values.get(docId, bytes);
|
||||
bytes = values.get(docId);
|
||||
in.reset(bytes.bytes, bytes.offset, bytes.length);
|
||||
if (bytes.length == 0) {
|
||||
return 0;
|
||||
|
@ -20,6 +20,7 @@
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
@ -56,7 +57,7 @@ public class BytesBinaryDVIndexFieldData extends DocValuesIndexFieldData impleme
|
||||
@Override
|
||||
public BytesBinaryDVAtomicFieldData load(AtomicReaderContext context) {
|
||||
try {
|
||||
return new BytesBinaryDVAtomicFieldData(context.reader().getBinaryDocValues(fieldNames.indexName()));
|
||||
return new BytesBinaryDVAtomicFieldData(DocValues.getBinary(context.reader(), fieldNames.indexName()));
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -87,9 +87,9 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -155,9 +155,9 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + RamUsageEstimator.sizeOf(set.getBits());
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -236,9 +236,9 @@ public abstract class DoubleArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
|
||||
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
|
||||
if (terms == null) {
|
||||
data = DoubleArrayAtomicFieldData.empty();
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
return data;
|
||||
}
|
||||
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
|
||||
@ -108,8 +108,8 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
|
||||
|
||||
// there's sweet spot where due to low unique value count, using ordinals will consume less memory
|
||||
long singleValuesArraySize = reader.maxDoc() * RamUsageEstimator.NUM_BYTES_DOUBLE + (set == null ? 0 : RamUsageEstimator.sizeOf(set.getBits()) + RamUsageEstimator.NUM_BYTES_INT);
|
||||
long uniqueValuesArraySize = values.sizeInBytes();
|
||||
long ordinalsSize = build.getMemorySizeInBytes();
|
||||
long uniqueValuesArraySize = values.ramBytesUsed();
|
||||
long ordinalsSize = build.ramBytesUsed();
|
||||
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
|
||||
data = new DoubleArrayAtomicFieldData.WithOrdinals(values, build);
|
||||
success = true;
|
||||
@ -135,7 +135,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
|
||||
return data;
|
||||
} finally {
|
||||
if (success) {
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -52,11 +52,11 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
long size = ordinals.getMemorySizeInBytes();
|
||||
long size = ordinals.ramBytesUsed();
|
||||
// FST
|
||||
size += fst == null ? 0 : fst.sizeInBytes();
|
||||
size += fst == null ? 0 : fst.ramBytesUsed();
|
||||
this.size = size;
|
||||
}
|
||||
return size;
|
||||
|
@ -67,7 +67,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<AtomicFi
|
||||
// TODO: Use an actual estimator to estimate before loading.
|
||||
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
|
||||
if (terms == null) {
|
||||
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.ramBytesUsed());
|
||||
return AtomicFieldData.WithOrdinals.EMPTY;
|
||||
}
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
@ -106,7 +106,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<AtomicFi
|
||||
return data;
|
||||
} finally {
|
||||
if (success) {
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -86,9 +86,9 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -151,9 +151,9 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + RamUsageEstimator.sizeOf(set.getBits());
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -234,9 +234,9 @@ public abstract class FloatArrayAtomicFieldData extends AbstractAtomicNumericFie
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
|
||||
size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
|
||||
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
|
||||
if (terms == null) {
|
||||
data = FloatArrayAtomicFieldData.empty();
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
return data;
|
||||
}
|
||||
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
|
||||
@ -106,8 +106,8 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
|
||||
|
||||
// there's sweet spot where due to low unique value count, using ordinals will consume less memory
|
||||
long singleValuesArraySize = reader.maxDoc() * RamUsageEstimator.NUM_BYTES_FLOAT + (set == null ? 0 : RamUsageEstimator.sizeOf(set.getBits()) + RamUsageEstimator.NUM_BYTES_INT);
|
||||
long uniqueValuesArraySize = values.sizeInBytes();
|
||||
long ordinalsSize = build.getMemorySizeInBytes();
|
||||
long uniqueValuesArraySize = values.ramBytesUsed();
|
||||
long ordinalsSize = build.ramBytesUsed();
|
||||
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
|
||||
data = new FloatArrayAtomicFieldData.WithOrdinals(values, build);
|
||||
success = true;
|
||||
@ -133,7 +133,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
|
||||
return data;
|
||||
} finally {
|
||||
if (success) {
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -20,7 +20,6 @@
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
import org.elasticsearch.common.util.ByteUtils;
|
||||
@ -34,11 +33,11 @@ final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData<Scri
|
||||
|
||||
GeoPointBinaryDVAtomicFieldData(BinaryDocValues values) {
|
||||
super();
|
||||
this.values = values == null ? DocValues.EMPTY_BINARY : values;
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return -1; // not exposed by Lucene
|
||||
}
|
||||
|
||||
@ -56,14 +55,14 @@ final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData<Scri
|
||||
public GeoPointValues getGeoPointValues() {
|
||||
return new GeoPointValues(true) {
|
||||
|
||||
final BytesRef bytes = new BytesRef();
|
||||
BytesRef bytes;
|
||||
int i = Integer.MAX_VALUE;
|
||||
int valueCount = 0;
|
||||
final GeoPoint point = new GeoPoint();
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
values.get(docId, bytes);
|
||||
bytes = values.get(docId);
|
||||
assert bytes.length % 16 == 0;
|
||||
i = 0;
|
||||
return valueCount = (bytes.length >>> 4);
|
||||
|
@ -20,18 +20,19 @@
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.*;
|
||||
import org.elasticsearch.search.MultiValueMode;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.mapper.FieldMapper.Names;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
|
||||
import org.elasticsearch.search.MultiValueMode;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@ -54,7 +55,7 @@ public class GeoPointBinaryDVIndexFieldData extends DocValuesIndexFieldData impl
|
||||
@Override
|
||||
public AtomicGeoPointFieldData<ScriptDocValues> load(AtomicReaderContext context) {
|
||||
try {
|
||||
return new GeoPointBinaryDVAtomicFieldData(context.reader().getBinaryDocValues(fieldNames.indexName()));
|
||||
return new GeoPointBinaryDVAtomicFieldData(DocValues.getBinary(context.reader(), fieldNames.indexName()));
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ public abstract class GeoPointCompressedAtomicFieldData extends AtomicGeoPointFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed();
|
||||
}
|
||||
@ -120,7 +120,7 @@ public abstract class GeoPointCompressedAtomicFieldData extends AtomicGeoPointFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
|
||||
}
|
||||
@ -178,7 +178,7 @@ public abstract class GeoPointCompressedAtomicFieldData extends AtomicGeoPointFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + (lon.ramBytesUsed() + lat.ramBytesUsed());
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ public class GeoPointCompressedIndexFieldData extends AbstractGeoPointIndexField
|
||||
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
|
||||
if (terms == null) {
|
||||
data = new Empty();
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
return data;
|
||||
}
|
||||
final long initialSize;
|
||||
@ -147,7 +147,7 @@ public class GeoPointCompressedIndexFieldData extends AbstractGeoPointIndexField
|
||||
return data;
|
||||
} finally {
|
||||
if (success) {
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -56,9 +56,9 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.sizeInBytes() + lat.sizeInBytes();
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -112,9 +112,9 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.sizeInBytes() + lat.sizeInBytes() + RamUsageEstimator.sizeOf(set.getBits());
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed() + RamUsageEstimator.sizeOf(set.getBits());
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -167,9 +167,9 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + RamUsageEstimator.NUM_BYTES_INT/*numDocs*/ + (lon.sizeInBytes() + lat.sizeInBytes());
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + RamUsageEstimator.NUM_BYTES_INT/*numDocs*/ + (lon.ramBytesUsed() + lat.ramBytesUsed());
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractGeoPointIndexFiel
|
||||
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
|
||||
if (terms == null) {
|
||||
data = new Empty();
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
return data;
|
||||
}
|
||||
DoubleArray lat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
|
||||
@ -114,7 +114,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractGeoPointIndexFiel
|
||||
return data;
|
||||
} finally {
|
||||
if (success) {
|
||||
estimator.afterLoad(null, data.getMemorySizeInBytes());
|
||||
estimator.afterLoad(null, data.ramBytesUsed());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ public class IndexIndexFieldData implements IndexFieldData.WithOrdinals<AtomicFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ public class NumericDVAtomicFieldData extends AbstractAtomicNumericFieldData {
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
// TODO: cannot be computed from Lucene
|
||||
return -1;
|
||||
}
|
||||
@ -66,23 +66,13 @@ public class NumericDVAtomicFieldData extends AbstractAtomicNumericFieldData {
|
||||
}
|
||||
|
||||
private DocValuesAndBits getDocValues() {
|
||||
final NumericDocValues values;
|
||||
final Bits docsWithField;
|
||||
try {
|
||||
final NumericDocValues v = reader.getNumericDocValues(field);
|
||||
if (v == null) {
|
||||
// segment has no value
|
||||
values = DocValues.EMPTY_NUMERIC;
|
||||
docsWithField = new Bits.MatchNoBits(reader.maxDoc());
|
||||
} else {
|
||||
values = v;
|
||||
final Bits b = reader.getDocsWithField(field);
|
||||
docsWithField = b == null ? new Bits.MatchAllBits(reader.maxDoc()) : b;
|
||||
}
|
||||
final NumericDocValues values = DocValues.getNumeric(reader, field);
|
||||
final Bits docsWithField = DocValues.getDocsWithField(reader, field);
|
||||
return new DocValuesAndBits(values, docsWithField);
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||
}
|
||||
return new DocValuesAndBits(values, docsWithField);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -59,7 +59,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -86,9 +86,9 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.getMemorySizeInBytes();
|
||||
size = RamUsageEstimator.NUM_BYTES_INT/*size*/ + values.ramBytesUsed() + ordinals.ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -156,7 +156,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = values.ramBytesUsed() + 2 * RamUsageEstimator.NUM_BYTES_LONG;
|
||||
}
|
||||
@ -243,7 +243,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = values.ramBytesUsed();
|
||||
}
|
||||
@ -322,7 +322,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = values.ramBytesUsed();
|
||||
}
|
||||
@ -398,7 +398,7 @@ public abstract class PackedArrayAtomicFieldData extends AbstractAtomicNumericFi
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
public long ramBytesUsed() {
|
||||
if (size == -1) {
|
||||
size = values.ramBytesUsed() + 2 * RamUsageEstimator.NUM_BYTES_LONG;
|
||||
}

@ -101,7 +101,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType(), getFieldNames().fullName());
if (terms == null) {
data = PackedArrayAtomicFieldData.empty();
estimator.adjustForNoTerms(data.getMemorySizeInBytes());
estimator.adjustForNoTerms(data.ramBytesUsed());
return data;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...

@ -231,7 +231,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
estimator.afterLoad(termsEnum, 0);
} else {
// Adjust as usual, based on the actual size of the field data
estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
estimator.afterLoad(termsEnum, data.ramBytesUsed());
}

}

@ -251,7 +251,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;

// ordinal memory usage
final long ordinalsSize = build.getMemorySizeInBytes() + values.ramBytesUsed();
final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed();

// estimate the memory signature of paged packing
long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages

@ -49,9 +49,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}

@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
if (size == -1) {
long size = ordinals.getMemorySizeInBytes();
long size = ordinals.ramBytesUsed();
// PackedBytes
size += readerBytesSize;
// PackedInts

@ -18,7 +18,8 @@
*/
package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.codecs.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;

@ -65,7 +66,7 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<Atomic
PagedBytesEstimator estimator = new PagedBytesEstimator(context, breakerService.getBreaker(), getFieldNames().fullName());
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.getMemorySizeInBytes());
estimator.afterLoad(null, AtomicFieldData.WithOrdinals.EMPTY.ramBytesUsed());
return AtomicFieldData.WithOrdinals.EMPTY;
}

@ -115,7 +116,7 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<Atomic
estimator.afterLoad(termsEnum, 0);
} else {
// Call .afterLoad() to adjust the breaker now that we have an exact size
estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
estimator.afterLoad(termsEnum, data.ramBytesUsed());
}

}

@ -165,8 +166,8 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<Atomic
Fields fields = reader.fields();
final Terms fieldTerms = fields.terms(getFieldNames().indexName());

if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
if (fieldTerms instanceof FieldReader) {
final Stats stats = ((FieldReader) fieldTerms).computeStats();
long totalTermBytes = stats.totalTermBytes;
if (logger.isTraceEnabled()) {
logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
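
Note: the import and cast changes in this hunk reflect that FieldReader and Stats are top-level classes in org.apache.lucene.codecs.blocktree as of Lucene 4.9, instead of inner classes of BlockTreeTermsReader. A small sketch of the same pattern in isolation (TermsStatsHelper is a made-up name):

import java.io.IOException;

import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.index.Terms;

// Sketch only: when the terms dictionary is the block-tree implementation,
// computeStats() exposes per-field statistics such as totalTermBytes.
class TermsStatsHelper {
    static long totalTermBytes(Terms terms) throws IOException {
        if (terms instanceof FieldReader) {
            final Stats stats = ((FieldReader) terms).computeStats();
            return stats.totalTermBytes;
        }
        return -1L; // unknown for other Terms implementations
    }
}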

@ -37,13 +37,13 @@ public class ParentChildAtomicFieldData implements AtomicFieldData {
this.typeToIds = typeToIds;
long size = 0;
for (ObjectCursor<PagedBytesAtomicFieldData> cursor : typeToIds.values()) {
size += cursor.value.getMemorySizeInBytes();
size += cursor.value.ramBytesUsed();
}
this.memorySizeInBytes = size;
}

@Override
public long getMemorySizeInBytes() {
public long ramBytesUsed() {
return memorySizeInBytes;
}

@ -152,7 +152,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<ParentChil
return data;
} finally {
if (success) {
estimator.afterLoad(estimatedTermsEnum, data.getMemorySizeInBytes());
estimator.afterLoad(estimatedTermsEnum, data.ramBytesUsed());
} else {
estimator.afterLoad(estimatedTermsEnum, 0);
}

@ -309,7 +309,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<ParentChil
PerType perType = new PerType(parentType.utf8ToString());
GlobalOrdinalsIndexFieldData globalIfd = (GlobalOrdinalsIndexFieldData) globalOrdinalsBuilder.build(indexReader, perType, indexSettings, breakerService);
globalIfdPerType.put(perType.type, globalIfd);
memorySizeInBytes += globalIfd.getMemorySizeInBytes();
memorySizeInBytes += globalIfd.ramBytesUsed();
}
return new ParentChildGlobalOrdinalsIndexFieldData(globalIfdPerType.build(), memorySizeInBytes);
}

@ -19,10 +19,7 @@

package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalStateException;

@ -40,24 +37,26 @@ abstract class SortedSetDVAtomicFieldData {

private final AtomicReader reader;
private final String field;
private final boolean multiValued;
private final long valueCount;

SortedSetDVAtomicFieldData(AtomicReader reader, String field) {
this.reader = reader;
this.field = field;
SortedSetDocValues dv = getValuesNoException(reader, field);
this.multiValued = DocValues.unwrapSingleton(dv) == null;
this.valueCount = dv.getValueCount();
}

public boolean isMultiValued() {
// we could compute it when loading the values for the first time and then cache it but it would defeat the point of
// doc values which is to make loading faster
return true;
return multiValued;
}

public long getNumberUniqueValues() {
final SortedSetDocValues values = getValuesNoException(reader, field);
return values.getValueCount();
return valueCount;
}

public long getMemorySizeInBytes() {
public long ramBytesUsed() {
// There is no API to access memory usage per-field and RamUsageEstimator can't help since there are often references
// from a per-field instance to all other instances handled by the same format
return -1L;

@ -69,7 +68,11 @@ abstract class SortedSetDVAtomicFieldData {

public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getBytesValues() {
final SortedSetDocValues values = getValuesNoException(reader, field);
return new SortedSetValues(values);
if (values instanceof RandomAccessOrds) {
return new RandomAccessSortedSetValues((RandomAccessOrds)values, multiValued);
} else {
return new SortedSetValues(values, multiValued);
}
}

public TermsEnum getTermsEnum() {

@ -78,27 +81,58 @@ abstract class SortedSetDVAtomicFieldData {

private static SortedSetDocValues getValuesNoException(AtomicReader reader, String field) {
try {
SortedSetDocValues values = reader.getSortedSetDocValues(field);
if (values == null) {
// This field has not been populated
assert reader.getFieldInfos().fieldInfo(field) == null;
values = DocValues.EMPTY_SORTED_SET;
}
return values;
return DocValues.getSortedSet(reader, field);
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Couldn't load doc values", e);
}
}

private final static class RandomAccessSortedSetValues extends BytesValues.WithOrdinals {
private final RandomAccessOrds values;
private int index = 0;

RandomAccessSortedSetValues(RandomAccessOrds values, boolean multiValued) {
super(multiValued);
this.values = values;
}

static class SortedSetValues extends BytesValues.WithOrdinals {
@Override
public long getMaxOrd() {
return values.getValueCount();
}

@Override
public long getOrd(int docId) {
values.setDocument(docId);
return values.nextOrd();
}

@Override
public long nextOrd() {
return values.ordAt(index++);
}

@Override
public BytesRef getValueByOrd(long ord) {
return values.lookupOrd(ord);
}

@Override
public int setDocument(int docId) {
values.setDocument(docId);
index = 0;
return values.cardinality();
}
}

private final static class SortedSetValues extends BytesValues.WithOrdinals {

private final BytesRef scratch = new BytesRef();
private final SortedSetDocValues values;
private long[] ords;
private int ordIndex = Integer.MAX_VALUE;

SortedSetValues(SortedSetDocValues values) {
super(DocValues.unwrapSingleton(values) == null);
SortedSetValues(SortedSetDocValues values, boolean multiValued) {
super(multiValued);
this.values = values;
ords = new long[0];
}

@ -136,8 +170,7 @@ abstract class SortedSetDVAtomicFieldData {

@Override
public BytesRef getValueByOrd(long ord) {
values.lookupOrd(ord, scratch);
return scratch;
return values.lookupOrd(ord);
}
}
}
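
Note: the new RandomAccessSortedSetValues branch above uses RandomAccessOrds, introduced in Lucene 4.9, which adds cardinality() and ordAt(int) on top of SortedSetDocValues; lookupOrd(long) also now returns a BytesRef instead of filling a caller-supplied one. A minimal sketch of both code paths (SortedSetOrdsReader is a made-up name, not part of this commit):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedSetDocValues;

// Sketch only: prefer the random-access path when the codec exposes it,
// otherwise fall back to the nextOrd() iteration protocol.
class SortedSetOrdsReader {
    static List<String> valuesFor(AtomicReader reader, String field, int docId) throws IOException {
        final SortedSetDocValues dv = DocValues.getSortedSet(reader, field); // empty singleton if the field is missing
        final List<String> result = new ArrayList<>();
        if (dv instanceof RandomAccessOrds) {
            final RandomAccessOrds ords = (RandomAccessOrds) dv;
            ords.setDocument(docId);
            for (int i = 0; i < ords.cardinality(); i++) {
                result.add(ords.lookupOrd(ords.ordAt(i)).utf8ToString()); // lookupOrd returns a BytesRef in 4.9
            }
        } else {
            dv.setDocument(docId);
            for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
                result.add(dv.lookupOrd(ord).utf8ToString());
            }
        }
        return result;
    }
}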

@ -21,7 +21,6 @@ package org.elasticsearch.index.mapper.core;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.Filter;

@ -222,7 +221,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper<Boolean> {
if (value == null) {
return;
}
fields.add(new XStringField(names.indexName(), value ? "T" : "F", fieldType));
fields.add(new Field(names.indexName(), value ? "T" : "F", fieldType));
}

@Override
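
Note: here and in the mapper classes below, the Elasticsearch-specific XStringField is dropped in favour of a plain org.apache.lucene.document.Field; with the token stream reuse visible in the tokenStream(Analyzer, TokenStream) signature changes further down, the custom class is presumably no longer needed. A minimal sketch of building such a field with an explicit FieldType (field name and value are illustrative only, not from this commit):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;

// Sketch only: a not-analyzed, indexed, stored string field built from a
// FieldType, using the same Field constructor the mappers now call directly.
class FieldExample {
    static Document buildDoc() {
        FieldType type = new FieldType();
        type.setIndexed(true);
        type.setTokenized(false);
        type.setStored(true);
        type.setIndexOptions(IndexOptions.DOCS_ONLY);
        type.freeze();

        Document doc = new Document();
        doc.add(new Field("status", "T", type));
        return doc;
    }
}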

@ -372,7 +372,7 @@ public class ByteFieldMapper extends NumberFieldMapper<Byte> {
}

@Override
public TokenStream tokenStream(Analyzer analyzer) {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
if (fieldType().indexed()) {
return mapper.popCachedStream().setIntValue(number);
}

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalArgumentException;

@ -47,7 +46,6 @@ import org.elasticsearch.search.suggest.context.ContextMapping;
import org.elasticsearch.search.suggest.context.ContextMapping.ContextConfig;

import java.io.IOException;
import java.io.Reader;
import java.util.*;

import static org.elasticsearch.index.mapper.MapperBuilders.completionField;

@ -388,7 +386,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
surfaceForm, weight, payload);
}

private static final class SuggestField extends XStringField {
private static final class SuggestField extends Field {
private final BytesRef payload;
private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
private final ContextMapping.Context ctx;

@ -401,8 +399,8 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
}

@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer));
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer, previous));
return new CompletionTokenStream(ts, payload, toFiniteStrings);
}
}

@ -375,7 +375,7 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
}

@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setDoubleValue(number);
}

@ -381,7 +381,7 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
}

@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setFloatValue(number);
}

@ -376,7 +376,7 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
}

@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setIntValue(number);
}

@ -357,7 +357,7 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
}

@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setLongValue(number);
}

@ -459,7 +459,7 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM
}

@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
return null;
}
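
Note: the repeated signature change in these hunks follows Lucene 4.9, where IndexableField#tokenStream takes a second argument: a TokenStream previously used for the same field that the implementation may reuse instead of allocating a new one. A minimal sketch of a Field subclass overriding the new signature (ReusingField is a made-up example, not part of this commit):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

// Sketch only: delegate to Field, which reuses the passed-in stream when it is
// compatible with the field's value and type.
class ReusingField extends Field {
    ReusingField(String name, String value, FieldType type) {
        super(name, value, type);
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
        return super.tokenStream(analyzer, reuse);
    }
}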

@ -374,7 +374,7 @@ public class ShortFieldMapper extends NumberFieldMapper<Short> {
}

@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
if (fieldType().indexed()) {
return mapper.popCachedStream().setIntValue(number);
}

@ -20,13 +20,9 @@
package org.elasticsearch.index.mapper.core;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;

@ -287,7 +283,7 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
}

if (fieldType.indexed() || fieldType.stored()) {
Field field = new XStringField(names.indexName(), valueAndBoost.value(), fieldType);
Field field = new Field(names.indexName(), valueAndBoost.value(), fieldType);
field.setBoost(valueAndBoost.boost());
fields.add(field);
}

@ -24,7 +24,6 @@ import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.base.Objects;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;

@ -571,7 +570,7 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
}

if (fieldType.indexed() || fieldType.stored()) {
Field field = new XStringField(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
Field field = new Field(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
context.doc().add(field);
}
if (enableGeoHash) {

@ -23,7 +23,6 @@ import com.google.common.collect.UnmodifiableIterator;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;

@ -221,7 +220,7 @@ public class FieldNamesFieldMapper extends AbstractFieldMapper<String> implement
for (String path : paths) {
for (String fieldName : extractFieldNames(path)) {
if (fieldType.indexed() || fieldType.stored()) {
document.add(new XStringField(names().indexName(), fieldName, fieldType));
document.add(new Field(names().indexName(), fieldName, fieldType));
}
if (hasDocValues()) {
document.add(new SortedSetDocValuesField(names().indexName(), new BytesRef(fieldName)));

@ -23,7 +23,6 @@ import com.google.common.collect.Iterables;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;

@ -310,7 +309,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
} // else we are in the pre/post parse phase

if (fieldType.indexed() || fieldType.stored()) {
fields.add(new XStringField(names.indexName(), context.id(), fieldType));
fields.add(new Field(names.indexName(), context.id(), fieldType));
}
if (hasDocValues()) {
fields.add(new BinaryDocValuesField(names.indexName(), new BytesRef(context.id())));

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;

@ -182,7 +181,7 @@ public class IndexFieldMapper extends AbstractFieldMapper<String> implements Int
if (!enabledState.enabled) {
return;
}
fields.add(new XStringField(names.indexName(), context.index(), fieldType));
fields.add(new Field(names.indexName(), context.index(), fieldType));
}

@Override

@ -20,7 +20,6 @@ package org.elasticsearch.index.mapper.internal;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;

@ -185,7 +184,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
// we are in the parsing of _parent phase
String parentId = context.parser().text();
context.sourceToParse().parent(parentId);
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else {
// otherwise, we are running it post processing of the xcontent
String parsedParentId = context.doc().get(Defaults.NAME);

@ -196,7 +195,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
throw new MapperParsingException("No parent id provided, not within the document, and not externally");
}
// we did not add it in the parsing phase, add it now
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else if (parentId != null && !parsedParentId.equals(Uid.createUid(context.stringBuilder(), type, parentId))) {
throw new MapperParsingException("Parent id mismatch, document value is [" + Uid.createUid(parsedParentId).id() + "], while external value is [" + parentId + "]");
}

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;

@ -201,7 +200,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper<String> implements I
context.ignoredValue(names.indexName(), routing);
return;
}
fields.add(new XStringField(names.indexName(), routing, fieldType));
fields.add(new Field(names.indexName(), routing, fieldType));
}
}
}

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;

@ -178,7 +177,7 @@ public class TypeFieldMapper extends AbstractFieldMapper<String> implements Inte
if (!fieldType.indexed() && !fieldType.stored()) {
return;
}
fields.add(new XStringField(names.indexName(), context.type(), fieldType));
fields.add(new Field(names.indexName(), context.type(), fieldType));
if (hasDocValues()) {
fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(context.type())));
}

@ -22,7 +22,6 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;

@ -154,7 +153,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal
// we need to go over the docs and add it...
for (int i = 1; i < context.docs().size(); i++) {
final Document doc = context.docs().get(i);
doc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
doc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
}
}
}

@ -172,7 +171,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal

@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
Field uid = new XStringField(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
Field uid = new Field(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
context.uid(uid);
fields.add(uid);
if (hasDocValues()) {

@ -20,7 +20,7 @@
package org.elasticsearch.index.mapper.object;

import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;

@ -453,12 +453,12 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll {
// we also rely on this for UidField#loadVersion

// this is a deeply nested field
nestedDoc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
}
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new XStringField(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
restoreDoc = context.switchDoc(nestedDoc);
context.addDoc(nestedDoc);
}

@ -51,7 +51,6 @@ import java.util.Map;
* For now, this {@link MergePolicy} takes care of moving versions that used to
* be stored as payloads to numeric doc values.
*/
@SuppressWarnings("PMD.ProperCloneImplementation")
public final class ElasticsearchMergePolicy extends MergePolicy {

private final MergePolicy delegate;

@ -105,11 +104,11 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
fieldNumber = Math.max(fieldNumber, fi.number + 1);
}
newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, false, fieldNumber, false, true, false,
IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC, Collections.<String, String>emptyMap());
IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC, -1, Collections.<String, String>emptyMap());
} else {
newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, versionInfo.isIndexed(), versionInfo.number,
versionInfo.hasVectors(), versionInfo.omitsNorms(), versionInfo.hasPayloads(),
versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(), versionInfo.attributes());
versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(), versionInfo.getDocValuesGen(), versionInfo.attributes());
}
final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>();
for (FieldInfo info : fieldInfos) {

@ -189,13 +188,13 @@ public final class ElasticsearchMergePolicy extends MergePolicy {

@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger,
SegmentInfos segmentInfos) throws IOException {
return upgradedMergeSpecification(delegate.findMerges(mergeTrigger, segmentInfos));
SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
return upgradedMergeSpecification(delegate.findMerges(mergeTrigger, segmentInfos, writer));
}

@Override
public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge)
int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
throws IOException {
if (force) {
List<SegmentCommitInfo> segments = Lists.newArrayList();

@ -210,18 +209,13 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
return spec;
}
}
return upgradedMergeSpecification(delegate.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge));
return upgradedMergeSpecification(delegate.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer));
}

@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos)
public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
throws IOException {
return upgradedMergeSpecification(delegate.findForcedDeletesMerges(segmentInfos));
}

@Override
public MergePolicy clone() {
return new ElasticsearchMergePolicy(delegate.clone());
return upgradedMergeSpecification(delegate.findForcedDeletesMerges(segmentInfos, writer));
}

@Override

@ -230,14 +224,8 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
}

@Override
public boolean useCompoundFile(SegmentInfos segments,
SegmentCommitInfo newSegment) throws IOException {
return delegate.useCompoundFile(segments, newSegment);
}

@Override
public void setIndexWriter(IndexWriter writer) {
delegate.setIndexWriter(writer);
public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, IndexWriter writer) throws IOException {
return delegate.useCompoundFile(segments, newSegment, writer);
}

/**

@ -20,7 +20,6 @@
package org.elasticsearch.index.merge.policy;

import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Preconditions;
import org.elasticsearch.common.inject.Inject;

@ -157,13 +156,6 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
super.close();
provider.policies.remove(this);
}

@Override
public MergePolicy clone() {
// Lucene IW makes a clone internally but since we hold on to this instance
// the clone will just be the identity.
return this;
}
}

}

@ -19,7 +19,6 @@

package org.elasticsearch.index.merge.policy;

import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.inject.Inject;

@ -205,12 +204,5 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
super.close();
provider.policies.remove(this);
}

@Override
public MergePolicy clone() {
// Lucene IW makes a clone internally but since we hold on to this instance
// the clone will just be the identity.
return this;
}
}
}
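
Note: the ElasticsearchMergePolicy and merge policy provider changes above follow the Lucene 4.9 MergePolicy API, where the IndexWriter is passed into findMerges/findForcedMerges/findForcedDeletesMerges/useCompoundFile directly and the setIndexWriter() plumbing and the clone() overrides go away. A minimal sketch of a delegating policy against the new signatures (DelegatingMergePolicy is a made-up name; it is left abstract so it does not have to implement any remaining abstract MergePolicy methods):

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;

// Sketch only: every callback now receives the IndexWriter as an argument.
abstract class DelegatingMergePolicy extends MergePolicy {
    private final MergePolicy delegate;

    DelegatingMergePolicy(MergePolicy delegate) {
        this.delegate = delegate;
    }

    @Override
    public MergeSpecification findMerges(MergeTrigger trigger, SegmentInfos infos, IndexWriter writer) throws IOException {
        return delegate.findMerges(trigger, infos, writer);
    }

    @Override
    public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount,
            Map<SegmentCommitInfo, Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
        return delegate.findForcedMerges(infos, maxSegmentCount, segmentsToMerge, writer);
    }

    @Override
    public MergeSpecification findForcedDeletesMerges(SegmentInfos infos, IndexWriter writer) throws IOException {
        return delegate.findForcedDeletesMerges(infos, writer);
    }

    @Override
    public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo newSegment, IndexWriter writer) throws IOException {
        return delegate.useCompoundFile(infos, newSegment, writer);
    }
}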

@ -173,11 +173,6 @@ public class ScriptFilterParser implements FilterParser {
this.searchScript = searchScript;
}

@Override
public boolean isCacheable() {
return true;
}

@Override
protected boolean matchDoc(int doc) {
searchScript.setNextDocId(doc);

@ -315,11 +315,6 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
this.inclusiveUpperPoint = inclusiveUpperPoint;
this.values = values;
}

@Override
public boolean isCacheable() {
return true;
}

@Override
protected boolean matchDoc(int doc) {

@ -346,11 +341,6 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
this.inclusiveUpperPoint = inclusiveUpperPoint;
this.values = values;
}

@Override
public boolean isCacheable() {
return true;
}

@Override
protected boolean matchDoc(int doc) {

@ -172,11 +172,6 @@ public class GeoDistanceFilter extends Filter {
this.distance = distance;
}

@Override
public boolean isCacheable() {
return true;
}

@Override
protected boolean matchDoc(int doc) {

@ -188,11 +188,6 @@ public class GeoDistanceRangeFilter extends Filter {
this.inclusiveUpperPoint = inclusiveUpperPoint;
}

@Override
public boolean isCacheable() {
return true;
}

@Override
protected boolean matchDoc(int doc) {
final int length = values.setDocument(doc);
Some files were not shown because too many files have changed in this diff.