mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-09 06:25:07 +00:00
Simplify class hierarchy for ordinals field data. (#60606)
This PR simplifies the hierarchy for ordinals field data classes:
* Remove `AbstractIndexFieldData`, since only `AbstractIndexOrdinalsFieldData` inherits directly from it.
* Make `SortedSetOrdinalsIndexFieldData` extend `AbstractIndexOrdinalsFieldData`. This lets us remove some redundant code.
This commit is contained in:
parent
c5f4f91ac4
commit
fc63f8224f
@ -22,7 +22,7 @@ import org.apache.lucene.index.FilteredTermsEnum;
|
|||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.common.breaker.CircuitBreaker;
|
import org.elasticsearch.common.breaker.CircuitBreaker;
|
||||||
import org.elasticsearch.index.fielddata.plain.AbstractIndexFieldData;
|
import org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
@ -38,13 +38,14 @@ public final class RamAccountingTermsEnum extends FilteredTermsEnum {
|
|||||||
|
|
||||||
private final CircuitBreaker breaker;
|
private final CircuitBreaker breaker;
|
||||||
private final TermsEnum termsEnum;
|
private final TermsEnum termsEnum;
|
||||||
private final AbstractIndexFieldData.PerValueEstimator estimator;
|
private final AbstractIndexOrdinalsFieldData.PerValueEstimator estimator;
|
||||||
private final String fieldName;
|
private final String fieldName;
|
||||||
private long totalBytes;
|
private long totalBytes;
|
||||||
private long flushBuffer;
|
private long flushBuffer;
|
||||||
|
|
||||||
|
|
||||||
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker, AbstractIndexFieldData.PerValueEstimator estimator,
|
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker,
|
||||||
|
AbstractIndexOrdinalsFieldData.PerValueEstimator estimator,
|
||||||
String fieldName) {
|
String fieldName) {
|
||||||
super(termsEnum);
|
super(termsEnum);
|
||||||
this.breaker = breaker;
|
this.breaker = breaker;
|
||||||
|
@ -1,125 +0,0 @@
|
|||||||
/*
|
|
||||||
* Licensed to Elasticsearch under one or more contributor
|
|
||||||
* license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright
|
|
||||||
* ownership. Elasticsearch licenses this file to you under
|
|
||||||
* the Apache License, Version 2.0 (the "License"); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing,
|
|
||||||
* software distributed under the License is distributed on an
|
|
||||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
* KIND, either express or implied. See the License for the
|
|
||||||
* specific language governing permissions and limitations
|
|
||||||
* under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.elasticsearch.index.fielddata.plain;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
|
||||||
import org.apache.lucene.index.Terms;
|
|
||||||
import org.apache.lucene.index.TermsEnum;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.elasticsearch.ElasticsearchException;
|
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
|
||||||
import org.elasticsearch.index.fielddata.LeafFieldData;
|
|
||||||
import org.elasticsearch.index.fielddata.RamAccountingTermsEnum;
|
|
||||||
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
public abstract class AbstractIndexFieldData<FD extends LeafFieldData> implements IndexFieldData<FD> {
|
|
||||||
|
|
||||||
private final String fieldName;
|
|
||||||
private ValuesSourceType valuesSourceType;
|
|
||||||
protected final IndexFieldDataCache cache;
|
|
||||||
|
|
||||||
public AbstractIndexFieldData(
|
|
||||||
String fieldName,
|
|
||||||
ValuesSourceType valuesSourceType,
|
|
||||||
IndexFieldDataCache cache
|
|
||||||
) {
|
|
||||||
this.fieldName = fieldName;
|
|
||||||
this.valuesSourceType = valuesSourceType;
|
|
||||||
this.cache = cache;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getFieldName() {
|
|
||||||
return this.fieldName;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ValuesSourceType getValuesSourceType() {
|
|
||||||
return valuesSourceType;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FD load(LeafReaderContext context) {
|
|
||||||
if (context.reader().getFieldInfos().fieldInfo(fieldName) == null) {
|
|
||||||
// Some leaf readers may be wrapped and report different set of fields and use the same cache key.
|
|
||||||
// If a field can't be found then it doesn't mean it isn't there,
|
|
||||||
// so if a field doesn't exist then we don't cache it and just return an empty field data instance.
|
|
||||||
// The next time the field is found, we do cache.
|
|
||||||
return empty(context.reader().maxDoc());
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
FD fd = cache.load(context, this);
|
|
||||||
return fd;
|
|
||||||
} catch (Exception e) {
|
|
||||||
if (e instanceof ElasticsearchException) {
|
|
||||||
throw (ElasticsearchException) e;
|
|
||||||
} else {
|
|
||||||
throw new ElasticsearchException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param maxDoc of the current reader
|
|
||||||
* @return an empty field data instances for field data lookups of empty segments (returning no values)
|
|
||||||
*/
|
|
||||||
protected abstract FD empty(int maxDoc);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A {@code PerValueEstimator} is a sub-class that can be used to estimate
|
|
||||||
* the memory overhead for loading the data. Each field data
|
|
||||||
* implementation should implement its own {@code PerValueEstimator} if it
|
|
||||||
* intends to take advantage of the CircuitBreaker.
|
|
||||||
* <p>
|
|
||||||
* Note that the .beforeLoad(...) and .afterLoad(...) methods must be
|
|
||||||
* manually called.
|
|
||||||
*/
|
|
||||||
public interface PerValueEstimator {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the number of bytes for the given term
|
|
||||||
*/
|
|
||||||
long bytesPerValue(BytesRef term);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Execute any pre-loading estimations for the terms. May also
|
|
||||||
* optionally wrap a {@link TermsEnum} in a
|
|
||||||
* {@link RamAccountingTermsEnum}
|
|
||||||
* which will estimate the memory on a per-term basis.
|
|
||||||
*
|
|
||||||
* @param terms terms to be estimated
|
|
||||||
* @return A TermsEnum for the given terms
|
|
||||||
*/
|
|
||||||
TermsEnum beforeLoad(Terms terms) throws IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Possibly adjust a circuit breaker after field data has been loaded,
|
|
||||||
* now that the actual amount of memory used by the field data is known
|
|
||||||
*
|
|
||||||
* @param termsEnum terms that were loaded
|
|
||||||
* @param actualUsed actual field data memory usage
|
|
||||||
*/
|
|
||||||
void afterLoad(TermsEnum termsEnum, long actualUsed);
|
|
||||||
}
|
|
||||||
}
|
|
@ -21,10 +21,9 @@ package org.elasticsearch.index.fielddata.plain;
|
|||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.FilteredTermsEnum;
|
|
||||||
import org.apache.lucene.index.LeafReader;
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.OrdinalMap;
|
import org.apache.lucene.index.OrdinalMap;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
@ -32,35 +31,47 @@ import org.elasticsearch.ElasticsearchException;
|
|||||||
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
||||||
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
||||||
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
|
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
|
||||||
|
import org.elasticsearch.index.fielddata.RamAccountingTermsEnum;
|
||||||
|
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
||||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
|
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
|
||||||
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
||||||
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldData<LeafOrdinalsFieldData>
|
public abstract class AbstractIndexOrdinalsFieldData implements IndexOrdinalsFieldData {
|
||||||
implements IndexOrdinalsFieldData {
|
|
||||||
private static final Logger logger = LogManager.getLogger(AbstractBinaryDVLeafFieldData.class);
|
private static final Logger logger = LogManager.getLogger(AbstractBinaryDVLeafFieldData.class);
|
||||||
|
|
||||||
private final double minFrequency, maxFrequency;
|
private final String fieldName;
|
||||||
private final int minSegmentSize;
|
private final ValuesSourceType valuesSourceType;
|
||||||
|
private final IndexFieldDataCache cache;
|
||||||
protected final CircuitBreakerService breakerService;
|
protected final CircuitBreakerService breakerService;
|
||||||
|
protected final Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction;
|
||||||
|
|
||||||
protected AbstractIndexOrdinalsFieldData(
|
protected AbstractIndexOrdinalsFieldData(
|
||||||
String fieldName,
|
String fieldName,
|
||||||
ValuesSourceType valuesSourceType,
|
ValuesSourceType valuesSourceType,
|
||||||
IndexFieldDataCache cache,
|
IndexFieldDataCache cache,
|
||||||
CircuitBreakerService breakerService,
|
CircuitBreakerService breakerService,
|
||||||
double minFrequency,
|
Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction
|
||||||
double maxFrequency,
|
|
||||||
int minSegmentSize
|
|
||||||
) {
|
) {
|
||||||
super(fieldName, valuesSourceType, cache);
|
this.fieldName = fieldName;
|
||||||
|
this.valuesSourceType = valuesSourceType;
|
||||||
|
this.cache = cache;
|
||||||
this.breakerService = breakerService;
|
this.breakerService = breakerService;
|
||||||
this.minFrequency = minFrequency;
|
this.scriptFunction = scriptFunction;
|
||||||
this.maxFrequency = maxFrequency;
|
}
|
||||||
this.minSegmentSize = minSegmentSize;
|
|
||||||
|
@Override
|
||||||
|
public String getFieldName() {
|
||||||
|
return this.fieldName;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ValuesSourceType getValuesSourceType() {
|
||||||
|
return valuesSourceType;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -68,6 +79,27 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafOrdinalsFieldData load(LeafReaderContext context) {
|
||||||
|
if (context.reader().getFieldInfos().fieldInfo(fieldName) == null) {
|
||||||
|
// Some leaf readers may be wrapped and report different set of fields and use the same cache key.
|
||||||
|
// If a field can't be found then it doesn't mean it isn't there,
|
||||||
|
// so if a field doesn't exist then we don't cache it and just return an empty field data instance.
|
||||||
|
// The next time the field is found, we do cache.
|
||||||
|
return AbstractLeafOrdinalsFieldData.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return cache.load(context, this);
|
||||||
|
} catch (Exception e) {
|
||||||
|
if (e instanceof ElasticsearchException) {
|
||||||
|
throw (ElasticsearchException) e;
|
||||||
|
} else {
|
||||||
|
throw new ElasticsearchException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
|
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
|
||||||
IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader);
|
IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader);
|
||||||
@ -121,60 +153,49 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
|
|||||||
this,
|
this,
|
||||||
breakerService,
|
breakerService,
|
||||||
logger,
|
logger,
|
||||||
AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION
|
scriptFunction
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected LeafOrdinalsFieldData empty(int maxDoc) {
|
|
||||||
return AbstractLeafOrdinalsFieldData.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
|
|
||||||
if (iterator == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
int docCount = terms.getDocCount();
|
|
||||||
if (docCount == -1) {
|
|
||||||
docCount = reader.maxDoc();
|
|
||||||
}
|
|
||||||
if (docCount >= minSegmentSize) {
|
|
||||||
final int minFreq = minFrequency > 1.0
|
|
||||||
? (int) minFrequency
|
|
||||||
: (int)(docCount * minFrequency);
|
|
||||||
final int maxFreq = maxFrequency > 1.0
|
|
||||||
? (int) maxFrequency
|
|
||||||
: (int)(docCount * maxFrequency);
|
|
||||||
if (minFreq > 1 || maxFreq < docCount) {
|
|
||||||
iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return iterator;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean supportsGlobalOrdinalsMapping() {
|
public boolean supportsGlobalOrdinalsMapping() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class FrequencyFilter extends FilteredTermsEnum {
|
/**
|
||||||
|
* A {@code PerValueEstimator} is a sub-class that can be used to estimate
|
||||||
|
* the memory overhead for loading the data. Each field data
|
||||||
|
* implementation should implement its own {@code PerValueEstimator} if it
|
||||||
|
* intends to take advantage of the CircuitBreaker.
|
||||||
|
* <p>
|
||||||
|
* Note that the .beforeLoad(...) and .afterLoad(...) methods must be
|
||||||
|
* manually called.
|
||||||
|
*/
|
||||||
|
public interface PerValueEstimator {
|
||||||
|
|
||||||
private int minFreq;
|
/**
|
||||||
private int maxFreq;
|
* @return the number of bytes for the given term
|
||||||
FrequencyFilter(TermsEnum delegate, int minFreq, int maxFreq) {
|
*/
|
||||||
super(delegate, false);
|
long bytesPerValue(BytesRef term);
|
||||||
this.minFreq = minFreq;
|
|
||||||
this.maxFreq = maxFreq;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
protected AcceptStatus accept(BytesRef arg0) throws IOException {
|
* Execute any pre-loading estimations for the terms. May also
|
||||||
int docFreq = docFreq();
|
* optionally wrap a {@link TermsEnum} in a
|
||||||
if (docFreq >= minFreq && docFreq <= maxFreq) {
|
* {@link RamAccountingTermsEnum}
|
||||||
return AcceptStatus.YES;
|
* which will estimate the memory on a per-term basis.
|
||||||
}
|
*
|
||||||
return AcceptStatus.NO;
|
* @param terms terms to be estimated
|
||||||
}
|
* @return A TermsEnum for the given terms
|
||||||
|
*/
|
||||||
|
TermsEnum beforeLoad(Terms terms) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Possibly adjust a circuit breaker after field data has been loaded,
|
||||||
|
* now that the actual amount of memory used by the field data is known
|
||||||
|
*
|
||||||
|
* @param termsEnum terms that were loaded
|
||||||
|
* @param actualUsed actual field data memory usage
|
||||||
|
*/
|
||||||
|
void afterLoad(TermsEnum termsEnum, long actualUsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -37,7 +37,6 @@ import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
|||||||
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
|
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
|
||||||
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.index.mapper.TextFieldMapper;
|
|
||||||
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
||||||
import org.elasticsearch.search.DocValueFormat;
|
import org.elasticsearch.search.DocValueFormat;
|
||||||
import org.elasticsearch.search.MultiValueMode;
|
import org.elasticsearch.search.MultiValueMode;
|
||||||
@ -139,10 +138,7 @@ public class ConstantIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
|||||||
private final ConstantLeafFieldData atomicFieldData;
|
private final ConstantLeafFieldData atomicFieldData;
|
||||||
|
|
||||||
private ConstantIndexFieldData(String name, String value, ValuesSourceType valuesSourceType) {
|
private ConstantIndexFieldData(String name, String value, ValuesSourceType valuesSourceType) {
|
||||||
super(name, valuesSourceType, null, null,
|
super(name, valuesSourceType, null, null, AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
|
||||||
TextFieldMapper.Defaults.FIELDDATA_MIN_FREQUENCY,
|
|
||||||
TextFieldMapper.Defaults.FIELDDATA_MAX_FREQUENCY,
|
|
||||||
TextFieldMapper.Defaults.FIELDDATA_MIN_SEGMENT_SIZE);
|
|
||||||
atomicFieldData = new ConstantLeafFieldData(value);
|
atomicFieldData = new ConstantLeafFieldData(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@ import org.apache.logging.log4j.LogManager;
|
|||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.apache.lucene.codecs.blocktree.FieldReader;
|
import org.apache.lucene.codecs.blocktree.FieldReader;
|
||||||
import org.apache.lucene.codecs.blocktree.Stats;
|
import org.apache.lucene.codecs.blocktree.Stats;
|
||||||
|
import org.apache.lucene.index.FilteredTermsEnum;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
@ -58,6 +59,9 @@ import java.io.IOException;
|
|||||||
public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
||||||
private static final Logger logger = LogManager.getLogger(PagedBytesIndexFieldData.class);
|
private static final Logger logger = LogManager.getLogger(PagedBytesIndexFieldData.class);
|
||||||
|
|
||||||
|
private final double minFrequency, maxFrequency;
|
||||||
|
private final int minSegmentSize;
|
||||||
|
|
||||||
public static class Builder implements IndexFieldData.Builder {
|
public static class Builder implements IndexFieldData.Builder {
|
||||||
private final String name;
|
private final String name;
|
||||||
private final double minFrequency, maxFrequency;
|
private final double minFrequency, maxFrequency;
|
||||||
@ -88,7 +92,10 @@ public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
|||||||
double maxFrequency,
|
double maxFrequency,
|
||||||
int minSegmentSize
|
int minSegmentSize
|
||||||
) {
|
) {
|
||||||
super(fieldName, valuesSourceType, cache, breakerService, minFrequency, maxFrequency, minSegmentSize);
|
super(fieldName, valuesSourceType, cache, breakerService, AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
|
||||||
|
this.minFrequency = minFrequency;
|
||||||
|
this.maxFrequency = maxFrequency;
|
||||||
|
this.minSegmentSize = minSegmentSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -255,6 +262,28 @@ public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
|
||||||
|
if (iterator == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
int docCount = terms.getDocCount();
|
||||||
|
if (docCount == -1) {
|
||||||
|
docCount = reader.maxDoc();
|
||||||
|
}
|
||||||
|
if (docCount >= minSegmentSize) {
|
||||||
|
final int minFreq = minFrequency > 1.0
|
||||||
|
? (int) minFrequency
|
||||||
|
: (int)(docCount * minFrequency);
|
||||||
|
final int maxFreq = maxFrequency > 1.0
|
||||||
|
? (int) maxFrequency
|
||||||
|
: (int)(docCount * maxFrequency);
|
||||||
|
if (minFreq > 1 || maxFreq < docCount) {
|
||||||
|
iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return iterator;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adjust the circuit breaker now that terms have been loaded, getting
|
* Adjust the circuit breaker now that terms have been loaded, getting
|
||||||
* the actual used either from the parameter (if estimation worked for
|
* the actual used either from the parameter (if estimation worked for
|
||||||
@ -271,6 +300,25 @@ public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
|||||||
}
|
}
|
||||||
breaker.addWithoutBreaking(-(estimatedBytes - actualUsed));
|
breaker.addWithoutBreaking(-(estimatedBytes - actualUsed));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class FrequencyFilter extends FilteredTermsEnum {
|
||||||
|
private final int minFreq;
|
||||||
|
private final int maxFreq;
|
||||||
|
|
||||||
|
FrequencyFilter(TermsEnum delegate, int minFreq, int maxFreq) {
|
||||||
|
super(delegate, false);
|
||||||
|
this.minFreq = minFreq;
|
||||||
|
this.maxFreq = maxFreq;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected AcceptStatus accept(BytesRef arg0) throws IOException {
|
||||||
|
int docFreq = docFreq();
|
||||||
|
if (docFreq >= minFreq && docFreq <= maxFreq) {
|
||||||
|
return AcceptStatus.YES;
|
||||||
|
}
|
||||||
|
return AcceptStatus.NO;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,27 +19,20 @@
|
|||||||
|
|
||||||
package org.elasticsearch.index.fielddata.plain;
|
package org.elasticsearch.index.fielddata.plain;
|
||||||
|
|
||||||
import org.apache.logging.log4j.LogManager;
|
|
||||||
import org.apache.logging.log4j.Logger;
|
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.OrdinalMap;
|
import org.apache.lucene.index.OrdinalMap;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.search.SortedSetSelector;
|
import org.apache.lucene.search.SortedSetSelector;
|
||||||
import org.apache.lucene.search.SortedSetSortField;
|
import org.apache.lucene.search.SortedSetSortField;
|
||||||
import org.elasticsearch.ElasticsearchException;
|
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.util.BigArrays;
|
import org.elasticsearch.common.util.BigArrays;
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
|
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
||||||
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
|
||||||
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
|
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
|
||||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||||
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
||||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
|
||||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
|
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
||||||
import org.elasticsearch.search.DocValueFormat;
|
import org.elasticsearch.search.DocValueFormat;
|
||||||
@ -48,10 +41,9 @@ import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
|||||||
import org.elasticsearch.search.sort.BucketedSort;
|
import org.elasticsearch.search.sort.BucketedSort;
|
||||||
import org.elasticsearch.search.sort.SortOrder;
|
import org.elasticsearch.search.sort.SortOrder;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
public class SortedSetOrdinalsIndexFieldData implements IndexOrdinalsFieldData {
|
public class SortedSetOrdinalsIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
||||||
|
|
||||||
public static class Builder implements IndexFieldData.Builder {
|
public static class Builder implements IndexFieldData.Builder {
|
||||||
private final String name;
|
private final String name;
|
||||||
@ -78,13 +70,6 @@ public class SortedSetOrdinalsIndexFieldData implements IndexOrdinalsFieldData {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final String fieldName;
|
|
||||||
private final IndexFieldDataCache cache;
|
|
||||||
private final CircuitBreakerService breakerService;
|
|
||||||
private final Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction;
|
|
||||||
private final ValuesSourceType valuesSourceType;
|
|
||||||
private static final Logger logger = LogManager.getLogger(SortedSetOrdinalsIndexFieldData.class);
|
|
||||||
|
|
||||||
public SortedSetOrdinalsIndexFieldData(
|
public SortedSetOrdinalsIndexFieldData(
|
||||||
IndexFieldDataCache cache,
|
IndexFieldDataCache cache,
|
||||||
String fieldName,
|
String fieldName,
|
||||||
@ -92,21 +77,7 @@ public class SortedSetOrdinalsIndexFieldData implements IndexOrdinalsFieldData {
|
|||||||
CircuitBreakerService breakerService,
|
CircuitBreakerService breakerService,
|
||||||
Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction
|
Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction
|
||||||
) {
|
) {
|
||||||
this.fieldName = fieldName;
|
super(fieldName, valuesSourceType, cache, breakerService, scriptFunction);
|
||||||
this.valuesSourceType = valuesSourceType;
|
|
||||||
this.cache = cache;
|
|
||||||
this.breakerService = breakerService;
|
|
||||||
this.scriptFunction = scriptFunction;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final String getFieldName() {
|
|
||||||
return fieldName;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ValuesSourceType getValuesSourceType() {
|
|
||||||
return valuesSourceType;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -121,7 +92,7 @@ public class SortedSetOrdinalsIndexFieldData implements IndexOrdinalsFieldData {
|
|||||||
(source.sortMissingLast(missingValue) == false && source.sortMissingFirst(missingValue) == false)) {
|
(source.sortMissingLast(missingValue) == false && source.sortMissingFirst(missingValue) == false)) {
|
||||||
return new SortField(getFieldName(), source, reverse);
|
return new SortField(getFieldName(), source, reverse);
|
||||||
}
|
}
|
||||||
SortField sortField = new SortedSetSortField(fieldName, reverse,
|
SortField sortField = new SortedSetSortField(getFieldName(), reverse,
|
||||||
sortMode == MultiValueMode.MAX ? SortedSetSelector.Type.MAX : SortedSetSelector.Type.MIN);
|
sortMode == MultiValueMode.MAX ? SortedSetSelector.Type.MAX : SortedSetSelector.Type.MIN);
|
||||||
sortField.setMissingValue(source.sortMissingLast(missingValue) ^ reverse ?
|
sortField.setMissingValue(source.sortMissingLast(missingValue) ^ reverse ?
|
||||||
SortedSetSortField.STRING_LAST : SortedSetSortField.STRING_FIRST);
|
SortedSetSortField.STRING_LAST : SortedSetSortField.STRING_FIRST);
|
||||||
@ -136,65 +107,14 @@ public class SortedSetOrdinalsIndexFieldData implements IndexOrdinalsFieldData {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public LeafOrdinalsFieldData load(LeafReaderContext context) {
|
public LeafOrdinalsFieldData load(LeafReaderContext context) {
|
||||||
return new SortedSetBytesLeafFieldData(context.reader(), fieldName, scriptFunction);
|
return new SortedSetBytesLeafFieldData(context.reader(), getFieldName(), scriptFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public LeafOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
|
public LeafOrdinalsFieldData loadDirect(LeafReaderContext context) {
|
||||||
return load(context);
|
return load(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
|
|
||||||
IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader);
|
|
||||||
if (fieldData instanceof GlobalOrdinalsIndexFieldData) {
|
|
||||||
// we create a new instance of the cached value for each consumer in order
|
|
||||||
// to avoid creating new TermsEnums for each segment in the cached instance
|
|
||||||
return ((GlobalOrdinalsIndexFieldData) fieldData).newConsumer(indexReader);
|
|
||||||
} else {
|
|
||||||
return fieldData;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private IndexOrdinalsFieldData loadGlobalInternal(DirectoryReader indexReader) {
|
|
||||||
if (indexReader.leaves().size() <= 1) {
|
|
||||||
// ordinals are already global
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
boolean fieldFound = false;
|
|
||||||
for (LeafReaderContext context : indexReader.leaves()) {
|
|
||||||
if (context.reader().getFieldInfos().fieldInfo(getFieldName()) != null) {
|
|
||||||
fieldFound = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (fieldFound == false) {
|
|
||||||
// Some directory readers may be wrapped and report different set of fields and use the same cache key.
|
|
||||||
// If a field can't be found then it doesn't mean it isn't there,
|
|
||||||
// so if a field doesn't exist then we don't cache it and just return an empty field data instance.
|
|
||||||
// The next time the field is found, we do cache.
|
|
||||||
try {
|
|
||||||
return GlobalOrdinalsBuilder.buildEmpty(indexReader, this);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
return cache.load(indexReader, this);
|
|
||||||
} catch (Exception e) {
|
|
||||||
if (e instanceof ElasticsearchException) {
|
|
||||||
throw (ElasticsearchException) e;
|
|
||||||
} else {
|
|
||||||
throw new ElasticsearchException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public IndexOrdinalsFieldData loadGlobalDirect(DirectoryReader indexReader) throws Exception {
|
|
||||||
return GlobalOrdinalsBuilder.build(indexReader, this, breakerService, logger, scriptFunction);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public OrdinalMap getOrdinalMap() {
|
public OrdinalMap getOrdinalMap() {
|
||||||
return null;
|
return null;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user