mirror of https://github.com/apache/lucene.git
SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
This commit is contained in:
parent
aec3654fb8
commit
5525f42928
|
@ -661,7 +661,7 @@ public class MultiDocValues {
|
|||
public final OrdinalMap mapping;
|
||||
|
||||
/** Creates a new MultiSortedDocValues over <code>values</code> */
|
||||
MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
|
||||
public MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
|
||||
assert docStarts.length == values.length + 1;
|
||||
this.values = values;
|
||||
this.docStarts = docStarts;
|
||||
|
|
|
@ -303,6 +303,9 @@ Other Changes
|
|||
|
||||
* SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail Khludnev)
|
||||
|
||||
* SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
|
||||
(yonik)
|
||||
|
||||
================== 6.0.1 ==================
|
||||
(No Changes)
|
||||
|
||||
|
|
|
@ -36,12 +36,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
|
|
|
@ -26,16 +26,6 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.carrotsearch.hppc.IntHashSet;
|
||||
import com.carrotsearch.hppc.IntObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongHashSet;
|
||||
import com.carrotsearch.hppc.LongObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongObjectMap;
|
||||
import com.carrotsearch.hppc.cursors.IntObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.ObjectCursor;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -53,7 +43,6 @@ import org.apache.lucene.search.Collector;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.search.QueryWrapperFilter;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
|
@ -61,7 +50,6 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
@ -87,12 +75,24 @@ import org.apache.solr.search.DocIterator;
|
|||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.DocSlice;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QueryWrapperFilter;
|
||||
import org.apache.solr.search.SolrConstantScoreQuery;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SortSpecParsing;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
||||
import com.carrotsearch.hppc.IntHashSet;
|
||||
import com.carrotsearch.hppc.IntObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongHashSet;
|
||||
import com.carrotsearch.hppc.LongObjectHashMap;
|
||||
import com.carrotsearch.hppc.LongObjectMap;
|
||||
import com.carrotsearch.hppc.cursors.IntObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongCursor;
|
||||
import com.carrotsearch.hppc.cursors.LongObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.ObjectCursor;
|
||||
|
||||
/**
|
||||
* The ExpandComponent is designed to work with the CollapsingPostFilter.
|
||||
* The CollapsingPostFilter collapses a result set on a field.
|
||||
|
|
|
@ -0,0 +1,296 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.CompositeReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader.CoreClosedListener;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
|
||||
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
* This class forces a composite reader (eg a {@link
|
||||
* MultiReader} or {@link DirectoryReader}) to emulate a
|
||||
* {@link LeafReader}. This requires implementing the postings
|
||||
* APIs on-the-fly, using the static methods in {@link
|
||||
* MultiFields}, {@link MultiDocValues}, by stepping through
|
||||
* the sub-readers to merge fields/terms, appending docs, etc.
|
||||
*
|
||||
* <p><b>NOTE</b>: this class almost always results in a
|
||||
* performance hit. If this is important to your use case,
|
||||
* you'll get better performance by gathering the sub readers using
|
||||
* {@link IndexReader#getContext()} to get the
|
||||
* leaves and then operate per-LeafReader,
|
||||
* instead of using this class.
|
||||
*/
|
||||
|
||||
public final class SlowCompositeReaderWrapper extends LeafReader {
|
||||
|
||||
private final CompositeReader in;
|
||||
private final Fields fields;
|
||||
private final boolean merging;
|
||||
|
||||
/** This method is sugar for getting an {@link LeafReader} from
|
||||
* an {@link IndexReader} of any kind. If the reader is already atomic,
|
||||
* it is returned unchanged, otherwise wrapped by this class.
|
||||
*/
|
||||
public static LeafReader wrap(IndexReader reader) throws IOException {
|
||||
if (reader instanceof CompositeReader) {
|
||||
return new SlowCompositeReaderWrapper((CompositeReader) reader, false);
|
||||
} else {
|
||||
assert reader instanceof LeafReader;
|
||||
return (LeafReader) reader;
|
||||
}
|
||||
}
|
||||
|
||||
SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
|
||||
super();
|
||||
in = reader;
|
||||
if (getFieldInfos().hasPointValues()) {
|
||||
throw new IllegalArgumentException("cannot wrap points");
|
||||
}
|
||||
fields = MultiFields.getFields(in);
|
||||
in.registerParentReader(this);
|
||||
this.merging = merging;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SlowCompositeReaderWrapper(" + in + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addCoreClosedListener(CoreClosedListener listener) {
|
||||
addCoreClosedListenerAsReaderClosedListener(in, listener);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeCoreClosedListener(CoreClosedListener listener) {
|
||||
removeCoreClosedListenerAsReaderClosedListener(in, listener);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() {
|
||||
ensureOpen();
|
||||
return fields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNumericDocValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return MultiDocValues.getNumericValues(in, field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getDocsWithField(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return MultiDocValues.getDocsWithField(in, field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return MultiDocValues.getBinaryValues(in, field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return MultiDocValues.getSortedNumericValues(in, field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues getSortedDocValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
OrdinalMap map = null;
|
||||
synchronized (cachedOrdMaps) {
|
||||
map = cachedOrdMaps.get(field);
|
||||
if (map == null) {
|
||||
// uncached, or not a multi dv
|
||||
SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
|
||||
if (dv instanceof MultiSortedDocValues) {
|
||||
map = ((MultiSortedDocValues)dv).mapping;
|
||||
if (map.owner == getCoreCacheKey() && merging == false) {
|
||||
cachedOrdMaps.put(field, map);
|
||||
}
|
||||
}
|
||||
return dv;
|
||||
}
|
||||
}
|
||||
int size = in.leaves().size();
|
||||
final SortedDocValues[] values = new SortedDocValues[size];
|
||||
final int[] starts = new int[size+1];
|
||||
for (int i = 0; i < size; i++) {
|
||||
LeafReaderContext context = in.leaves().get(i);
|
||||
final LeafReader reader = context.reader();
|
||||
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
|
||||
if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
|
||||
return null;
|
||||
}
|
||||
SortedDocValues v = reader.getSortedDocValues(field);
|
||||
if (v == null) {
|
||||
v = DocValues.emptySorted();
|
||||
}
|
||||
values[i] = v;
|
||||
starts[i] = context.docBase;
|
||||
}
|
||||
starts[size] = maxDoc();
|
||||
return new MultiSortedDocValues(values, starts, map);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
OrdinalMap map = null;
|
||||
synchronized (cachedOrdMaps) {
|
||||
map = cachedOrdMaps.get(field);
|
||||
if (map == null) {
|
||||
// uncached, or not a multi dv
|
||||
SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
|
||||
if (dv instanceof MultiSortedSetDocValues) {
|
||||
map = ((MultiSortedSetDocValues)dv).mapping;
|
||||
if (map.owner == getCoreCacheKey() && merging == false) {
|
||||
cachedOrdMaps.put(field, map);
|
||||
}
|
||||
}
|
||||
return dv;
|
||||
}
|
||||
}
|
||||
|
||||
assert map != null;
|
||||
int size = in.leaves().size();
|
||||
final SortedSetDocValues[] values = new SortedSetDocValues[size];
|
||||
final int[] starts = new int[size+1];
|
||||
for (int i = 0; i < size; i++) {
|
||||
LeafReaderContext context = in.leaves().get(i);
|
||||
final LeafReader reader = context.reader();
|
||||
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
|
||||
if(fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET){
|
||||
return null;
|
||||
}
|
||||
SortedSetDocValues v = reader.getSortedSetDocValues(field);
|
||||
if (v == null) {
|
||||
v = DocValues.emptySortedSet();
|
||||
}
|
||||
values[i] = v;
|
||||
starts[i] = context.docBase;
|
||||
}
|
||||
starts[size] = maxDoc();
|
||||
return new MultiSortedSetDocValues(values, starts, map);
|
||||
}
|
||||
|
||||
// TODO: this could really be a weak map somewhere else on the coreCacheKey,
|
||||
// but do we really need to optimize slow-wrapper any more?
|
||||
private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>();
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNormValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return MultiDocValues.getNormValues(in, field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
ensureOpen();
|
||||
return in.getTermVectors(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numDocs() {
|
||||
// Don't call ensureOpen() here (it could affect performance)
|
||||
return in.numDocs();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxDoc() {
|
||||
// Don't call ensureOpen() here (it could affect performance)
|
||||
return in.maxDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
|
||||
ensureOpen();
|
||||
in.document(docID, visitor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getLiveDocs() {
|
||||
ensureOpen();
|
||||
return MultiFields.getLiveDocs(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointValues getPointValues() {
|
||||
ensureOpen();
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
ensureOpen();
|
||||
return MultiFields.getMergedFieldInfos(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getCoreCacheKey() {
|
||||
return in.getCoreCacheKey();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getCombinedCoreAndDeletesKey() {
|
||||
return in.getCombinedCoreAndDeletesKey();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doClose() throws IOException {
|
||||
// TODO: as this is a wrapper, should we really close the delegate?
|
||||
in.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
ensureOpen();
|
||||
for (LeafReaderContext ctx : in.leaves()) {
|
||||
ctx.reader().checkIntegrity();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.MergePolicy;
|
||||
import org.apache.lucene.index.MergePolicyWrapper;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.index.MergeTrigger;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.SegmentCommitInfo;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
|
||||
// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction?
|
||||
|
||||
public final class SortingMergePolicy extends MergePolicyWrapper {
|
||||
|
||||
private final Sort sort;
|
||||
|
||||
/** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
|
||||
public SortingMergePolicy(MergePolicy in, Sort sort) {
|
||||
super(in);
|
||||
this.sort = sort;
|
||||
}
|
||||
|
||||
/** Return the {@link Sort} order that is used to sort segments when merging. */
|
||||
public Sort getSort() {
|
||||
return sort;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SortingMergePolicy(" + in + ", sort=" + sort + ")";
|
||||
}
|
||||
}
|
|
@ -28,6 +28,7 @@ import java.util.Set;
|
|||
import java.util.TreeSet;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
|
@ -41,22 +42,16 @@ import org.apache.lucene.spatial.SpatialStrategy;
|
|||
import org.apache.lucene.spatial.query.SpatialArgs;
|
||||
import org.apache.lucene.spatial.query.SpatialArgsParser;
|
||||
import org.apache.lucene.spatial.query.SpatialOperation;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.DistanceUnits;
|
||||
import org.apache.solr.util.MapListener;
|
||||
import org.apache.solr.util.SpatialUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.context.SpatialContextFactory;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
|
@ -66,6 +61,12 @@ import org.locationtech.spatial4j.io.SupportedFormats;
|
|||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.locationtech.spatial4j.shape.Rectangle;
|
||||
import org.locationtech.spatial4j.shape.Shape;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
|
||||
/**
|
||||
* Abstract base class for Solr FieldTypes based on a Lucene 4 {@link SpatialStrategy}.
|
||||
|
|
|
@ -23,10 +23,10 @@ import java.nio.ByteBuffer;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -22,15 +22,14 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
|
@ -40,6 +39,7 @@ import org.apache.solr.analysis.SolrAnalyzer;
|
|||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.function.OrdFieldSource;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
|
|
@ -40,12 +40,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/**
|
||||
* Field for collated sort keys.
|
||||
|
|
|
@ -44,7 +44,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.FieldValueQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
|
|
|
@ -16,12 +16,6 @@
|
|||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
|
@ -31,6 +25,12 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
|
@ -45,7 +45,6 @@ import org.apache.lucene.search.DocValuesRangeQuery;
|
|||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -55,6 +54,7 @@ import org.apache.solr.common.EnumFieldValue;
|
|||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.Document;
|
||||
|
|
|
@ -16,17 +16,17 @@
|
|||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.function.FileFloatSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/** Get values from an external file instead of the index.
|
||||
*
|
||||
|
|
|
@ -49,7 +49,6 @@ import org.apache.lucene.search.SortedNumericSelector;
|
|||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -66,6 +65,7 @@ import org.apache.solr.query.SolrRangeQuery;
|
|||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.Sorting;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -18,23 +18,23 @@ package org.apache.solr.schema;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.locationtech.spatial4j.io.GeohashUtils;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SolrConstantScoreQuery;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.search.function.ValueSourceRangeFilter;
|
||||
import org.apache.solr.search.function.distance.GeohashHaversineFunction;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.SpatialUtils;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.locationtech.spatial4j.io.GeohashUtils;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
|
||||
/**
|
||||
* This is a class that represents a <a
|
||||
|
|
|
@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
|
||||
|
@ -37,7 +37,6 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.DelegatingCollector;
|
||||
|
@ -45,8 +44,8 @@ import org.apache.solr.search.ExtendedQueryBase;
|
|||
import org.apache.solr.search.PostFilter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.SpatialUtils;
|
||||
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@ -30,13 +29,14 @@ import org.apache.lucene.search.BooleanClause;
|
|||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SpatialOptions;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.locationtech.spatial4j.distance.DistanceUtils;
|
||||
|
||||
/**
|
||||
* A point type that indexes a point in an n-dimensional space as separate fields and supports range queries.
|
||||
|
|
|
@ -33,14 +33,14 @@ import org.apache.lucene.index.IndexableField;
|
|||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.AttributeSource.State;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.solr.analysis.SolrAnalyzer;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.Sorting;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -20,16 +20,16 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/**
|
||||
* Utility Field used for random sorting. It should not be passed a value.
|
||||
|
|
|
@ -27,10 +27,10 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
public class StrField extends PrimitiveFieldType {
|
||||
|
||||
|
|
|
@ -16,14 +16,16 @@
|
|||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.QueryBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -31,9 +33,7 @@ import org.apache.solr.query.SolrRangeQuery;
|
|||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.Sorting;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/** <code>TextField</code> is the basic type for configurable text analysis.
|
||||
* Analyzers for field types using this implementation should be defined in the schema.
|
||||
|
|
|
@ -26,8 +26,8 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.FieldType.LegacyNumericType;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
|
@ -47,7 +47,6 @@ import org.apache.lucene.search.LegacyNumericRangeQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -61,6 +60,7 @@ import org.apache.solr.response.TextResponseWriter;
|
|||
import org.apache.solr.search.FunctionRangeQuery;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.function.ValueSourceRangeFilter;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.apache.solr.util.DateMathParser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
|
|
@ -25,15 +25,7 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.carrotsearch.hppc.FloatArrayList;
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import com.carrotsearch.hppc.IntIntHashMap;
|
||||
import com.carrotsearch.hppc.IntLongHashMap;
|
||||
import com.carrotsearch.hppc.cursors.IntIntCursor;
|
||||
import com.carrotsearch.hppc.cursors.IntLongCursor;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -49,13 +41,12 @@ import org.apache.lucene.queries.function.FunctionValues;
|
|||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -65,8 +56,8 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.handler.component.QueryElevationComponent;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
|
@ -75,6 +66,14 @@ import org.apache.solr.schema.StrField;
|
|||
import org.apache.solr.schema.TrieFloatField;
|
||||
import org.apache.solr.schema.TrieIntField;
|
||||
import org.apache.solr.schema.TrieLongField;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
import com.carrotsearch.hppc.FloatArrayList;
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import com.carrotsearch.hppc.IntIntHashMap;
|
||||
import com.carrotsearch.hppc.IntLongHashMap;
|
||||
import com.carrotsearch.hppc.cursors.IntIntCursor;
|
||||
import com.carrotsearch.hppc.cursors.IntLongCursor;
|
||||
|
||||
/**
|
||||
|
||||
|
|
|
@ -20,16 +20,16 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
/**
|
||||
* Lucene 5.0 removes "accidental" insanity, so you must explicitly
|
||||
|
|
|
@ -18,13 +18,12 @@ package org.apache.solr.search;
|
|||
|
||||
import java.net.URL;
|
||||
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
|
||||
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
/**
|
||||
* A SolrInfoMBean that provides introspection of the Solr FieldCache
|
||||
|
|
|
@ -53,7 +53,6 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.MultiPostingsEnum;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
|
@ -94,7 +93,6 @@ import org.apache.lucene.search.TopScoreDocCollector;
|
|||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
@ -109,6 +107,7 @@ import org.apache.solr.core.DirectoryFactory.DirContext;
|
|||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
|
@ -122,6 +121,7 @@ import org.apache.solr.schema.TrieFloatField;
|
|||
import org.apache.solr.schema.TrieIntField;
|
||||
import org.apache.solr.search.facet.UnInvertedField;
|
||||
import org.apache.solr.search.stats.StatsSource;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.solr.update.IndexFingerprint;
|
||||
import org.apache.solr.update.SolrIndexConfig;
|
||||
import org.slf4j.Logger;
|
||||
|
|
|
@ -27,17 +27,16 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.uninverting.DocTermOrds;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.TrieField;
|
||||
import org.apache.solr.search.BitDocSet;
|
||||
|
@ -45,6 +44,7 @@ import org.apache.solr.search.DocIterator;
|
|||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrCache;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.uninverting.DocTermOrds;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -20,13 +20,12 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@ -34,6 +33,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
|||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.util.mutable.MutableValue;
|
||||
import org.apache.lucene.util.mutable.MutableValueInt;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.Insanity;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
|
|
@ -20,18 +20,18 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.Insanity;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
|
|
@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.join.JoinUtil;
|
||||
import org.apache.lucene.search.join.ScoreMode;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.Aliases;
|
||||
|
@ -45,6 +44,7 @@ import org.apache.solr.search.QParser;
|
|||
import org.apache.solr.search.QParserPlugin;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,887 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.codecs.PostingsFormat; // javadocs
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* This class enables fast access to multiple term ords for
|
||||
* a specified field across all docIDs.
|
||||
*
|
||||
* Like FieldCache, it uninverts the index and holds a
|
||||
* packed data structure in RAM to enable fast access.
|
||||
* Unlike FieldCache, it can handle multi-valued fields,
|
||||
* and, it does not hold the term bytes in RAM. Rather, you
|
||||
* must obtain a TermsEnum from the {@link #getOrdTermsEnum}
|
||||
* method, and then seek-by-ord to get the term's bytes.
|
||||
*
|
||||
* While normally term ords are type long, in this API they are
|
||||
* int as the internal representation here cannot address
|
||||
* more than MAX_INT unique terms. Also, typically this
|
||||
* class is used on fields with relatively few unique terms
|
||||
* vs the number of documents. In addition, there is an
|
||||
* internal limit (16 MB) on how many bytes each chunk of
|
||||
* documents may consume. If you trip this limit you'll hit
|
||||
* an IllegalStateException.
|
||||
*
|
||||
* Deleted documents are skipped during uninversion, and if
|
||||
* you look them up you'll get 0 ords.
|
||||
*
|
||||
* The returned per-document ords do not retain their
|
||||
* original order in the document. Instead they are returned
|
||||
* in sorted (by ord, ie term's BytesRef comparator) order. They
|
||||
* are also de-dup'd (ie if doc has same term more than once
|
||||
* in this field, you'll only get that ord back once).
|
||||
*
|
||||
* This class
|
||||
* will create its own term index internally, allowing to
|
||||
* create a wrapped TermsEnum that can handle ord. The
|
||||
* {@link #getOrdTermsEnum} method then provides this
|
||||
* wrapped enum.
|
||||
*
|
||||
* The RAM consumption of this class can be high!
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
/*
|
||||
* Final form of the un-inverted field:
|
||||
* Each document points to a list of term numbers that are contained in that document.
|
||||
*
|
||||
* Term numbers are in sorted order, and are encoded as variable-length deltas from the
|
||||
* previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A
|
||||
* term number of 0 signals the end of the termNumber list.
|
||||
*
|
||||
* There is a single int[maxDoc()] which either contains a pointer into a byte[] for
|
||||
* the termNumber lists, or directly contains the termNumber list if it fits in the 4
|
||||
* bytes of an integer. If the first byte in the integer is 1, the next 3 bytes
|
||||
* are a pointer into a byte[] where the termNumber list starts.
|
||||
*
|
||||
* There are actually 256 byte arrays, to compensate for the fact that the pointers
|
||||
* into the byte arrays are only 3 bytes long. The correct byte array for a document
|
||||
* is a function of its id.
|
||||
*
|
||||
* To save space and speed up faceting, any term that matches enough documents will
|
||||
* not be un-inverted... it will be skipped while building the un-inverted field structure,
|
||||
* and will use a set intersection method during faceting.
|
||||
*
|
||||
* To further save memory, the terms (the actual string values) are not all stored in
|
||||
* memory, but a TermIndex is used to convert term numbers to term values only
|
||||
* for the terms needed after faceting has completed. Only every 128th term value
|
||||
* is stored, along with its corresponding term number, and this is used as an
|
||||
* index to find the closest term and iterate until the desired number is hit (very
|
||||
* much like Lucene's own internal term index).
|
||||
*
|
||||
*/
|
||||
|
||||
public class DocTermOrds implements Accountable {
|
||||
|
||||
// Term ords are shifted by this, internally, to reserve
|
||||
// values 0 (end term) and 1 (index is a pointer into byte array)
|
||||
private final static int TNUM_OFFSET = 2;
|
||||
|
||||
/** Every 128th term is indexed, by default. */
|
||||
public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing
|
||||
|
||||
private int indexIntervalBits;
|
||||
private int indexIntervalMask;
|
||||
private int indexInterval;
|
||||
|
||||
/** Don't uninvert terms that exceed this count. */
|
||||
protected final int maxTermDocFreq;
|
||||
|
||||
/** Field we are uninverting. */
|
||||
protected final String field;
|
||||
|
||||
/** Number of terms in the field. */
|
||||
protected int numTermsInField;
|
||||
|
||||
/** Total number of references to term numbers. */
|
||||
protected long termInstances;
|
||||
private long memsz;
|
||||
|
||||
/** Total time to uninvert the field. */
|
||||
protected int total_time;
|
||||
|
||||
/** Time for phase1 of the uninvert process. */
|
||||
protected int phase1_time;
|
||||
|
||||
/** Holds the per-document ords or a pointer to the ords. */
|
||||
protected int[] index;
|
||||
|
||||
/** Holds term ords for documents. */
|
||||
protected byte[][] tnums = new byte[256][];
|
||||
|
||||
/** Total bytes (sum of term lengths) for all indexed terms.*/
|
||||
protected long sizeOfIndexedStrings;
|
||||
|
||||
/** Holds the indexed (by default every 128th) terms. */
|
||||
protected BytesRef[] indexedTermsArray = new BytesRef[0];
|
||||
|
||||
/** If non-null, only terms matching this prefix were
|
||||
* indexed. */
|
||||
protected BytesRef prefix;
|
||||
|
||||
/** Ordinal of the first term in the field, or 0 if the
|
||||
* {@link PostingsFormat} does not implement {@link
|
||||
* TermsEnum#ord}. */
|
||||
protected int ordBase;
|
||||
|
||||
/** Used while uninverting. */
|
||||
protected PostingsEnum postingsEnum;
|
||||
|
||||
/** If true, check and throw an exception if the field has docValues enabled.
|
||||
* Normally, docValues should be used in preference to DocTermOrds. */
|
||||
protected boolean checkForDocValues = true;
|
||||
|
||||
/** Returns total bytes used. */
|
||||
public long ramBytesUsed() {
|
||||
// can cache the mem size since it shouldn't change
|
||||
if (memsz!=0) return memsz;
|
||||
long sz = 8*8 + 32; // local fields
|
||||
if (index != null) sz += index.length * 4;
|
||||
if (tnums!=null) {
|
||||
for (byte[] arr : tnums)
|
||||
if (arr != null) sz += arr.length;
|
||||
}
|
||||
memsz = sz;
|
||||
return sz;
|
||||
}
|
||||
|
||||
/** Inverts all terms */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException {
|
||||
this(reader, liveDocs, field, null, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
// TODO: instead of all these ctors and options, take termsenum!
|
||||
|
||||
/** Inverts only terms starting w/ prefix */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException {
|
||||
this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** Inverts only terms starting w/ prefix, and only terms
|
||||
* whose docFreq (not taking deletions into account) is
|
||||
* <= maxTermDocFreq */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException {
|
||||
this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS);
|
||||
}
|
||||
|
||||
/** Inverts only terms starting w/ prefix, and only terms
|
||||
* whose docFreq (not taking deletions into account) is
|
||||
* <= maxTermDocFreq, with a custom indexing interval
|
||||
* (default is every 128th term). */
|
||||
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException {
|
||||
this(field, maxTermDocFreq, indexIntervalBits);
|
||||
uninvert(reader, liveDocs, termPrefix);
|
||||
}
|
||||
|
||||
/** Subclass inits w/ this, but be sure you then call
|
||||
* uninvert, only once */
|
||||
protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) {
|
||||
//System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq);
|
||||
this.field = field;
|
||||
this.maxTermDocFreq = maxTermDocFreq;
|
||||
this.indexIntervalBits = indexIntervalBits;
|
||||
indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits);
|
||||
indexInterval = 1 << indexIntervalBits;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a TermsEnum that implements ord, or null if no terms in field.
|
||||
* <p>
|
||||
* we build a "private" terms
|
||||
* index internally (WARNING: consumes RAM) and use that
|
||||
* index to implement ord. This also enables ord on top
|
||||
* of a composite reader. The returned TermsEnum is
|
||||
* unpositioned. This returns null if there are no terms.
|
||||
* </p>
|
||||
* <p><b>NOTE</b>: you must pass the same reader that was
|
||||
* used when creating this class
|
||||
*/
|
||||
public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
|
||||
// NOTE: see LUCENE-6529 before attempting to optimize this method to
|
||||
// return a TermsEnum directly from the reader if it already supports ord().
|
||||
|
||||
assert null != indexedTermsArray;
|
||||
|
||||
if (0 == indexedTermsArray.length) {
|
||||
return null;
|
||||
} else {
|
||||
return new OrdWrappedTermsEnum(reader);
|
||||
}
|
||||
}
|
||||
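  // Illustrative sketch (not part of this patch; dto and leafReader are hypothetical):
  // the returned enum supports ord-based positioning even when the codec itself does
  // not implement TermsEnum.ord().
  //
  //   TermsEnum te = dto.getOrdTermsEnum(leafReader);
  //   if (te != null) {
  //     te.seekExact(5L);                 // jump straight to the 6th uninverted term
  //     BytesRef sixthTerm = te.term();
  //   }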
|
||||
/**
|
||||
* Returns the number of terms in this field
|
||||
*/
|
||||
public int numTerms() {
|
||||
return numTermsInField;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if no terms were indexed.
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return index == null;
|
||||
}
|
||||
|
||||
/** Subclass can override this */
|
||||
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
|
||||
}
|
||||
|
||||
/** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)}
|
||||
* to record the document frequency for each uninverted
|
||||
* term. */
|
||||
protected void setActualDocFreq(int termNum, int df) throws IOException {
|
||||
}
|
||||
|
||||
/** Call this only once (if you subclass!) */
|
||||
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
}
|
||||
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
|
||||
final long startTime = System.nanoTime();
|
||||
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
|
||||
|
||||
final int maxDoc = reader.maxDoc();
|
||||
final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
|
||||
final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
|
||||
final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
|
||||
|
||||
final Terms terms = reader.terms(field);
|
||||
if (terms == null) {
|
||||
// No terms
|
||||
return;
|
||||
}
|
||||
|
||||
final TermsEnum te = terms.iterator();
|
||||
final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
|
||||
//System.out.println("seekStart=" + seekStart.utf8ToString());
|
||||
if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
|
||||
// No terms match
|
||||
return;
|
||||
}
|
||||
|
||||
// For our "term index wrapper"
|
||||
final List<BytesRef> indexedTerms = new ArrayList<>();
|
||||
final PagedBytes indexedTermsBytes = new PagedBytes(15);
|
||||
|
||||
// we need a minimum of 9 bytes, but round up to 12 since the space would
|
||||
// be wasted with most allocators anyway.
|
||||
byte[] tempArr = new byte[12];
|
||||
|
||||
//
|
||||
// enumerate all terms, and build an intermediate form of the un-inverted field.
|
||||
//
|
||||
// During this intermediate form, every document has a (potential) byte[]
|
||||
// and the int[maxDoc()] array either contains the termNumber list directly
|
||||
// or the *end* offset of the termNumber list in its byte array (for faster
|
||||
// appending and faster creation of the final form).
|
||||
//
|
||||
// idea... if things are too large while building, we could do a range of docs
|
||||
// at a time (but it would be a fair amount slower to build)
|
||||
// could also do ranges in parallel to take advantage of multiple CPUs
|
||||
|
||||
// OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
|
||||
// values. This requires going over the field first to find the most
|
||||
// frequent terms ahead of time.
|
||||
|
||||
int termNum = 0;
|
||||
postingsEnum = null;
|
||||
|
||||
// Loop begins with te positioned to first term (we call
|
||||
// seek above):
|
||||
for (;;) {
|
||||
final BytesRef t = te.term();
|
||||
if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
|
||||
break;
|
||||
}
|
||||
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
|
||||
|
||||
visitTerm(te, termNum);
|
||||
|
||||
if ((termNum & indexIntervalMask) == 0) {
|
||||
// Index this term
|
||||
sizeOfIndexedStrings += t.length;
|
||||
BytesRef indexedTerm = new BytesRef();
|
||||
indexedTermsBytes.copy(t, indexedTerm);
|
||||
// TODO: really should 1) strip off useless suffix,
|
||||
// and 2) use FST not array/PagedBytes
|
||||
indexedTerms.add(indexedTerm);
|
||||
}
|
||||
|
||||
final int df = te.docFreq();
|
||||
if (df <= maxTermDocFreq) {
|
||||
|
||||
postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);
|
||||
|
||||
// dF, but takes deletions into account
|
||||
int actualDF = 0;
|
||||
|
||||
for (;;) {
|
||||
int doc = postingsEnum.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
//System.out.println(" chunk=" + chunk + " docs");
|
||||
|
||||
actualDF ++;
|
||||
termInstances++;
|
||||
|
||||
//System.out.println(" docID=" + doc);
|
||||
// add TNUM_OFFSET to the term number to make room for special reserved values:
|
||||
// 0 (end term) and 1 (index into byte array follows)
|
||||
int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
|
||||
lastTerm[doc] = termNum;
|
||||
int val = index[doc];
|
||||
|
||||
if ((val & 0xff)==1) {
|
||||
// index into byte array (actually the end of
|
||||
// the doc-specific byte[] when building)
|
||||
int pos = val >>> 8;
|
||||
int ilen = vIntSize(delta);
|
||||
byte[] arr = bytes[doc];
|
||||
int newend = pos+ilen;
|
||||
if (newend > arr.length) {
|
||||
// We avoid a doubling strategy to lower memory usage.
|
||||
// this faceting method isn't for docs with many terms.
|
||||
// In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
|
||||
// TODO: figure out what array lengths we can round up to w/o actually using more memory
|
||||
            // (how much space does a byte[] take up? Is data preceded by a 32 bit length only?)
|
||||
// It should be safe to round up to the nearest 32 bits in any case.
|
||||
int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
|
||||
byte[] newarr = new byte[newLen];
|
||||
System.arraycopy(arr, 0, newarr, 0, pos);
|
||||
arr = newarr;
|
||||
bytes[doc] = newarr;
|
||||
}
|
||||
pos = writeInt(delta, arr, pos);
|
||||
index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
|
||||
} else {
|
||||
// OK, this int has data in it... find the end (a zero starting byte - not
|
||||
// part of another number, hence not following a byte with the high bit set).
|
||||
int ipos;
|
||||
if (val==0) {
|
||||
ipos=0;
|
||||
} else if ((val & 0x0000ff80)==0) {
|
||||
ipos=1;
|
||||
} else if ((val & 0x00ff8000)==0) {
|
||||
ipos=2;
|
||||
} else if ((val & 0xff800000)==0) {
|
||||
ipos=3;
|
||||
} else {
|
||||
ipos=4;
|
||||
}
|
||||
|
||||
//System.out.println(" ipos=" + ipos);
|
||||
|
||||
int endPos = writeInt(delta, tempArr, ipos);
|
||||
//System.out.println(" endpos=" + endPos);
|
||||
if (endPos <= 4) {
|
||||
//System.out.println(" fits!");
|
||||
// value will fit in the integer... move bytes back
|
||||
for (int j=ipos; j<endPos; j++) {
|
||||
val |= (tempArr[j] & 0xff) << (j<<3);
|
||||
}
|
||||
index[doc] = val;
|
||||
} else {
|
||||
// value won't fit... move integer into byte[]
|
||||
for (int j=0; j<ipos; j++) {
|
||||
tempArr[j] = (byte)val;
|
||||
val >>>=8;
|
||||
}
|
||||
// point at the end index in the byte[]
|
||||
index[doc] = (endPos<<8) | 1;
|
||||
bytes[doc] = tempArr;
|
||||
tempArr = new byte[12];
|
||||
}
|
||||
}
|
||||
}
|
||||
setActualDocFreq(termNum, actualDF);
|
||||
}
|
||||
|
||||
termNum++;
|
||||
if (te.next() == null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
numTermsInField = termNum;
|
||||
|
||||
long midPoint = System.nanoTime();
|
||||
|
||||
if (termInstances == 0) {
|
||||
// we didn't invert anything
|
||||
// lower memory consumption.
|
||||
tnums = null;
|
||||
} else {
|
||||
|
||||
this.index = index;
|
||||
|
||||
//
|
||||
// transform intermediate form into the final form, building a single byte[]
|
||||
// at a time, and releasing the intermediate byte[]s as we go to avoid
|
||||
// increasing the memory footprint.
|
||||
//
|
||||
|
||||
for (int pass = 0; pass<256; pass++) {
|
||||
byte[] target = tnums[pass];
|
||||
int pos=0; // end in target;
|
||||
if (target != null) {
|
||||
pos = target.length;
|
||||
} else {
|
||||
target = new byte[4096];
|
||||
}
|
||||
|
||||
// loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
|
||||
// where pp is the pass (which array we are building), and xx is all values.
|
||||
// each pass shares the same byte[] for termNumber lists.
|
||||
for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
|
||||
int lim = Math.min(docbase + (1<<16), maxDoc);
|
||||
for (int doc=docbase; doc<lim; doc++) {
|
||||
//System.out.println(" pass=" + pass + " process docID=" + doc);
|
||||
int val = index[doc];
|
||||
if ((val&0xff) == 1) {
|
||||
int len = val >>> 8;
|
||||
//System.out.println(" ptr pos=" + pos);
|
||||
index[doc] = (pos<<8)|1; // change index to point to start of array
|
||||
if ((pos & 0xff000000) != 0) {
|
||||
// we only have 24 bits for the array index
|
||||
throw new IllegalStateException("Too many values for UnInvertedField faceting on field "+field);
|
||||
}
|
||||
byte[] arr = bytes[doc];
|
||||
/*
|
||||
for(byte b : arr) {
|
||||
//System.out.println(" b=" + Integer.toHexString((int) b));
|
||||
}
|
||||
*/
|
||||
bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
|
||||
if (target.length <= pos + len) {
|
||||
int newlen = target.length;
|
||||
/*** we don't have to worry about the array getting too large
|
||||
* since the "pos" param will overflow first (only 24 bits available)
|
||||
if ((newlen<<1) <= 0) {
|
||||
// overflow...
|
||||
newlen = Integer.MAX_VALUE;
|
||||
if (newlen <= pos + len) {
|
||||
throw new SolrException(400,"Too many terms to uninvert field!");
|
||||
}
|
||||
} else {
|
||||
while (newlen <= pos + len) newlen<<=1; // doubling strategy
|
||||
}
|
||||
****/
|
||||
while (newlen <= pos + len) newlen<<=1; // doubling strategy
|
||||
byte[] newtarget = new byte[newlen];
|
||||
System.arraycopy(target, 0, newtarget, 0, pos);
|
||||
target = newtarget;
|
||||
}
|
||||
System.arraycopy(arr, 0, target, pos, len);
|
||||
pos += len + 1; // skip single byte at end and leave it 0 for terminator
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shrink array
|
||||
if (pos < target.length) {
|
||||
byte[] newtarget = new byte[pos];
|
||||
System.arraycopy(target, 0, newtarget, 0, pos);
|
||||
target = newtarget;
|
||||
}
|
||||
|
||||
tnums[pass] = target;
|
||||
|
||||
if ((pass << 16) > maxDoc)
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
|
||||
|
||||
long endTime = System.nanoTime();
|
||||
|
||||
total_time = (int) TimeUnit.MILLISECONDS.convert(endTime-startTime, TimeUnit.NANOSECONDS);
|
||||
phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint-startTime, TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/** Number of bytes to represent an unsigned int as a vint. */
|
||||
private static int vIntSize(int x) {
|
||||
if ((x & (0xffffffff << (7*1))) == 0 ) {
|
||||
return 1;
|
||||
}
|
||||
if ((x & (0xffffffff << (7*2))) == 0 ) {
|
||||
return 2;
|
||||
}
|
||||
if ((x & (0xffffffff << (7*3))) == 0 ) {
|
||||
return 3;
|
||||
}
|
||||
if ((x & (0xffffffff << (7*4))) == 0 ) {
|
||||
return 4;
|
||||
}
|
||||
return 5;
|
||||
}
|
||||
|
||||
// todo: if we know the size of the vInt already, we could do
|
||||
// a single switch on the size
|
||||
private static int writeInt(int x, byte[] arr, int pos) {
|
||||
int a;
|
||||
a = (x >>> (7*4));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
a = (x >>> (7*3));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
a = (x >>> (7*2));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
a = (x >>> (7*1));
|
||||
if (a != 0) {
|
||||
arr[pos++] = (byte)(a | 0x80);
|
||||
}
|
||||
arr[pos++] = (byte)(x & 0x7f);
|
||||
return pos;
|
||||
}
|
||||
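  // Illustrative decode counterpart to writeInt above (a sketch, not part of this
  // class): the last byte of each value has its high bit clear, so decoding shifts
  // in 7 bits per byte until that terminator byte is reached. This mirrors the
  // inline decoding done later in Iterator.read().
  //
  //   private static int readInt(byte[] arr, int pos) {
  //     int value = 0;
  //     for (;;) {
  //       byte b = arr[pos++];
  //       value = (value << 7) | (b & 0x7f);
  //       if ((b & 0x80) == 0) break;    // terminator byte: high bit clear
  //     }
  //     return value;
  //   }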
|
||||
/**
|
||||
* "wrap" our own terms index around the original IndexReader.
|
||||
   * Only valid if there are terms for this field from the original reader
|
||||
*/
|
||||
private final class OrdWrappedTermsEnum extends TermsEnum {
|
||||
private final TermsEnum termsEnum;
|
||||
private BytesRef term;
|
||||
private long ord = -indexInterval-1; // force "real" seek
|
||||
|
||||
public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
|
||||
assert indexedTermsArray != null;
|
||||
assert 0 != indexedTermsArray.length;
|
||||
termsEnum = reader.fields().terms(field).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
|
||||
return termsEnum.postings(reuse, flags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (++ord < 0) {
|
||||
ord = 0;
|
||||
}
|
||||
if (termsEnum.next() == null) {
|
||||
term = null;
|
||||
return null;
|
||||
}
|
||||
return setTerm(); // this is extra work if we know we are in bounds...
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docFreq() throws IOException {
|
||||
return termsEnum.docFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long totalTermFreq() throws IOException {
|
||||
return termsEnum.totalTermFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() {
|
||||
return ordBase + ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef target) throws IOException {
|
||||
|
||||
// already here
|
||||
if (term != null && term.equals(target)) {
|
||||
return SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
int startIdx = Arrays.binarySearch(indexedTermsArray, target);
|
||||
|
||||
if (startIdx >= 0) {
|
||||
// we hit the term exactly... lucky us!
|
||||
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
|
||||
assert seekStatus == TermsEnum.SeekStatus.FOUND;
|
||||
ord = startIdx << indexIntervalBits;
|
||||
setTerm();
|
||||
assert term != null;
|
||||
return SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
// we didn't hit the term exactly
|
||||
startIdx = -startIdx-1;
|
||||
|
||||
if (startIdx == 0) {
|
||||
// our target occurs *before* the first term
|
||||
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
|
||||
assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
|
||||
ord = 0;
|
||||
setTerm();
|
||||
assert term != null;
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
|
||||
// back up to the start of the block
|
||||
startIdx--;
|
||||
|
||||
if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
|
||||
// we are already in the right block and the current term is before the term we want,
|
||||
// so we don't need to seek.
|
||||
} else {
|
||||
// seek to the right block
|
||||
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
|
||||
assert seekStatus == TermsEnum.SeekStatus.FOUND;
|
||||
ord = startIdx << indexIntervalBits;
|
||||
setTerm();
|
||||
assert term != null; // should be non-null since it's in the index
|
||||
}
|
||||
|
||||
while (term != null && term.compareTo(target) < 0) {
|
||||
next();
|
||||
}
|
||||
|
||||
if (term == null) {
|
||||
return SeekStatus.END;
|
||||
} else if (term.compareTo(target) == 0) {
|
||||
return SeekStatus.FOUND;
|
||||
} else {
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long targetOrd) throws IOException {
|
||||
int delta = (int) (targetOrd - ordBase - ord);
|
||||
//System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
|
||||
if (delta < 0 || delta > indexInterval) {
|
||||
final int idx = (int) (targetOrd >>> indexIntervalBits);
|
||||
final BytesRef base = indexedTermsArray[idx];
|
||||
//System.out.println(" do seek term=" + base.utf8ToString());
|
||||
ord = idx << indexIntervalBits;
|
||||
delta = (int) (targetOrd - ord);
|
||||
final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
|
||||
assert seekStatus == TermsEnum.SeekStatus.FOUND;
|
||||
} else {
|
||||
//System.out.println("seek w/in block");
|
||||
}
|
||||
|
||||
while (--delta >= 0) {
|
||||
BytesRef br = termsEnum.next();
|
||||
if (br == null) {
|
||||
assert false;
|
||||
return;
|
||||
}
|
||||
ord++;
|
||||
}
|
||||
|
||||
setTerm();
|
||||
assert term != null;
|
||||
}
|
||||
|
||||
private BytesRef setTerm() throws IOException {
|
||||
term = termsEnum.term();
|
||||
//System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
|
||||
if (prefix != null && !StringHelper.startsWith(term, prefix)) {
|
||||
term = null;
|
||||
}
|
||||
return term;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the term ({@link BytesRef}) corresponding to
|
||||
* the provided ordinal. */
|
||||
public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
|
||||
termsEnum.seekExact(ord);
|
||||
return termsEnum.term();
|
||||
}
|
||||
|
||||
/** Returns a SortedSetDocValues view of this instance */
|
||||
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
|
||||
if (isEmpty()) {
|
||||
return DocValues.emptySortedSet();
|
||||
} else {
|
||||
return new Iterator(reader);
|
||||
}
|
||||
}
|
||||
|
||||
private class Iterator extends SortedSetDocValues {
|
||||
final LeafReader reader;
|
||||
final TermsEnum te; // used internally for lookupOrd() and lookupTerm()
|
||||
// currently we read 5 at a time (using the logic of the old iterator)
|
||||
final int buffer[] = new int[5];
|
||||
int bufferUpto;
|
||||
int bufferLength;
|
||||
|
||||
private int tnum;
|
||||
private int upto;
|
||||
private byte[] arr;
|
||||
|
||||
Iterator(LeafReader reader) throws IOException {
|
||||
this.reader = reader;
|
||||
this.te = termsEnum();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long nextOrd() {
|
||||
while (bufferUpto == bufferLength) {
|
||||
if (bufferLength < buffer.length) {
|
||||
return NO_MORE_ORDS;
|
||||
} else {
|
||||
bufferLength = read(buffer);
|
||||
bufferUpto = 0;
|
||||
}
|
||||
}
|
||||
return buffer[bufferUpto++];
|
||||
}
|
||||
|
||||
/** Buffer must be at least 5 ints long. Returns number
|
||||
* of term ords placed into buffer; if this count is
|
||||
* less than buffer.length then that is the end. */
|
||||
int read(int[] buffer) {
|
||||
int bufferUpto = 0;
|
||||
if (arr == null) {
|
||||
// code is inlined into upto
|
||||
//System.out.println("inlined");
|
||||
int code = upto;
|
||||
int delta = 0;
|
||||
for (;;) {
|
||||
delta = (delta << 7) | (code & 0x7f);
|
||||
if ((code & 0x80)==0) {
|
||||
if (delta==0) break;
|
||||
tnum += delta - TNUM_OFFSET;
|
||||
buffer[bufferUpto++] = ordBase+tnum;
|
||||
//System.out.println(" tnum=" + tnum);
|
||||
delta = 0;
|
||||
}
|
||||
code >>>= 8;
|
||||
}
|
||||
} else {
|
||||
// code is a pointer
|
||||
for(;;) {
|
||||
int delta = 0;
|
||||
for(;;) {
|
||||
byte b = arr[upto++];
|
||||
delta = (delta << 7) | (b & 0x7f);
|
||||
//System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b);
|
||||
if ((b & 0x80) == 0) break;
|
||||
}
|
||||
//System.out.println(" delta=" + delta);
|
||||
if (delta == 0) break;
|
||||
tnum += delta - TNUM_OFFSET;
|
||||
//System.out.println(" tnum=" + tnum);
|
||||
buffer[bufferUpto++] = ordBase+tnum;
|
||||
if (bufferUpto == buffer.length) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bufferUpto;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setDocument(int docID) {
|
||||
tnum = 0;
|
||||
final int code = index[docID];
|
||||
if ((code & 0xff)==1) {
|
||||
// a pointer
|
||||
upto = code>>>8;
|
||||
//System.out.println(" pointer! upto=" + upto);
|
||||
int whichArray = (docID >>> 16) & 0xff;
|
||||
arr = tnums[whichArray];
|
||||
} else {
|
||||
//System.out.println(" inline!");
|
||||
arr = null;
|
||||
upto = code;
|
||||
}
|
||||
bufferUpto = 0;
|
||||
bufferLength = read(buffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef lookupOrd(long ord) {
|
||||
try {
|
||||
return DocTermOrds.this.lookupTerm(te, (int) ord);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getValueCount() {
|
||||
return numTerms();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long lookupTerm(BytesRef key) {
|
||||
try {
|
||||
switch (te.seekCeil(key)) {
|
||||
case FOUND:
|
||||
assert te.ord() >= 0;
|
||||
return te.ord();
|
||||
case NOT_FOUND:
|
||||
assert te.ord() >= 0;
|
||||
return -te.ord()-1;
|
||||
default: /* END */
|
||||
return -numTerms()-1;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum() {
|
||||
try {
|
||||
return getOrdTermsEnum(reader);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
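// Illustrative usage sketch (not part of this patch; the reader and field name
// below are hypothetical): uninvert a multi-valued string field and walk the
// term ords of one document through the SortedSetDocValues view.
//
//   DocTermOrds dto = new DocTermOrds(leafReader, leafReader.getLiveDocs(), "category");
//   SortedSetDocValues ords = dto.iterator(leafReader);
//   ords.setDocument(docID);
//   for (long ord = ords.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = ords.nextOrd()) {
//     BytesRef term = ords.lookupOrd(ord);   // resolve the ord back to its term bytes
//   }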
|
|
@@ -0,0 +1,466 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.IndexReader; // javadocs
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Expert: Maintains caches of term values.
|
||||
*
|
||||
* <p>Created: May 19, 2004 11:13:14 AM
|
||||
*
|
||||
* @since lucene 1.4
|
||||
* @see FieldCacheSanityChecker
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
interface FieldCache {
|
||||
|
||||
/**
|
||||
* Placeholder indicating creation of this cache is currently in-progress.
|
||||
*/
|
||||
public static final class CreationPlaceholder implements Accountable {
|
||||
Accountable value;
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
// don't call on the in-progress value, might make things angry.
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
   * Interface to all parsers. It is used to parse different numeric types.
|
||||
*/
|
||||
public interface Parser {
|
||||
|
||||
/**
|
||||
* Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
|
||||
* to filter the actual TermsEnum before the field cache is filled.
|
||||
*
|
||||
* @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
|
||||
* @return a possibly filtered {@link TermsEnum} instance, this method must not return <code>null</code>.
|
||||
* @throws IOException if an {@link IOException} occurs
|
||||
* @deprecated index with Points instead
|
||||
*/
|
||||
@Deprecated
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException;
|
||||
|
||||
    /** Parses this field's value */
|
||||
public long parseValue(BytesRef term);
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for points parsers. These parsers do not use the inverted index, but instead
|
||||
* uninvert point data.
|
||||
*
|
||||
* This abstraction can be cleaned up when Parser.termsEnum is removed.
|
||||
*/
|
||||
public abstract class PointParser implements Parser {
|
||||
public final TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
throw new UnsupportedOperationException("makes no sense for parsing points");
|
||||
}
|
||||
}
|
||||
|
||||
/** Expert: The cache used internally by sorting and range query classes. */
|
||||
public static FieldCache DEFAULT = new FieldCacheImpl();
|
||||
|
||||
/**
|
||||
* A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.IntPoint}.
|
||||
*/
|
||||
public static final Parser INT_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableBytesToInt(point.bytes, point.offset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".INT_POINT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LongPoint}.
|
||||
*/
|
||||
public static final Parser LONG_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableBytesToLong(point.bytes, point.offset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LONG_POINT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.FloatPoint}.
|
||||
*/
|
||||
public static final Parser FLOAT_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableFloatBits(NumericUtils.sortableBytesToInt(point.bytes, point.offset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".FLOAT_POINT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.DoublePoint}.
|
||||
*/
|
||||
public static final Parser DOUBLE_POINT_PARSER = new PointParser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef point) {
|
||||
return NumericUtils.sortableDoubleBits(NumericUtils.sortableBytesToLong(point.bytes, point.offset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".DOUBLE_POINT_PARSER";
|
||||
}
|
||||
};
|
||||
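  // Illustrative round trip (a sketch; not part of this patch): point fields store
  // values as fixed-width sortable bytes, and the *_POINT_PARSER instances above
  // decode those bytes back into a sortable long for the cache.
  //
  //   byte[] buf = new byte[Integer.BYTES];
  //   NumericUtils.intToSortableBytes(42, buf, 0);
  //   long parsed = INT_POINT_PARSER.parseValue(new BytesRef(buf));   // == 42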
|
||||
/**
|
||||
* A parser instance for int values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyIntField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_INT_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
return LegacyNumericUtils.prefixCodedToInt(term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_INT_PARSER";
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for float values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyFloatField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_FLOAT_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
int val = LegacyNumericUtils.prefixCodedToInt(term);
|
||||
if (val<0) val ^= 0x7fffffff;
|
||||
return val;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_FLOAT_PARSER";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for long values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyLongField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_LONG_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
return LegacyNumericUtils.prefixCodedToLong(term);
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_LONG_PARSER";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* A parser instance for double values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||
* via {@link org.apache.lucene.document.LegacyDoubleField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||
* @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Parser LEGACY_DOUBLE_PARSER = new Parser() {
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
long val = LegacyNumericUtils.prefixCodedToLong(term);
|
||||
if (val<0) val ^= 0x7fffffffffffffffL;
|
||||
return val;
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return FieldCache.class.getName()+".LEGACY_DOUBLE_PARSER";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||
}
|
||||
};
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||
   * reads the terms/points in <code>field</code> and returns a bit set the size of
|
||||
   * <code>reader.maxDoc()</code>, with a bit turned on for each docid that
|
||||
   * has a value for this field.
|
||||
* @param parser May be {@code null} if coming from the inverted index, otherwise
|
||||
* can be a {@link PointParser} to compute from point values.
|
||||
*/
|
||||
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns a {@link NumericDocValues} over the values found in documents in the given
|
||||
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
|
||||
* uses {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values.
|
||||
* Otherwise, it checks the internal cache for an appropriate entry, and if
|
||||
* none is found, reads the terms/points in <code>field</code> as longs and returns
|
||||
* an array of size <code>reader.maxDoc()</code> of the value each document
|
||||
* has in the given field.
|
||||
*
|
||||
* @param reader
|
||||
* Used to get field values.
|
||||
* @param field
|
||||
* Which field contains the longs.
|
||||
* @param parser
|
||||
* Computes long for string values. May be {@code null} if the
|
||||
* requested field was indexed as {@link NumericDocValuesField} or
|
||||
* {@link org.apache.lucene.document.LegacyLongField}.
|
||||
* @param setDocsWithField
|
||||
* If true then {@link #getDocsWithField} will also be computed and
|
||||
* stored in the FieldCache.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException
|
||||
* If any error occurs.
|
||||
*/
|
||||
public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException;
|
||||
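  // Illustrative usage (a sketch; the reader and field name are hypothetical):
  //
  //   NumericDocValues longs =
  //       FieldCache.DEFAULT.getNumerics(leafReader, "price", FieldCache.LONG_POINT_PARSER, false);
  //   long value = longs.get(docID);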
|
||||
/** Checks the internal cache for an appropriate entry, and if none
|
||||
* is found, reads the term values in <code>field</code>
|
||||
* and returns a {@link BinaryDocValues} instance, providing a
|
||||
* method to retrieve the term (as a BytesRef) per document.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the strings.
|
||||
* @param setDocsWithField If true then {@link #getDocsWithField} will
|
||||
* also be computed and stored in the FieldCache.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException;
|
||||
|
||||
/** Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String,boolean)},
|
||||
* but you can specify whether more RAM should be consumed in exchange for
|
||||
* faster lookups (default is "true"). Note that the
|
||||
* first call for a given reader and field "wins",
|
||||
* subsequent calls will share the same cache entry. */
|
||||
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException;
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none
|
||||
* is found, reads the term values in <code>field</code>
|
||||
* and returns a {@link SortedDocValues} instance,
|
||||
* providing methods to retrieve sort ordinals and terms
|
||||
   * (as a BytesRef) per document.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the strings.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException;
|
||||
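  // Illustrative usage (a sketch; the reader and field name are hypothetical):
  //
  //   SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(leafReader, "author");
  //   int ord = sorted.getOrd(docID);                       // -1 if the document has no value
  //   BytesRef value = ord < 0 ? null : sorted.lookupOrd(ord);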
|
||||
/** Expert: just like {@link
|
||||
* #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you can specify
|
||||
* whether more RAM should be consumed in exchange for
|
||||
* faster lookups (default is "true"). Note that the
|
||||
* first call for a given reader and field "wins",
|
||||
* subsequent calls will share the same cache entry. */
|
||||
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException;
|
||||
|
||||
/** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */
|
||||
public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_INT });
|
||||
/** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */
|
||||
public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_LONG });
|
||||
|
||||
/**
|
||||
* Checks the internal cache for an appropriate entry, and if none is found, reads the term values
|
||||
* in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
|
||||
* the terms (as ords) per document.
|
||||
*
|
||||
* @param reader Used to build a {@link DocTermOrds} instance
|
||||
* @param field Which field contains the strings.
|
||||
* @param prefix prefix for a subset of the terms which should be uninverted. Can be null or
|
||||
* {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX}
|
||||
*
|
||||
* @return a {@link DocTermOrds} instance
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException;
|
||||
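  // Illustrative usage (a sketch; the reader and field name are hypothetical):
  // uninvert a multi-valued string field through the default cache and iterate
  // the ords for one document.
  //
  //   SortedSetDocValues ords = FieldCache.DEFAULT.getDocTermOrds(leafReader, "tags", null);
  //   ords.setDocument(docID);
  //   for (long ord = ords.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = ords.nextOrd()) {
  //     BytesRef value = ords.lookupOrd(ord);
  //   }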
|
||||
/**
|
||||
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
|
||||
* Can be useful for logging/debugging.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class CacheEntry {
|
||||
|
||||
private final Object readerKey;
|
||||
private final String fieldName;
|
||||
private final Class<?> cacheType;
|
||||
private final Object custom;
|
||||
private final Accountable value;
|
||||
|
||||
public CacheEntry(Object readerKey, String fieldName,
|
||||
Class<?> cacheType,
|
||||
Object custom,
|
||||
Accountable value) {
|
||||
this.readerKey = readerKey;
|
||||
this.fieldName = fieldName;
|
||||
this.cacheType = cacheType;
|
||||
this.custom = custom;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public Object getReaderKey() {
|
||||
return readerKey;
|
||||
}
|
||||
|
||||
public String getFieldName() {
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
public Class<?> getCacheType() {
|
||||
return cacheType;
|
||||
}
|
||||
|
||||
public Object getCustom() {
|
||||
return custom;
|
||||
}
|
||||
|
||||
public Object getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
     * A human-readable estimate of the RAM used by the cached value,
|
||||
     * computed from {@link Accountable#ramBytesUsed} on each call.
|
||||
*/
|
||||
public String getEstimatedSize() {
|
||||
long bytesUsed = value == null ? 0L : value.ramBytesUsed();
|
||||
return RamUsageEstimator.humanReadableUnits(bytesUsed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder b = new StringBuilder(250);
|
||||
b.append("'").append(getReaderKey()).append("'=>");
|
||||
b.append("'").append(getFieldName()).append("',");
|
||||
b.append(getCacheType()).append(",").append(getCustom());
|
||||
b.append("=>").append(getValue().getClass().getName()).append("#");
|
||||
b.append(System.identityHashCode(getValue()));
|
||||
|
||||
String s = getEstimatedSize();
|
||||
b.append(" (size =~ ").append(s).append(')');
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERT: Generates an array of CacheEntry objects representing all items
|
||||
* currently in the FieldCache.
|
||||
* <p>
|
||||
* NOTE: These CacheEntry objects maintain a strong reference to the
|
||||
   * Cached Values. Maintaining references to a CacheEntry after the IndexReader
|
||||
   * associated with it has been garbage collected will prevent the Value itself
|
||||
* from being garbage collected when the Cache drops the WeakReference.
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public CacheEntry[] getCacheEntries();
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* EXPERT: Instructs the FieldCache to forcibly expunge all entries
|
||||
* from the underlying caches. This is intended only to be used for
|
||||
* test methods as a way to ensure a known base state of the Cache
|
||||
   * (without needing to rely on GC to free WeakReferences).
|
||||
* It should not be relied on for "Cache maintenance" in general
|
||||
* application code.
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public void purgeAllCaches();
|
||||
|
||||
/**
|
||||
* Expert: drops all cache entries associated with this
|
||||
* reader {@link IndexReader#getCoreCacheKey}. NOTE: this cache key must
|
||||
* precisely match the reader that the cache entry is
|
||||
* keyed on. If you pass a top-level reader, it usually
|
||||
* will have no effect as Lucene now caches at the segment
|
||||
* reader level.
|
||||
*/
|
||||
public void purgeByCacheKey(Object coreCacheKey);
|
||||
|
||||
/**
|
||||
* If non-null, FieldCacheImpl will warn whenever
|
||||
* entries are created that are not sane according to
|
||||
* {@link FieldCacheSanityChecker}.
|
||||
*/
|
||||
public void setInfoStream(PrintStream stream);
|
||||
|
||||
/** counterpart of {@link #setInfoStream(PrintStream)} */
|
||||
public PrintStream getInfoStream();
|
||||
}
|
File diff suppressed because it is too large
|
@@ -0,0 +1,425 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.util.MapOfSets;
|
||||
import org.apache.solr.uninverting.FieldCache.CacheEntry;
|
||||
|
||||
/**
|
||||
* Provides methods for sanity checking that entries in the FieldCache
|
||||
* are not wasteful or inconsistent.
|
||||
 *
|
||||
* <p>
|
||||
 * Lucene 2.9 introduced numerous enhancements into how the FieldCache
|
||||
* is used by the low levels of Lucene searching (for Sorting and
|
||||
* ValueSourceQueries) to improve both the speed for Sorting, as well
|
||||
* as reopening of IndexReaders. But these changes have shifted the
|
||||
* usage of FieldCache from "top level" IndexReaders (frequently a
|
||||
* MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
|
||||
* As a result, existing applications that directly access the FieldCache
|
||||
* may find RAM usage increase significantly when upgrading to 2.9 or
|
||||
 * later. This class provides an API for these applications (or their
|
||||
* Unit tests) to check at run time if the FieldCache contains "insane"
|
||||
* usages of the FieldCache.
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
* @see FieldCache
|
||||
* @see FieldCacheSanityChecker.Insanity
|
||||
* @see FieldCacheSanityChecker.InsanityType
|
||||
*/
|
||||
final class FieldCacheSanityChecker {
|
||||
|
||||
public FieldCacheSanityChecker() {
|
||||
/* NOOP */
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick and dirty convenience method
|
||||
* @see #check
|
||||
*/
|
||||
public static Insanity[] checkSanity(FieldCache cache) {
|
||||
return checkSanity(cache.getCacheEntries());
|
||||
}
|
||||
|
||||
/**
|
||||
* Quick and dirty convenience method that instantiates an instance with
|
||||
* "good defaults" and uses it to test the CacheEntrys
|
||||
* @see #check
|
||||
*/
|
||||
public static Insanity[] checkSanity(CacheEntry... cacheEntries) {
|
||||
FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
|
||||
return sanityChecker.check(cacheEntries);
|
||||
}
|
||||
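  // Illustrative usage (a sketch, e.g. from a test): verify that the default
  // cache holds no conflicting entries.
  //
  //   FieldCacheSanityChecker.Insanity[] problems =
  //       FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT);
  //   assert problems.length == 0 : problems[0].toString();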
|
||||
|
||||
/**
|
||||
* Tests a CacheEntry[] for indication of "insane" cache usage.
|
||||
* <p>
|
||||
* <B>NOTE:</b>FieldCache CreationPlaceholder objects are ignored.
|
||||
* (:TODO: is this a bad idea? are we masking a real problem?)
|
||||
* </p>
|
||||
*/
|
||||
public Insanity[] check(CacheEntry... cacheEntries) {
|
||||
if (null == cacheEntries || 0 == cacheEntries.length)
|
||||
return new Insanity[0];
|
||||
|
||||
// the indirect mapping lets MapOfSet dedup identical valIds for us
|
||||
//
|
||||
// maps the (valId) identityhashCode of cache values to
|
||||
// sets of CacheEntry instances
|
||||
final MapOfSets<Integer, CacheEntry> valIdToItems = new MapOfSets<>(new HashMap<Integer, Set<CacheEntry>>(17));
|
||||
// maps ReaderField keys to Sets of ValueIds
|
||||
final MapOfSets<ReaderField, Integer> readerFieldToValIds = new MapOfSets<>(new HashMap<ReaderField, Set<Integer>>(17));
|
||||
//
|
||||
|
||||
    // any keys that we know result in more than one valId
|
||||
final Set<ReaderField> valMismatchKeys = new HashSet<>();
|
||||
|
||||
// iterate over all the cacheEntries to get the mappings we'll need
|
||||
for (int i = 0; i < cacheEntries.length; i++) {
|
||||
final CacheEntry item = cacheEntries[i];
|
||||
final Object val = item.getValue();
|
||||
|
||||
// It's OK to have dup entries, where one is eg
|
||||
// float[] and the other is the Bits (from
|
||||
      // getDocsWithField())
|
||||
if (val instanceof FieldCacheImpl.BitsEntry) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (val instanceof FieldCache.CreationPlaceholder)
|
||||
continue;
|
||||
|
||||
final ReaderField rf = new ReaderField(item.getReaderKey(),
|
||||
item.getFieldName());
|
||||
|
||||
final Integer valId = Integer.valueOf(System.identityHashCode(val));
|
||||
|
||||
// indirect mapping, so the MapOfSet will dedup identical valIds for us
|
||||
valIdToItems.put(valId, item);
|
||||
if (1 < readerFieldToValIds.put(rf, valId)) {
|
||||
valMismatchKeys.add(rf);
|
||||
}
|
||||
}
|
||||
|
||||
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
|
||||
|
||||
insanity.addAll(checkValueMismatch(valIdToItems,
|
||||
readerFieldToValIds,
|
||||
valMismatchKeys));
|
||||
insanity.addAll(checkSubreaders(valIdToItems,
|
||||
readerFieldToValIds));
|
||||
|
||||
return insanity.toArray(new Insanity[insanity.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper method used by check that iterates over
|
||||
* valMismatchKeys and generates a Collection of Insanity
|
||||
* instances accordingly. The MapOfSets are used to populate
|
||||
* the Insanity objects.
|
||||
* @see InsanityType#VALUEMISMATCH
|
||||
*/
|
||||
private Collection<Insanity> checkValueMismatch(MapOfSets<Integer, CacheEntry> valIdToItems,
|
||||
MapOfSets<ReaderField, Integer> readerFieldToValIds,
|
||||
Set<ReaderField> valMismatchKeys) {
|
||||
|
||||
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
|
||||
|
||||
if (! valMismatchKeys.isEmpty() ) {
|
||||
// we have multiple values for some ReaderFields
|
||||
|
||||
final Map<ReaderField, Set<Integer>> rfMap = readerFieldToValIds.getMap();
|
||||
final Map<Integer, Set<CacheEntry>> valMap = valIdToItems.getMap();
|
||||
for (final ReaderField rf : valMismatchKeys) {
|
||||
final List<CacheEntry> badEntries = new ArrayList<>(valMismatchKeys.size() * 2);
|
||||
for(final Integer value: rfMap.get(rf)) {
|
||||
for (final CacheEntry cacheEntry : valMap.get(value)) {
|
||||
badEntries.add(cacheEntry);
|
||||
}
|
||||
}
|
||||
|
||||
CacheEntry[] badness = new CacheEntry[badEntries.size()];
|
||||
badness = badEntries.toArray(badness);
|
||||
|
||||
insanity.add(new Insanity(InsanityType.VALUEMISMATCH,
|
||||
"Multiple distinct value objects for " +
|
||||
rf.toString(), badness));
|
||||
}
|
||||
}
|
||||
return insanity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper method used by check that iterates over
|
||||
* the keys of readerFieldToValIds and generates a Collection
|
||||
* of Insanity instances whenever two (or more) ReaderField instances are
|
||||
* found that have an ancestry relationships.
|
||||
*
|
||||
* @see InsanityType#SUBREADER
|
||||
*/
|
||||
private Collection<Insanity> checkSubreaders( MapOfSets<Integer, CacheEntry> valIdToItems,
|
||||
MapOfSets<ReaderField, Integer> readerFieldToValIds) {
|
||||
|
||||
final List<Insanity> insanity = new ArrayList<>(23);
|
||||
|
||||
Map<ReaderField, Set<ReaderField>> badChildren = new HashMap<>(17);
|
||||
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<>(badChildren); // wrapper
|
||||
|
||||
Map<Integer, Set<CacheEntry>> viToItemSets = valIdToItems.getMap();
|
||||
Map<ReaderField, Set<Integer>> rfToValIdSets = readerFieldToValIds.getMap();
|
||||
|
||||
Set<ReaderField> seen = new HashSet<>(17);
|
||||
|
||||
Set<ReaderField> readerFields = rfToValIdSets.keySet();
|
||||
for (final ReaderField rf : readerFields) {
|
||||
|
||||
if (seen.contains(rf)) continue;
|
||||
|
||||
List<Object> kids = getAllDescendantReaderKeys(rf.readerKey);
|
||||
for (Object kidKey : kids) {
|
||||
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
|
||||
|
||||
if (badChildren.containsKey(kid)) {
|
||||
          // we've already processed this kid as RF and found other problems
|
||||
// track those problems as our own
|
||||
badKids.put(rf, kid);
|
||||
badKids.putAll(rf, badChildren.get(kid));
|
||||
badChildren.remove(kid);
|
||||
|
||||
} else if (rfToValIdSets.containsKey(kid)) {
|
||||
// we have cache entries for the kid
|
||||
badKids.put(rf, kid);
|
||||
}
|
||||
seen.add(kid);
|
||||
}
|
||||
seen.add(rf);
|
||||
}
|
||||
|
||||
// every mapping in badKids represents an Insanity
|
||||
for (final ReaderField parent : badChildren.keySet()) {
|
||||
Set<ReaderField> kids = badChildren.get(parent);
|
||||
|
||||
List<CacheEntry> badEntries = new ArrayList<>(kids.size() * 2);
|
||||
|
||||
// put parent entr(ies) in first
|
||||
{
|
||||
for (final Integer value : rfToValIdSets.get(parent)) {
|
||||
badEntries.addAll(viToItemSets.get(value));
|
||||
}
|
||||
}
|
||||
|
||||
// now the entries for the descendants
|
||||
for (final ReaderField kid : kids) {
|
||||
for (final Integer value : rfToValIdSets.get(kid)) {
|
||||
badEntries.addAll(viToItemSets.get(value));
|
||||
}
|
||||
}
|
||||
|
||||
CacheEntry[] badness = new CacheEntry[badEntries.size()];
|
||||
badness = badEntries.toArray(badness);
|
||||
|
||||
insanity.add(new Insanity(InsanityType.SUBREADER,
|
||||
"Found caches for descendants of " +
|
||||
parent.toString(),
|
||||
badness));
|
||||
}
|
||||
|
||||
return insanity;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the seed is an IndexReader, and if so will walk
|
||||
* the hierarchy of subReaders building up a list of the objects
|
||||
* returned by {@code seed.getCoreCacheKey()}
|
||||
*/
|
||||
private List<Object> getAllDescendantReaderKeys(Object seed) {
|
||||
List<Object> all = new ArrayList<>(17); // will grow as we iter
|
||||
all.add(seed);
|
||||
for (int i = 0; i < all.size(); i++) {
|
||||
final Object obj = all.get(i);
|
||||
// TODO: We don't check closed readers here (as getTopReaderContext
|
||||
// throws AlreadyClosedException), what should we do? Reflection?
|
||||
if (obj instanceof IndexReader) {
|
||||
try {
|
||||
final List<IndexReaderContext> childs =
|
||||
((IndexReader) obj).getContext().children();
|
||||
if (childs != null) { // it is composite reader
|
||||
for (final IndexReaderContext ctx : childs) {
|
||||
all.add(ctx.reader().getCoreCacheKey());
|
||||
}
|
||||
}
|
||||
} catch (AlreadyClosedException ace) {
|
||||
// ignore this reader
|
||||
}
|
||||
}
|
||||
}
|
||||
// need to skip the first, because it was the seed
|
||||
return all.subList(1, all.size());
|
||||
}
|
||||
|
||||
/**
|
||||
   * Simple pair object for using "readerKey + fieldName" as a Map key
|
||||
*/
|
||||
private final static class ReaderField {
|
||||
public final Object readerKey;
|
||||
public final String fieldName;
|
||||
public ReaderField(Object readerKey, String fieldName) {
|
||||
this.readerKey = readerKey;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return System.identityHashCode(readerKey) * fieldName.hashCode();
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object that) {
|
||||
if (! (that instanceof ReaderField)) return false;
|
||||
|
||||
ReaderField other = (ReaderField) that;
|
||||
return (this.readerKey == other.readerKey &&
|
||||
this.fieldName.equals(other.fieldName));
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return readerKey.toString() + "+" + fieldName;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple container for a collection of related CacheEntry objects that
|
||||
* in conjunction with each other represent some "insane" usage of the
|
||||
* FieldCache.
|
||||
*/
|
||||
public final static class Insanity {
|
||||
private final InsanityType type;
|
||||
private final String msg;
|
||||
private final CacheEntry[] entries;
|
||||
public Insanity(InsanityType type, String msg, CacheEntry... entries) {
|
||||
if (null == type) {
|
||||
throw new IllegalArgumentException
|
||||
("Insanity requires non-null InsanityType");
|
||||
}
|
||||
if (null == entries || 0 == entries.length) {
|
||||
throw new IllegalArgumentException
|
||||
("Insanity requires non-null/non-empty CacheEntry[]");
|
||||
}
|
||||
this.type = type;
|
||||
this.msg = msg;
|
||||
this.entries = entries;
|
||||
|
||||
}
|
||||
/**
|
||||
* Type of insane behavior this object represents
|
||||
*/
|
||||
public InsanityType getType() { return type; }
|
||||
/**
|
||||
     * Description of the insane behavior
|
||||
*/
|
||||
public String getMsg() { return msg; }
|
||||
/**
|
||||
* CacheEntry objects which suggest a problem
|
||||
*/
|
||||
public CacheEntry[] getCacheEntries() { return entries; }
|
||||
/**
|
||||
* Multi-Line representation of this Insanity object, starting with
|
||||
* the Type and Msg, followed by each CacheEntry.toString() on its
|
||||
* own line prefaced by a tab character
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append(getType()).append(": ");
|
||||
|
||||
String m = getMsg();
|
||||
if (null != m) buf.append(m);
|
||||
|
||||
buf.append('\n');
|
||||
|
||||
CacheEntry[] ce = getCacheEntries();
|
||||
for (int i = 0; i < ce.length; i++) {
|
||||
buf.append('\t').append(ce[i].toString()).append('\n');
|
||||
}
|
||||
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An Enumeration of the different types of "insane" behavior that
|
||||
* may be detected in a FieldCache.
|
||||
*
|
||||
* @see InsanityType#SUBREADER
|
||||
* @see InsanityType#VALUEMISMATCH
|
||||
* @see InsanityType#EXPECTED
|
||||
*/
|
||||
public final static class InsanityType {
|
||||
private final String label;
|
||||
private InsanityType(final String label) {
|
||||
this.label = label;
|
||||
}
|
||||
@Override
|
||||
public String toString() { return label; }
|
||||
|
||||
/**
|
||||
* Indicates an overlap in cache usage on a given field
|
||||
* in sub/super readers.
|
||||
*/
|
||||
public final static InsanityType SUBREADER
|
||||
= new InsanityType("SUBREADER");
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Indicates entries have the same reader+fieldname but
|
||||
* different cached values. This can happen if different datatypes
|
||||
* or parsers are used -- and while it's not necessarily a bug
|
||||
* it's typically an indication of a possible problem.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>NOTE:</b> Only the reader, fieldname, and cached value are actually
|
||||
* tested -- if two cache entries have different parsers or datatypes but
|
||||
* the cached values are the same Object (== not just equal()) this method
|
||||
* does not consider that a red flag. This allows for subtle variations
|
||||
* in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
|
||||
* </p>
|
||||
*/
|
||||
public final static InsanityType VALUEMISMATCH
|
||||
= new InsanityType("VALUEMISMATCH");
|
||||
|
||||
/**
|
||||
* Indicates an expected bit of "insanity". This may be useful for
|
||||
* clients that wish to preserve/log information about insane usage
|
||||
* but indicate that it was expected.
|
||||
*/
|
||||
public final static InsanityType EXPECTED
|
||||
= new InsanityType("EXPECTED");
|
||||
}
|
||||
|
||||
|
||||
}
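The Insanity/InsanityType API above is normally consumed through the class's static sanity-check entry point. A minimal sketch follows, assuming the enclosing class is org.apache.solr.uninverting.FieldCacheSanityChecker and that its checkSanity(FieldCache) method (not shown in this hunk) returns the Insanity[] to inspect:

import org.apache.solr.uninverting.FieldCache;
import org.apache.solr.uninverting.FieldCacheSanityChecker;
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;
import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType;

public class InsanityReport {
  // Prints every unexpected "insane" FieldCache usage to stderr.
  public static void report() {
    // checkSanity(FieldCache) is assumed here; it is not part of this hunk.
    Insanity[] problems = FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT);
    for (Insanity insanity : problems) {
      if (insanity.getType() == InsanityType.EXPECTED) {
        continue; // the caller declared this overlap intentional, skip it
      }
      // toString() emits the type and message, then each CacheEntry on its own tab-indented line
      System.err.println(insanity);
    }
  }
}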
|
|
@@ -0,0 +1,391 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.NumericDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.SortedDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.StringField; // javadocs
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FilterDirectoryReader;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.solr.uninverting.FieldCache.CacheEntry;
|
||||
|
||||
/**
|
||||
* A FilterReader that exposes <i>indexed</i> values as if they also had
|
||||
* docvalues.
|
||||
* <p>
|
||||
* This is accomplished by "inverting the inverted index" or "uninversion".
|
||||
* <p>
|
||||
* The uninversion process happens lazily: upon the first request for the
|
||||
* field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}
|
||||
* or similar), it will create the docvalues on-the-fly if needed and cache them,
|
||||
* based on the core cache key of the wrapped LeafReader.
|
||||
*/
|
||||
public class UninvertingReader extends FilterLeafReader {
|
||||
|
||||
/**
|
||||
* Specifies the type of uninversion to apply for the field.
|
||||
*/
|
||||
public static enum Type {
|
||||
/**
|
||||
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
INTEGER_POINT,
|
||||
/**
|
||||
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
LONG_POINT,
|
||||
/**
|
||||
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
FLOAT_POINT,
|
||||
/**
|
||||
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
*/
|
||||
DOUBLE_POINT,
|
||||
/**
|
||||
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #INTEGER_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_INTEGER,
|
||||
/**
|
||||
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #LONG_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_LONG,
|
||||
/**
|
||||
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #FLOAT_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_FLOAT,
|
||||
/**
|
||||
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link NumericDocValuesField}.
|
||||
* @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
LEGACY_DOUBLE,
|
||||
/**
|
||||
* Single-valued Binary, (e.g. indexed with {@link StringField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link BinaryDocValuesField}.
|
||||
*/
|
||||
BINARY,
|
||||
/**
|
||||
* Single-valued Binary, (e.g. indexed with {@link StringField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedDocValuesField}.
|
||||
*/
|
||||
SORTED,
|
||||
/**
|
||||
* Multi-valued Binary, (e.g. indexed with {@link StringField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_BINARY,
|
||||
/**
|
||||
* Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_INTEGER,
|
||||
/**
|
||||
* Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_FLOAT,
|
||||
/**
|
||||
* Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_LONG,
|
||||
/**
|
||||
* Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
|
||||
* <p>
|
||||
* Fields with this type act as if they were indexed with
|
||||
* {@link SortedSetDocValuesField}.
|
||||
*/
|
||||
SORTED_SET_DOUBLE
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
|
||||
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
|
||||
* and so on.
|
||||
*/
|
||||
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||
return new UninvertingDirectoryReader(in, mapping);
|
||||
}
|
||||
|
||||
static class UninvertingDirectoryReader extends FilterDirectoryReader {
|
||||
final Map<String,Type> mapping;
|
||||
|
||||
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||
super(in, new FilterDirectoryReader.SubReaderWrapper() {
|
||||
@Override
|
||||
public LeafReader wrap(LeafReader reader) {
|
||||
return new UninvertingReader(reader, mapping);
|
||||
}
|
||||
});
|
||||
this.mapping = mapping;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
|
||||
return new UninvertingDirectoryReader(in, mapping);
|
||||
}
|
||||
}
|
||||
|
||||
final Map<String,Type> mapping;
|
||||
final FieldInfos fieldInfos;
|
||||
|
||||
/**
|
||||
* Create a new UninvertingReader with the specified mapping
|
||||
* <p>
|
||||
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
|
||||
* instead.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public UninvertingReader(LeafReader in, Map<String,Type> mapping) {
|
||||
super(in);
|
||||
this.mapping = mapping;
|
||||
ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
|
||||
for (FieldInfo fi : in.getFieldInfos()) {
|
||||
DocValuesType type = fi.getDocValuesType();
|
||||
if (type == DocValuesType.NONE) {
|
||||
Type t = mapping.get(fi.name);
|
||||
if (t != null) {
|
||||
if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) {
|
||||
// type uses points
|
||||
if (fi.getPointDimensionCount() == 0) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// type uses inverted index
|
||||
if (fi.getIndexOptions() == IndexOptions.NONE) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
switch(t) {
|
||||
case INTEGER_POINT:
|
||||
case LONG_POINT:
|
||||
case FLOAT_POINT:
|
||||
case DOUBLE_POINT:
|
||||
case LEGACY_INTEGER:
|
||||
case LEGACY_LONG:
|
||||
case LEGACY_FLOAT:
|
||||
case LEGACY_DOUBLE:
|
||||
type = DocValuesType.NUMERIC;
|
||||
break;
|
||||
case BINARY:
|
||||
type = DocValuesType.BINARY;
|
||||
break;
|
||||
case SORTED:
|
||||
type = DocValuesType.SORTED;
|
||||
break;
|
||||
case SORTED_SET_BINARY:
|
||||
case SORTED_SET_INTEGER:
|
||||
case SORTED_SET_FLOAT:
|
||||
case SORTED_SET_LONG:
|
||||
case SORTED_SET_DOUBLE:
|
||||
type = DocValuesType.SORTED_SET;
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
}
|
||||
filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
|
||||
fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
|
||||
fi.getPointDimensionCount(), fi.getPointNumBytes()));
|
||||
}
|
||||
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNumericDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v != null) {
|
||||
switch (v) {
|
||||
case INTEGER_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER, true);
|
||||
case FLOAT_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER, true);
|
||||
case LONG_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER, true);
|
||||
case DOUBLE_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
case LEGACY_INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER, true);
|
||||
case LEGACY_FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER, true);
|
||||
case LEGACY_LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER, true);
|
||||
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
}
|
||||
}
|
||||
return super.getNumericDocValues(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v == Type.BINARY) {
|
||||
return FieldCache.DEFAULT.getTerms(in, field, true);
|
||||
} else {
|
||||
return in.getBinaryDocValues(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues getSortedDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v == Type.SORTED) {
|
||||
return FieldCache.DEFAULT.getTermsIndex(in, field);
|
||||
} else {
|
||||
return in.getSortedDocValues(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v != null) {
|
||||
switch (v) {
|
||||
case SORTED_SET_INTEGER:
|
||||
case SORTED_SET_FLOAT:
|
||||
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
|
||||
case SORTED_SET_LONG:
|
||||
case SORTED_SET_DOUBLE:
|
||||
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
|
||||
case SORTED_SET_BINARY:
|
||||
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
|
||||
}
|
||||
}
|
||||
return in.getSortedSetDocValues(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getDocsWithField(String field) throws IOException {
|
||||
Type v = getType(field);
|
||||
if (v != null) {
|
||||
switch (v) {
|
||||
case INTEGER_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.INT_POINT_PARSER);
|
||||
case FLOAT_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.FLOAT_POINT_PARSER);
|
||||
case LONG_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LONG_POINT_PARSER);
|
||||
case DOUBLE_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.DOUBLE_POINT_PARSER);
|
||||
case LEGACY_INTEGER: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_INT_PARSER);
|
||||
case LEGACY_FLOAT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_FLOAT_PARSER);
|
||||
case LEGACY_LONG: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_LONG_PARSER);
|
||||
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_DOUBLE_PARSER);
|
||||
default:
|
||||
return FieldCache.DEFAULT.getDocsWithField(in, field, null);
|
||||
}
|
||||
} else {
|
||||
return in.getDocsWithField(field);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the field's uninversion type, or null
|
||||
* if the field doesn't exist or doesn't have a mapping.
|
||||
*/
|
||||
private Type getType(String field) {
|
||||
FieldInfo info = fieldInfos.fieldInfo(field);
|
||||
if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
|
||||
return null;
|
||||
}
|
||||
return mapping.get(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getCoreCacheKey() {
|
||||
return in.getCoreCacheKey();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getCombinedCoreAndDeletesKey() {
|
||||
return in.getCombinedCoreAndDeletesKey();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Uninverting(" + in.toString() + ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* Return information about the backing cache
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static String[] getUninvertedStats() {
|
||||
CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
|
||||
String[] info = new String[entries.length];
|
||||
for (int i = 0; i < entries.length; i++) {
|
||||
info[i] = entries[i].toString();
|
||||
}
|
||||
return info;
|
||||
}
|
||||
}
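For illustration, a minimal usage sketch of the wrap(DirectoryReader, Map) entry point above; the "popularity" IntPoint field and the surrounding setup are assumptions made up for this example, not part of the patch:

import java.util.Collections;
import java.util.Map;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader.Type;

public class UninvertingExample {
  public static void sortWithoutDocValues(Directory dir) throws Exception {
    // Expose the indexed-only "popularity" IntPoint field as if it had numeric docvalues.
    Map<String,Type> mapping = Collections.singletonMap("popularity", Type.INTEGER_POINT);
    try (DirectoryReader reader = UninvertingReader.wrap(DirectoryReader.open(dir), mapping)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Sorting uses the lazily uninverted values, cached per core cache key.
      Sort sort = new Sort(new SortField("popularity", SortField.Type.INT));
      searcher.search(new MatchAllDocsQuery(), 10, sort);
    }
  }
}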
|
|
@@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Support for creating docvalues on-the-fly from the inverted index at runtime.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
|
@@ -29,8 +29,9 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
||||
/**
|
||||
* Allows delete-by-queries to access uninverted docvalues.
|
||||
|
|
|
@@ -24,7 +24,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@@ -34,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
|
|
@@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestSlowCompositeReaderWrapper extends LuceneTestCase {
|
||||
|
||||
public void testCoreListenerOnSlowCompositeReaderWrapper() throws IOException {
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), newDirectory());
|
||||
final int numDocs = TestUtil.nextInt(random(), 1, 5);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
w.addDocument(new Document());
|
||||
if (random().nextBoolean()) {
|
||||
w.commit();
|
||||
}
|
||||
}
|
||||
w.commit();
|
||||
w.close();
|
||||
|
||||
final IndexReader reader = DirectoryReader.open(w.w.getDirectory());
|
||||
final LeafReader leafReader = SlowCompositeReaderWrapper.wrap(reader);
|
||||
|
||||
final int numListeners = TestUtil.nextInt(random(), 1, 10);
|
||||
final List<LeafReader.CoreClosedListener> listeners = new ArrayList<>();
|
||||
AtomicInteger counter = new AtomicInteger(numListeners);
|
||||
|
||||
for (int i = 0; i < numListeners; ++i) {
|
||||
CountCoreListener listener = new CountCoreListener(counter, leafReader.getCoreCacheKey());
|
||||
listeners.add(listener);
|
||||
leafReader.addCoreClosedListener(listener);
|
||||
}
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
leafReader.addCoreClosedListener(listeners.get(random().nextInt(listeners.size())));
|
||||
}
|
||||
final int removed = random().nextInt(numListeners);
|
||||
Collections.shuffle(listeners, random());
|
||||
for (int i = 0; i < removed; ++i) {
|
||||
leafReader.removeCoreClosedListener(listeners.get(i));
|
||||
}
|
||||
assertEquals(numListeners, counter.get());
|
||||
// make sure listeners are registered on the wrapped reader and that closing any of them has the same effect
|
||||
if (random().nextBoolean()) {
|
||||
reader.close();
|
||||
} else {
|
||||
leafReader.close();
|
||||
}
|
||||
assertEquals(removed, counter.get());
|
||||
w.w.getDirectory().close();
|
||||
}
|
||||
|
||||
private static final class CountCoreListener implements LeafReader.CoreClosedListener {
|
||||
|
||||
private final AtomicInteger count;
|
||||
private final Object coreCacheKey;
|
||||
|
||||
public CountCoreListener(AtomicInteger count, Object coreCacheKey) {
|
||||
this.count = count;
|
||||
this.coreCacheKey = coreCacheKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onClose(Object coreCacheKey) {
|
||||
assertSame(this.coreCacheKey, coreCacheKey);
|
||||
count.decrementAndGet();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@@ -25,12 +25,12 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.uninverting.DocTermOrds;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.uninverting.DocTermOrds;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.junit.After;
|
||||
import org.junit.BeforeClass;
|
||||
|
|
|
@@ -42,13 +42,12 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortField.Type;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
@@ -56,6 +55,7 @@ import org.apache.lucene.util.TestUtil;
|
|||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.junit.BeforeClass;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
|
|
@@ -0,0 +1,681 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
|
||||
// TODO:
|
||||
// - test w/ del docs
|
||||
// - test prefix
|
||||
// - test w/ cutoff
|
||||
// - crank docs way up so we get some merging sometimes
|
||||
|
||||
public class TestDocTermOrds extends LuceneTestCase {
|
||||
|
||||
public void testEmptyIndex() throws IOException {
|
||||
final Directory dir = newDirectory();
|
||||
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
iw.close();
|
||||
|
||||
final DirectoryReader ir = DirectoryReader.open(dir);
|
||||
TestUtil.checkReader(ir);
|
||||
|
||||
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
|
||||
TestUtil.checkReader(composite);
|
||||
|
||||
// check the leaves
|
||||
// (normally there are none for an empty index, so this is really just future
|
||||
// proofing in case that changes for some reason)
|
||||
for (LeafReaderContext rc : ir.leaves()) {
|
||||
final LeafReader r = rc.reader();
|
||||
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
|
||||
assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
|
||||
assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
|
||||
}
|
||||
|
||||
// check the composite
|
||||
final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
|
||||
assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
|
||||
assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSimple() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||
Document doc = new Document();
|
||||
Field field = newTextField("field", "", Field.Store.NO);
|
||||
doc.add(field);
|
||||
field.setStringValue("a b c");
|
||||
w.addDocument(doc);
|
||||
|
||||
field.setStringValue("d e f");
|
||||
w.addDocument(doc);
|
||||
|
||||
field.setStringValue("a f");
|
||||
w.addDocument(doc);
|
||||
|
||||
final IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(ar);
|
||||
final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
|
||||
SortedSetDocValues iter = dto.iterator(ar);
|
||||
|
||||
iter.setDocument(0);
|
||||
assertEquals(0, iter.nextOrd());
|
||||
assertEquals(1, iter.nextOrd());
|
||||
assertEquals(2, iter.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
|
||||
|
||||
iter.setDocument(1);
|
||||
assertEquals(3, iter.nextOrd());
|
||||
assertEquals(4, iter.nextOrd());
|
||||
assertEquals(5, iter.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
|
||||
|
||||
iter.setDocument(2);
|
||||
assertEquals(0, iter.nextOrd());
|
||||
assertEquals(5, iter.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
||||
final int NUM_TERMS = atLeast(20);
|
||||
final Set<BytesRef> terms = new HashSet<>();
|
||||
while(terms.size() < NUM_TERMS) {
|
||||
final String s = TestUtil.randomRealisticUnicodeString(random());
|
||||
//final String s = _TestUtil.randomSimpleString(random);
|
||||
if (s.length() > 0) {
|
||||
terms.add(new BytesRef(s));
|
||||
}
|
||||
}
|
||||
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
|
||||
Arrays.sort(termsArray);
|
||||
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
|
||||
// Sometimes swap in codec that impls ord():
|
||||
if (random().nextInt(10) == 7) {
|
||||
// Make sure terms index has ords:
|
||||
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
|
||||
conf.setCodec(codec);
|
||||
}
|
||||
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
|
||||
|
||||
final int[][] idToOrds = new int[NUM_DOCS][];
|
||||
final Set<Integer> ordsForDocSet = new HashSet<>();
|
||||
|
||||
for(int id=0;id<NUM_DOCS;id++) {
|
||||
Document doc = new Document();
|
||||
|
||||
doc.add(new LegacyIntField("id", id, Field.Store.YES));
|
||||
|
||||
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
|
||||
while(ordsForDocSet.size() < termCount) {
|
||||
ordsForDocSet.add(random().nextInt(termsArray.length));
|
||||
}
|
||||
final int[] ordsForDoc = new int[termCount];
|
||||
int upto = 0;
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: doc id=" + id);
|
||||
}
|
||||
for(int ord : ordsForDocSet) {
|
||||
ordsForDoc[upto++] = ord;
|
||||
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" f=" + termsArray[ord].utf8ToString());
|
||||
}
|
||||
doc.add(field);
|
||||
}
|
||||
ordsForDocSet.clear();
|
||||
Arrays.sort(ordsForDoc);
|
||||
idToOrds[id] = ordsForDoc;
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
final DirectoryReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: reader=" + r);
|
||||
}
|
||||
|
||||
for(LeafReaderContext ctx : r.leaves()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: sub=" + ctx.reader());
|
||||
}
|
||||
verify(ctx.reader(), idToOrds, termsArray, null);
|
||||
}
|
||||
|
||||
// Also test top-level reader: its enum does not support
|
||||
// ord, so this forces the OrdWrapper to run:
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: top reader");
|
||||
}
|
||||
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(slowR);
|
||||
verify(slowR, idToOrds, termsArray, null);
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandomWithPrefix() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
||||
final Set<String> prefixes = new HashSet<>();
|
||||
final int numPrefix = TestUtil.nextInt(random(), 2, 7);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: use " + numPrefix + " prefixes");
|
||||
}
|
||||
while(prefixes.size() < numPrefix) {
|
||||
prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
|
||||
//prefixes.add(_TestUtil.randomSimpleString(random));
|
||||
}
|
||||
final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);
|
||||
|
||||
final int NUM_TERMS = atLeast(20);
|
||||
final Set<BytesRef> terms = new HashSet<>();
|
||||
while(terms.size() < NUM_TERMS) {
|
||||
final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random());
|
||||
//final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
|
||||
if (s.length() > 0) {
|
||||
terms.add(new BytesRef(s));
|
||||
}
|
||||
}
|
||||
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
|
||||
Arrays.sort(termsArray);
|
||||
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
|
||||
// Sometimes swap in codec that impls ord():
|
||||
if (random().nextInt(10) == 7) {
|
||||
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
|
||||
conf.setCodec(codec);
|
||||
}
|
||||
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
|
||||
|
||||
final int[][] idToOrds = new int[NUM_DOCS][];
|
||||
final Set<Integer> ordsForDocSet = new HashSet<>();
|
||||
|
||||
for(int id=0;id<NUM_DOCS;id++) {
|
||||
Document doc = new Document();
|
||||
|
||||
doc.add(new LegacyIntField("id", id, Field.Store.YES));
|
||||
|
||||
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
|
||||
while(ordsForDocSet.size() < termCount) {
|
||||
ordsForDocSet.add(random().nextInt(termsArray.length));
|
||||
}
|
||||
final int[] ordsForDoc = new int[termCount];
|
||||
int upto = 0;
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: doc id=" + id);
|
||||
}
|
||||
for(int ord : ordsForDocSet) {
|
||||
ordsForDoc[upto++] = ord;
|
||||
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" f=" + termsArray[ord].utf8ToString());
|
||||
}
|
||||
doc.add(field);
|
||||
}
|
||||
ordsForDocSet.clear();
|
||||
Arrays.sort(ordsForDoc);
|
||||
idToOrds[id] = ordsForDoc;
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
final DirectoryReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: reader=" + r);
|
||||
}
|
||||
|
||||
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(slowR);
|
||||
for(String prefix : prefixesArray) {
|
||||
|
||||
final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
|
||||
|
||||
final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
|
||||
for(int id=0;id<NUM_DOCS;id++) {
|
||||
final int[] docOrds = idToOrds[id];
|
||||
final List<Integer> newOrds = new ArrayList<>();
|
||||
for(int ord : idToOrds[id]) {
|
||||
if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
|
||||
newOrds.add(ord);
|
||||
}
|
||||
}
|
||||
final int[] newOrdsArray = new int[newOrds.size()];
|
||||
int upto = 0;
|
||||
for(int ord : newOrds) {
|
||||
newOrdsArray[upto++] = ord;
|
||||
}
|
||||
idToOrdsPrefix[id] = newOrdsArray;
|
||||
}
|
||||
|
||||
for(LeafReaderContext ctx : r.leaves()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: sub=" + ctx.reader());
|
||||
}
|
||||
verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
|
||||
}
|
||||
|
||||
// Also test top-level reader: its enum does not support
|
||||
// ord, so this forces the OrdWrapper to run:
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: top reader");
|
||||
}
|
||||
verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
|
||||
}
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
|
||||
|
||||
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(),
|
||||
"field",
|
||||
prefixRef,
|
||||
Integer.MAX_VALUE,
|
||||
TestUtil.nextInt(random(), 2, 10));
|
||||
|
||||
|
||||
final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER, false);
|
||||
/*
|
||||
for(int docID=0;docID<subR.maxDoc();docID++) {
|
||||
System.out.println(" docID=" + docID + " id=" + docIDToID[docID]);
|
||||
}
|
||||
*/
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: verify prefix=" + (prefixRef==null ? "null" : prefixRef.utf8ToString()));
|
||||
System.out.println("TEST: all TERMS:");
|
||||
TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
|
||||
int ord = 0;
|
||||
while(allTE.next() != null) {
|
||||
System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
|
||||
}
|
||||
}
|
||||
|
||||
//final TermsEnum te = subR.fields().terms("field").iterator();
|
||||
final TermsEnum te = dto.getOrdTermsEnum(r);
|
||||
if (dto.numTerms() == 0) {
|
||||
if (prefixRef == null) {
|
||||
assertNull(MultiFields.getTerms(r, "field"));
|
||||
} else {
|
||||
Terms terms = MultiFields.getTerms(r, "field");
|
||||
if (terms != null) {
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
|
||||
if (result != TermsEnum.SeekStatus.END) {
|
||||
assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: TERMS:");
|
||||
te.seekExact(0);
|
||||
while(true) {
|
||||
System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString());
|
||||
if (te.next() == null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SortedSetDocValues iter = dto.iterator(r);
|
||||
for(int docID=0;docID<r.maxDoc();docID++) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
|
||||
}
|
||||
iter.setDocument(docID);
|
||||
final int[] answers = idToOrds[(int) docIDToID.get(docID)];
|
||||
int upto = 0;
|
||||
long ord;
|
||||
while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
te.seekExact(ord);
|
||||
final BytesRef expected = termsArray[answers[upto++]];
|
||||
if (VERBOSE) {
|
||||
System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
|
||||
}
|
||||
assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
|
||||
}
|
||||
assertEquals(answers.length, upto);
|
||||
}
|
||||
}
|
||||
|
||||
public void testBackToTheFuture() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(newStringField("foo", "bar", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newStringField("foo", "baz", Field.Store.NO));
|
||||
// we need a second value for a doc, or we don't actually test DocTermOrds!
|
||||
doc.add(newStringField("foo", "car", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
DirectoryReader r1 = DirectoryReader.open(iw);
|
||||
|
||||
iw.deleteDocuments(new Term("foo", "baz"));
|
||||
DirectoryReader r2 = DirectoryReader.open(iw);
|
||||
|
||||
FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
|
||||
assertEquals(3, v.getValueCount());
|
||||
v.setDocument(1);
|
||||
assertEquals(1, v.nextOrd());
|
||||
|
||||
iw.close();
|
||||
r1.close();
|
||||
r2.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNumericEncoded32() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNumericEncoded64() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSortedTermsEnum() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", "hello", Field.Store.NO));
|
||||
iwriter.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("field", "world", Field.Store.NO));
|
||||
// we need a second value for a doc, or we don't actually test DocTermOrds!
|
||||
doc.add(new StringField("field", "hello", Field.Store.NO));
|
||||
iwriter.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("field", "beer", Field.Store.NO));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.forceMerge(1);
|
||||
|
||||
DirectoryReader ireader = iwriter.getReader();
|
||||
iwriter.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ireader);
|
||||
SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
|
||||
assertEquals(3, dv.getValueCount());
|
||||
|
||||
TermsEnum termsEnum = dv.termsEnum();
|
||||
|
||||
// next()
|
||||
assertEquals("beer", termsEnum.next().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
assertEquals("hello", termsEnum.next().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
assertEquals("world", termsEnum.next().utf8ToString());
|
||||
assertEquals(2, termsEnum.ord());
|
||||
|
||||
// seekCeil()
|
||||
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
|
||||
assertEquals("hello", termsEnum.term().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
|
||||
assertEquals("beer", termsEnum.term().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
|
||||
|
||||
// seekExact()
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("beer")));
|
||||
assertEquals("beer", termsEnum.term().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("hello")));
|
||||
assertEquals("hello", termsEnum.term().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("world")));
|
||||
assertEquals("world", termsEnum.term().utf8ToString());
|
||||
assertEquals(2, termsEnum.ord());
|
||||
assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
|
||||
|
||||
// seek(ord)
|
||||
termsEnum.seekExact(0);
|
||||
assertEquals("beer", termsEnum.term().utf8ToString());
|
||||
assertEquals(0, termsEnum.ord());
|
||||
termsEnum.seekExact(1);
|
||||
assertEquals("hello", termsEnum.term().utf8ToString());
|
||||
assertEquals(1, termsEnum.ord());
|
||||
termsEnum.seekExact(2);
|
||||
assertEquals("world", termsEnum.term().utf8ToString());
|
||||
assertEquals(2, termsEnum.ord());
|
||||
|
||||
// lookupTerm(BytesRef)
|
||||
assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
|
||||
assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
|
||||
assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
|
||||
assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
|
||||
assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
|
||||
assertEquals(2, dv.lookupTerm(new BytesRef("world")));
|
||||
assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testActuallySingleValued() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(null);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
IndexWriter iw = new IndexWriter(dir, iwconfig);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "baz", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "baz", Field.Store.NO));
|
||||
doc.add(new StringField("foo", "baz", Field.Store.NO));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.forceMerge(1);
|
||||
iw.close();
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
|
||||
assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
|
||||
assertEquals(2, v.getValueCount());
|
||||
|
||||
v.setDocument(0);
|
||||
assertEquals(0, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(1);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(2);
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
v.setDocument(3);
|
||||
assertEquals(1, v.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||
|
||||
BytesRef value = v.lookupOrd(0);
|
||||
assertEquals("bar", value.utf8ToString());
|
||||
|
||||
value = v.lookupOrd(1);
|
||||
assertEquals("baz", value.utf8ToString());
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,731 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CyclicBarrier;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LogDocMergePolicy;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestFieldCache extends LuceneTestCase {
|
||||
private static LeafReader reader;
|
||||
private static int NUM_DOCS;
|
||||
private static int NUM_ORDS;
|
||||
private static String[] unicodeStrings;
|
||||
private static BytesRef[][] multiValued;
|
||||
private static Directory directory;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
NUM_DOCS = atLeast(500);
|
||||
NUM_ORDS = atLeast(2);
|
||||
directory = newDirectory();
|
||||
IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
|
||||
long theLong = Long.MAX_VALUE;
|
||||
double theDouble = Double.MAX_VALUE;
|
||||
int theInt = Integer.MAX_VALUE;
|
||||
float theFloat = Float.MAX_VALUE;
|
||||
unicodeStrings = new String[NUM_DOCS];
|
||||
multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: setUp");
|
||||
}
|
||||
for (int i = 0; i < NUM_DOCS; i++){
|
||||
Document doc = new Document();
|
||||
doc.add(new LongPoint("theLong", theLong--));
|
||||
doc.add(new DoublePoint("theDouble", theDouble--));
|
||||
doc.add(new IntPoint("theInt", theInt--));
|
||||
doc.add(new FloatPoint("theFloat", theFloat--));
|
||||
if (i%2 == 0) {
|
||||
doc.add(new IntPoint("sparse", i));
|
||||
}
|
||||
|
||||
if (i%2 == 0) {
|
||||
doc.add(new IntPoint("numInt", i));
|
||||
}
|
||||
|
||||
// sometimes skip the field:
|
||||
if (random().nextInt(40) != 17) {
|
||||
unicodeStrings[i] = generateString(i);
|
||||
doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
|
||||
}
|
||||
|
||||
// sometimes skip the field:
|
||||
if (random().nextInt(10) != 8) {
|
||||
for (int j = 0; j < NUM_ORDS; j++) {
|
||||
String newValue = generateString(i);
|
||||
multiValued[i][j] = new BytesRef(newValue);
|
||||
doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
|
||||
}
|
||||
Arrays.sort(multiValued[i]);
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.forceMerge(1); // this test relies on one segment and docid order
|
||||
IndexReader r = DirectoryReader.open(writer);
|
||||
assertEquals(1, r.leaves().size());
|
||||
reader = r.leaves().get(0).reader();
|
||||
TestUtil.checkReader(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
unicodeStrings = null;
|
||||
multiValued = null;
|
||||
}
|
||||
|
||||
public void test() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Long.MAX_VALUE - i, longs.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
|
||||
}
|
||||
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
|
||||
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
}
|
||||
|
||||
docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
|
||||
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
|
||||
// getTermsIndex
|
||||
SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
final String s;
|
||||
final int ord = termsIndex.getOrd(i);
|
||||
if (ord == -1) {
|
||||
s = null;
|
||||
} else {
|
||||
s = termsIndex.lookupOrd(ord).utf8ToString();
|
||||
}
|
||||
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
|
||||
}
|
||||
|
||||
int nTerms = termsIndex.getValueCount();
|
||||
|
||||
TermsEnum tenum = termsIndex.termsEnum();
|
||||
for (int i=0; i<nTerms; i++) {
|
||||
BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
|
||||
final BytesRef val = termsIndex.lookupOrd(i);
|
||||
// System.out.println("i="+i);
|
||||
assertEquals(val, val1);
|
||||
}
|
||||
|
||||
// seek the enum around (note this isn't a great test here)
|
||||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
int k = random().nextInt(nTerms);
|
||||
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
|
||||
assertEquals(val, tenum.term());
|
||||
}
|
||||
|
||||
for(int i=0;i<nTerms;i++) {
|
||||
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
|
||||
assertEquals(val, tenum.term());
|
||||
}
|
||||
|
||||
// test bad field
|
||||
termsIndex = cache.getTermsIndex(reader, "bogusfield");
|
||||
|
||||
// getTerms
|
||||
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true);
|
||||
Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString", null);
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
final String s;
|
||||
if (!bits.get(i)) {
|
||||
s = null;
|
||||
} else {
|
||||
s = terms.get(i).utf8ToString();
|
||||
}
|
||||
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
|
||||
}
|
||||
|
||||
// test bad field
|
||||
terms = cache.getTerms(reader, "bogusfield", false);
|
||||
|
||||
// getDocTermOrds
|
||||
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
|
||||
int numEntries = cache.getCacheEntries().length;
|
||||
// ask for it again, and check that we didn't create any additional entries:
|
||||
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
|
||||
assertEquals(numEntries, cache.getCacheEntries().length);
|
||||
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
termOrds.setDocument(i);
|
||||
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
|
||||
List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
|
||||
for (BytesRef v : values) {
|
||||
if (v == null) {
|
||||
// why does this test use null values... instead of an empty list: confusing
|
||||
break;
|
||||
}
|
||||
long ord = termOrds.nextOrd();
|
||||
assert ord != SortedSetDocValues.NO_MORE_ORDS;
|
||||
BytesRef scratch = termOrds.lookupOrd(ord);
|
||||
assertEquals(v, scratch);
|
||||
}
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
|
||||
}
|
||||
|
||||
// test bad field
|
||||
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
|
||||
assertTrue(termOrds.getValueCount() == 0);
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
}
|
||||
|
||||
public void testEmptyIndex() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
|
||||
writer.close();
|
||||
IndexReader r = DirectoryReader.open(dir);
|
||||
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(reader);
|
||||
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
|
||||
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static String generateString(int i) {
|
||||
String s = null;
|
||||
if (i > 0 && random().nextInt(3) == 1) {
|
||||
// reuse past string -- try to find one that's not null
|
||||
for(int iter = 0; iter < 10 && s == null;iter++) {
|
||||
s = unicodeStrings[random().nextInt(i)];
|
||||
}
|
||||
if (s == null) {
|
||||
s = TestUtil.randomUnicodeString(random());
|
||||
}
|
||||
} else {
|
||||
s = TestUtil.randomUnicodeString(random());
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
public void testDocsWithField() throws Exception {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
|
||||
// The double[] takes one slot, and docsWithField should also
|
||||
// have been populated:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
|
||||
|
||||
// No new entries should appear:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
assertTrue(bits instanceof Bits.MatchAllBits);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
|
||||
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.INT_POINT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, numInts.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetDocsWithFieldThreadSafety() throws Exception {
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
int NUM_THREADS = 3;
|
||||
Thread[] threads = new Thread[NUM_THREADS];
|
||||
final AtomicBoolean failed = new AtomicBoolean();
|
||||
final AtomicInteger iters = new AtomicInteger();
|
||||
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
|
||||
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
|
||||
new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
cache.purgeAllCaches();
|
||||
iters.incrementAndGet();
|
||||
}
|
||||
});
|
||||
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
try {
|
||||
while(!failed.get()) {
|
||||
final int op = random().nextInt(3);
|
||||
if (op == 0) {
|
||||
// Purge all caches & resume, once all
|
||||
// threads get here:
|
||||
restart.await();
|
||||
if (iters.get() >= NUM_ITER) {
|
||||
break;
|
||||
}
|
||||
} else if (op == 1) {
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
} else {
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
failed.set(true);
|
||||
restart.reset();
|
||||
throw new RuntimeException(t);
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[threadIDX].start();
|
||||
}
|
||||
|
||||
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX].join();
|
||||
}
|
||||
assertFalse(failed.get());
|
||||
}
|
||||
|
||||
public void testDocValuesIntegration() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(null);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
|
||||
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
|
||||
doc.add(new NumericDocValuesField("numeric", 42));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
// Binary type: can be retrieved via getTerms()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER, false);
|
||||
});
|
||||
|
||||
BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true);
|
||||
final BytesRef term = binary.get(0);
|
||||
assertEquals("binary value", term.utf8ToString());
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTermsIndex(ar, "binary");
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "binary");
|
||||
});
|
||||
|
||||
Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER, false);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "sorted");
|
||||
});
|
||||
|
||||
binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true);
|
||||
BytesRef scratch = binary.get(0);
|
||||
assertEquals("sorted value", scratch.utf8ToString());
|
||||
|
||||
SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
|
||||
assertEquals(0, sorted.getOrd(0));
|
||||
assertEquals(1, sorted.getValueCount());
|
||||
scratch = sorted.get(0);
|
||||
assertEquals("sorted value", scratch.utf8ToString());
|
||||
|
||||
SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(0, sortedSet.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
assertEquals(1, sortedSet.getValueCount());
|
||||
|
||||
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
// Numeric type: can be retrieved via getInts() and so on
|
||||
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER, false);
|
||||
assertEquals(42, numeric.get(0));
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTerms(ar, "numeric", true);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "numeric");
|
||||
});
|
||||
|
||||
bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
// SortedSet type: can be retrieved via getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER, false);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
|
||||
});
|
||||
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
new DocTermOrds(ar, null, "sortedset");
|
||||
});
|
||||
|
||||
sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(0, sortedSet.nextOrd());
|
||||
assertEquals(1, sortedSet.nextOrd());
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
assertEquals(2, sortedSet.getValueCount());
|
||||
|
||||
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
|
||||
assertTrue(bits.get(0));
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonexistantFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
|
||||
BytesRef scratch = binaries.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
|
||||
assertEquals(-1, sorted.getOrd(0));
|
||||
scratch = sorted.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
|
||||
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
|
||||
assertFalse(bits.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonIndexedFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("bogusbytes", "bogus"));
|
||||
doc.add(new StoredField("bogusshorts", "bogus"));
|
||||
doc.add(new StoredField("bogusints", "bogus"));
|
||||
doc.add(new StoredField("boguslongs", "bogus"));
|
||||
doc.add(new StoredField("bogusfloats", "bogus"));
|
||||
doc.add(new StoredField("bogusdoubles", "bogus"));
|
||||
doc.add(new StoredField("bogusterms", "bogus"));
|
||||
doc.add(new StoredField("bogustermsindex", "bogus"));
|
||||
doc.add(new StoredField("bogusmultivalued", "bogus"));
|
||||
doc.add(new StoredField("bogusbits", "bogus"));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
|
||||
BytesRef scratch = binaries.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
|
||||
assertEquals(-1, sorted.getOrd(0));
|
||||
scratch = sorted.get(0);
|
||||
assertEquals(0, scratch.length);
|
||||
|
||||
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
|
||||
sortedSet.setDocument(0);
|
||||
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||
|
||||
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
|
||||
assertFalse(bits.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent using the full long range
|
||||
public void testLongFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
LongPoint field = new LongPoint("f", 0L);
|
||||
StoredField field2 = new StoredField("f", 0L);
|
||||
doc.add(field);
|
||||
doc.add(field2);
|
||||
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final long v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Long.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Long.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextLong(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setLongValue(v);
|
||||
field2.setLongValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], longs.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent using the full int range
|
||||
public void testIntFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
IntPoint field = new IntPoint("f", 0);
|
||||
doc.add(field);
|
||||
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final int v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Integer.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Integer.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextInt(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setIntValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], ints.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
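
The tests above exercise the FieldCache numeric API that this patch moves into org.apache.solr.uninverting. A minimal usage sketch (not part of the patch), assuming a LeafReader over an index with a hypothetical LongPoint field named "price"; it uses only the getNumerics/getDocsWithField calls shown in the test:

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;
import org.apache.solr.uninverting.FieldCache;

public class FieldCacheUsageSketch {
  // Sums the uninverted long values of one segment; "price" is a made-up field name.
  static long sumExistingValues(LeafReader leaf) throws IOException {
    NumericDocValues values =
        FieldCache.DEFAULT.getNumerics(leaf, "price", FieldCache.LONG_POINT_PARSER, false);
    Bits docsWithField =
        FieldCache.DEFAULT.getDocsWithField(leaf, "price", FieldCache.LONG_POINT_PARSER);
    long sum = 0;
    for (int doc = 0; doc < leaf.maxDoc(); doc++) {
      if (docsWithField.get(doc)) { // skip documents that never indexed the field
        sum += values.get(doc);     // values are cached per segment core cache key
      }
    }
    return sum;
  }
}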
|
|
@@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestFieldCacheReopen extends LuceneTestCase {
|
||||
|
||||
// TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
|
||||
|
||||
// LUCENE-1579: Ensure that on a reopened reader, any
|
||||
// shared segments reuse the doc values arrays in
|
||||
// FieldCache
|
||||
public void testFieldCacheReuseAfterReopen() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(
|
||||
dir,
|
||||
newIndexWriterConfig(new MockAnalyzer(random())).
|
||||
setMergePolicy(newLogMergePolicy(10))
|
||||
);
|
||||
Document doc = new Document();
|
||||
doc.add(new IntPoint("number", 17));
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
||||
// Open reader1
|
||||
DirectoryReader r = DirectoryReader.open(dir);
|
||||
LeafReader r1 = getOnlyLeafReader(r);
|
||||
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER, false);
|
||||
assertEquals(17, ints.get(0));
|
||||
|
||||
// Add new segment
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
||||
// Reopen reader1 --> reader2
|
||||
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
|
||||
assertNotNull(r2);
|
||||
r.close();
|
||||
LeafReader sub0 = r2.leaves().get(0).reader();
|
||||
final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER, false);
|
||||
r2.close();
|
||||
assertTrue(ints == ints2);
|
||||
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
}
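
A short sketch of the reuse property verified above, not part of the patch: FieldCache entries are keyed by the segment reader's core cache key, so a reopened DirectoryReader reuses the uninverted array for unchanged segments. The field name "number" matches the test; any point-indexed int field would do.

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.uninverting.FieldCache;

public class ReopenReuseSketch {
  static void touchAllSegments(DirectoryReader reader) throws IOException {
    for (LeafReaderContext leaf : reader.leaves()) {
      // A second call for the same segment and parser returns the cached instance.
      NumericDocValues first =
          FieldCache.DEFAULT.getNumerics(leaf.reader(), "number", FieldCache.INT_POINT_PARSER, false);
      NumericDocValues second =
          FieldCache.DEFAULT.getNumerics(leaf.reader(), "number", FieldCache.INT_POINT_PARSER, false);
      assert first == second; // same array, as asserted in testFieldCacheReuseAfterReopen
    }
  }
}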
|
|
@@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;
|
||||
import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType;
|
||||
|
||||
public class TestFieldCacheSanityChecker extends LuceneTestCase {
|
||||
|
||||
protected LeafReader readerA;
|
||||
protected LeafReader readerB;
|
||||
protected LeafReader readerX;
|
||||
protected LeafReader readerAclone;
|
||||
protected Directory dirA, dirB;
|
||||
private static final int NUM_DOCS = 1000;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
dirA = newDirectory();
|
||||
dirB = newDirectory();
|
||||
|
||||
IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
long theLong = Long.MAX_VALUE;
|
||||
double theDouble = Double.MAX_VALUE;
|
||||
int theInt = Integer.MAX_VALUE;
|
||||
float theFloat = Float.MAX_VALUE;
|
||||
for (int i = 0; i < NUM_DOCS; i++){
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
|
||||
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
|
||||
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
|
||||
if (0 == i % 3) {
|
||||
wA.addDocument(doc);
|
||||
} else {
|
||||
wB.addDocument(doc);
|
||||
}
|
||||
}
|
||||
wA.close();
|
||||
wB.close();
|
||||
DirectoryReader rA = DirectoryReader.open(dirA);
|
||||
readerA = SlowCompositeReaderWrapper.wrap(rA);
|
||||
readerAclone = SlowCompositeReaderWrapper.wrap(rA);
|
||||
readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA));
|
||||
readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB));
|
||||
readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
readerA.close();
|
||||
readerAclone.close();
|
||||
readerB.close();
|
||||
readerX.close();
|
||||
dirA.close();
|
||||
dirB.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testSanity() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
cache.getNumerics(readerA, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
cache.getNumerics(readerAclone, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
cache.getNumerics(readerB, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
|
||||
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
|
||||
|
||||
// // //
|
||||
|
||||
Insanity[] insanity =
|
||||
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
|
||||
|
||||
if (0 < insanity.length)
|
||||
dumpArray(getTestClass().getName() + "#" + getTestName()
|
||||
+ " INSANITY", insanity, System.err);
|
||||
|
||||
assertEquals("shouldn't be any cache insanity", 0, insanity.length);
|
||||
cache.purgeAllCaches();
|
||||
}
|
||||
|
||||
public void testInsanity1() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
|
||||
cache.getTerms(readerX, "theInt", false);
|
||||
|
||||
// // //
|
||||
|
||||
Insanity[] insanity =
|
||||
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
|
||||
|
||||
assertEquals("wrong number of cache errors", 1, insanity.length);
|
||||
assertEquals("wrong type of cache error",
|
||||
InsanityType.VALUEMISMATCH,
|
||||
insanity[0].getType());
|
||||
assertEquals("wrong number of entries in cache error", 2,
|
||||
insanity[0].getCacheEntries().length);
|
||||
|
||||
// we expect bad things, don't let tearDown complain about them
|
||||
cache.purgeAllCaches();
|
||||
}
|
||||
|
||||
public void testInsanity2() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
cache.getTerms(readerA, "theInt", false);
|
||||
cache.getTerms(readerB, "theInt", false);
|
||||
cache.getTerms(readerX, "theInt", false);
|
||||
|
||||
|
||||
// // //
|
||||
|
||||
Insanity[] insanity =
|
||||
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
|
||||
|
||||
assertEquals("wrong number of cache errors", 1, insanity.length);
|
||||
assertEquals("wrong type of cache error",
|
||||
InsanityType.SUBREADER,
|
||||
insanity[0].getType());
|
||||
assertEquals("wrong number of entries in cache error", 3,
|
||||
insanity[0].getCacheEntries().length);
|
||||
|
||||
// we expect bad things, don't let tearDown complain about them
|
||||
cache.purgeAllCaches();
|
||||
}
|
||||
|
||||
}
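
A hedged sketch of how an application might run the same sanity check outside a test, using only the checkSanity/Insanity API asserted above; the helper name is made up and not part of the patch:

import org.apache.solr.uninverting.FieldCache;
import org.apache.solr.uninverting.FieldCacheSanityChecker;
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;

public class CacheSanitySketch {
  static void assertCacheIsSane() {
    Insanity[] problems =
        FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT.getCacheEntries());
    for (Insanity problem : problems) {
      // Each Insanity groups the conflicting cache entries and reports its type,
      // e.g. VALUEMISMATCH or SUBREADER as asserted in testInsanity1/testInsanity2.
      System.err.println(problem.getType() + ": " + problem.getCacheEntries().length + " entries");
    }
    if (problems.length > 0) {
      throw new IllegalStateException("FieldCache insanity detected");
    }
  }
}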
|
File diff suppressed because it is too large
|
@@ -0,0 +1,318 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/** random sorting tests with uninversion */
|
||||
public class TestFieldCacheSortRandom extends LuceneTestCase {
|
||||
|
||||
public void testRandomStringSort() throws Exception {
|
||||
testRandomStringSort(SortField.Type.STRING);
|
||||
}
|
||||
|
||||
public void testRandomStringValSort() throws Exception {
|
||||
testRandomStringSort(SortField.Type.STRING_VAL);
|
||||
}
|
||||
|
||||
private void testRandomStringSort(SortField.Type type) throws Exception {
|
||||
Random random = new Random(random().nextLong());
|
||||
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
final Directory dir = newDirectory();
|
||||
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||
final boolean allowDups = random.nextBoolean();
|
||||
final Set<String> seen = new HashSet<>();
|
||||
final int maxLength = TestUtil.nextInt(random, 5, 100);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
|
||||
}
|
||||
|
||||
int numDocs = 0;
|
||||
final List<BytesRef> docValues = new ArrayList<>();
|
||||
// TODO: deletions
|
||||
while (numDocs < NUM_DOCS) {
|
||||
final Document doc = new Document();
|
||||
|
||||
// 10% of the time, the document is missing the value:
|
||||
final BytesRef br;
|
||||
if (random().nextInt(10) != 7) {
|
||||
final String s;
|
||||
if (random.nextBoolean()) {
|
||||
s = TestUtil.randomSimpleString(random, maxLength);
|
||||
} else {
|
||||
s = TestUtil.randomUnicodeString(random, maxLength);
|
||||
}
|
||||
|
||||
if (!allowDups) {
|
||||
if (seen.contains(s)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(s);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": s=" + s);
|
||||
}
|
||||
|
||||
doc.add(new StringField("stringdv", s, Field.Store.NO));
|
||||
docValues.add(new BytesRef(s));
|
||||
|
||||
} else {
|
||||
br = null;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": <missing>");
|
||||
}
|
||||
docValues.add(null);
|
||||
}
|
||||
|
||||
doc.add(new IntPoint("id", numDocs));
|
||||
doc.add(new StoredField("id", numDocs));
|
||||
writer.addDocument(doc);
|
||||
numDocs++;
|
||||
|
||||
if (random.nextInt(40) == 17) {
|
||||
// force flush
|
||||
writer.getReader().close();
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,UninvertingReader.Type> mapping = new HashMap<>();
|
||||
mapping.put("stringdv", Type.SORTED);
|
||||
mapping.put("id", Type.INTEGER_POINT);
|
||||
final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
|
||||
writer.close();
|
||||
if (VERBOSE) {
|
||||
System.out.println(" reader=" + r);
|
||||
}
|
||||
|
||||
final IndexSearcher s = newSearcher(r, false);
|
||||
final int ITERS = atLeast(100);
|
||||
for(int iter=0;iter<ITERS;iter++) {
|
||||
final boolean reverse = random.nextBoolean();
|
||||
|
||||
final TopFieldDocs hits;
|
||||
final SortField sf;
|
||||
final boolean sortMissingLast;
|
||||
final boolean missingIsNull;
|
||||
sf = new SortField("stringdv", type, reverse);
|
||||
sortMissingLast = random().nextBoolean();
|
||||
missingIsNull = true;
|
||||
|
||||
if (sortMissingLast) {
|
||||
sf.setMissingValue(SortField.STRING_LAST);
|
||||
}
|
||||
|
||||
final Sort sort;
|
||||
if (random.nextBoolean()) {
|
||||
sort = new Sort(sf);
|
||||
} else {
|
||||
sort = new Sort(sf, SortField.FIELD_DOC);
|
||||
}
|
||||
final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
|
||||
final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
|
||||
int queryType = random.nextInt(2);
|
||||
if (queryType == 0) {
|
||||
hits = s.search(new ConstantScoreQuery(f),
|
||||
hitCount, sort, random.nextBoolean(), random.nextBoolean());
|
||||
} else {
|
||||
hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
|
||||
}
|
||||
|
||||
// Compute expected results:
|
||||
Collections.sort(f.matchValues, new Comparator<BytesRef>() {
|
||||
@Override
|
||||
public int compare(BytesRef a, BytesRef b) {
|
||||
if (a == null) {
|
||||
if (b == null) {
|
||||
return 0;
|
||||
}
|
||||
if (sortMissingLast) {
|
||||
return 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if (b == null) {
|
||||
if (sortMissingLast) {
|
||||
return -1;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
return a.compareTo(b);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (reverse) {
|
||||
Collections.reverse(f.matchValues);
|
||||
}
|
||||
final List<BytesRef> expected = f.matchValues;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" expected:");
|
||||
for(int idx=0;idx<expected.size();idx++) {
|
||||
BytesRef br = expected.get(idx);
|
||||
if (br == null && missingIsNull == false) {
|
||||
br = new BytesRef();
|
||||
}
|
||||
System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
|
||||
if (idx == hitCount-1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" actual:");
|
||||
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||
BytesRef br = (BytesRef) fd.fields[0];
|
||||
|
||||
System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
|
||||
}
|
||||
}
|
||||
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||
BytesRef br = expected.get(hitIDX);
|
||||
if (br == null && missingIsNull == false) {
|
||||
br = new BytesRef();
|
||||
}
|
||||
|
||||
// Normally, the old codecs (that don't support
|
||||
// docsWithField via doc values) will always return
|
||||
// an empty BytesRef for the missing case; however,
|
||||
// if all docs in a given segment were missing, in
|
||||
// that case it will return null! So we must map
|
||||
// null here, too:
|
||||
BytesRef br2 = (BytesRef) fd.fields[0];
|
||||
if (br2 == null && missingIsNull == false) {
|
||||
br2 = new BytesRef();
|
||||
}
|
||||
|
||||
assertEquals(br, br2);
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static class RandomQuery extends Query {
|
||||
private final long seed;
|
||||
private float density;
|
||||
private final List<BytesRef> docValues;
|
||||
public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
|
||||
|
||||
// density should be 0.0 ... 1.0
|
||||
public RandomQuery(long seed, float density, List<BytesRef> docValues) {
|
||||
this.seed = seed;
|
||||
this.density = density;
|
||||
this.docValues = docValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new ConstantScoreWeight(this) {
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
Random random = new Random(seed ^ context.docBase);
|
||||
final int maxDoc = context.reader().maxDoc();
|
||||
final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
|
||||
assertNotNull(idSource);
|
||||
final FixedBitSet bits = new FixedBitSet(maxDoc);
|
||||
for(int docID=0;docID<maxDoc;docID++) {
|
||||
if (random.nextFloat() <= density) {
|
||||
bits.set(docID);
|
||||
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
|
||||
matchValues.add(docValues.get((int) idSource.get(docID)));
|
||||
}
|
||||
}
|
||||
|
||||
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "RandomFilter(density=" + density + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return sameClassAs(other) &&
|
||||
equalsTo(getClass().cast(other));
|
||||
}
|
||||
|
||||
private boolean equalsTo(RandomQuery other) {
|
||||
return seed == other.seed &&
|
||||
docValues == other.docValues &&
|
||||
density == other.density;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = classHash();
|
||||
h = 31 * h + Objects.hash(seed, density);
|
||||
h = 31 * h + System.identityHashCode(docValues);
|
||||
return h;
|
||||
}
|
||||
}
|
||||
}
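
A minimal sketch of the UninvertingReader.wrap pattern the random sort test relies on, assuming hypothetical field names "title" (an indexed single-valued string) and "id" (an IntPoint field); not part of the patch:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.solr.uninverting.UninvertingReader;

public class UninvertingSortSketch {
  static TopFieldDocs sortByTitle(DirectoryReader directoryReader) throws IOException {
    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("title", UninvertingReader.Type.SORTED);     // single-valued string field
    mapping.put("id", UninvertingReader.Type.INTEGER_POINT); // IntPoint field
    IndexReader uninverting = UninvertingReader.wrap(directoryReader, mapping);
    IndexSearcher searcher = new IndexSearcher(uninverting);
    Sort sort = new Sort(new SortField("title", SortField.Type.STRING));
    return searcher.search(new MatchAllDocsQuery(), 10, sort);
  }
}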
|
|
@@ -0,0 +1,592 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
|
||||
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||
|
||||
public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
assumeFalse("test unsupported on J9 temporarily, see https://issues.apache.org/jira/browse/LUCENE-6522",
|
||||
Constants.JAVA_VENDOR.startsWith("IBM"));
|
||||
}
|
||||
|
||||
public void testByteMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Byte.MIN_VALUE, Byte.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testShortMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Short.MIN_VALUE, Short.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testIntMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Integer.MIN_VALUE, Integer.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testLongMissingVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestMissingVsFieldCache(Long.MIN_VALUE, Long.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedFixedLengthVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||
doTestSortedVsFieldCache(fixedLength, fixedLength);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedVariableLengthVsFieldCache() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestSortedVsFieldCache(1, 10);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedSetFixedLengthVsUninvertedField() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||
doTestSortedSetVsUninvertedField(fixedLength, fixedLength);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortedSetVariableLengthVsUninvertedField() throws Exception {
|
||||
int numIterations = atLeast(1);
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
doTestSortedSetVsUninvertedField(1, 10);
|
||||
}
|
||||
}
|
||||
|
||||
// LUCENE-4853
|
||||
public void testHugeBinaryValues() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
// FSDirectory because SimpleText will consume gobs of
|
||||
// space when storing big binary values:
|
||||
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
|
||||
boolean doFixed = random().nextBoolean();
|
||||
int numDocs;
|
||||
int fixedLength = 0;
|
||||
if (doFixed) {
|
||||
// Sometimes make all values fixed length since some
|
||||
// codecs have different code paths for this:
|
||||
numDocs = TestUtil.nextInt(random(), 10, 20);
|
||||
fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
|
||||
} else {
|
||||
numDocs = TestUtil.nextInt(random(), 100, 200);
|
||||
}
|
||||
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
|
||||
List<byte[]> docBytes = new ArrayList<>();
|
||||
long totalBytes = 0;
|
||||
for(int docID=0;docID<numDocs;docID++) {
|
||||
// we don't use RandomIndexWriter because it might add
|
||||
// more docvalues than we expect !!!!
|
||||
|
||||
// Must be > 64KB in size to ensure more than 2 pages in
|
||||
// PagedBytes would be needed:
|
||||
int numBytes;
|
||||
if (doFixed) {
|
||||
numBytes = fixedLength;
|
||||
} else if (docID == 0 || random().nextInt(5) == 3) {
|
||||
numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
|
||||
} else {
|
||||
numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024);
|
||||
}
|
||||
totalBytes += numBytes;
|
||||
if (totalBytes > 5 * 1024*1024) {
|
||||
break;
|
||||
}
|
||||
byte[] bytes = new byte[numBytes];
|
||||
random().nextBytes(bytes);
|
||||
docBytes.add(bytes);
|
||||
Document doc = new Document();
|
||||
BytesRef b = new BytesRef(bytes);
|
||||
b.length = bytes.length;
|
||||
doc.add(new BinaryDocValuesField("field", b));
|
||||
doc.add(new StringField("id", ""+docID, Field.Store.YES));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
} catch (IllegalArgumentException iae) {
|
||||
if (iae.getMessage().indexOf("is too large") == -1) {
|
||||
throw iae;
|
||||
} else {
|
||||
// OK: some codecs can't handle binary DV > 32K
|
||||
assertFalse(codecAcceptsHugeBinaryValues("field"));
|
||||
w.rollback();
|
||||
d.close();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DirectoryReader r;
|
||||
try {
|
||||
r = DirectoryReader.open(w);
|
||||
} catch (IllegalArgumentException iae) {
|
||||
if (iae.getMessage().indexOf("is too large") == -1) {
|
||||
throw iae;
|
||||
} else {
|
||||
assertFalse(codecAcceptsHugeBinaryValues("field"));
|
||||
|
||||
// OK: some codecs can't handle binary DV > 32K
|
||||
w.rollback();
|
||||
d.close();
|
||||
return;
|
||||
}
|
||||
}
|
||||
w.close();
|
||||
|
||||
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(ar);
|
||||
|
||||
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||
for(int docID=0;docID<docBytes.size();docID++) {
|
||||
Document doc = ar.document(docID);
|
||||
BytesRef bytes = s.get(docID);
|
||||
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
|
||||
assertEquals(expected.length, bytes.length);
|
||||
assertEquals(new BytesRef(expected), bytes);
|
||||
}
|
||||
|
||||
assertTrue(codecAcceptsHugeBinaryValues("field"));
|
||||
|
||||
ar.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
private static final int LARGE_BINARY_FIELD_LENGTH = (1 << 15) - 2;
|
||||
|
||||
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
|
||||
public void testHugeBinaryValueLimit() throws Exception {
|
||||
// We only test DVFormats that have a limit
|
||||
assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
// FSDirectory because SimpleText will consume gobs of
|
||||
// space when storing big binary values:
|
||||
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
|
||||
boolean doFixed = random().nextBoolean();
|
||||
int numDocs;
|
||||
int fixedLength = 0;
|
||||
if (doFixed) {
|
||||
// Sometimes make all values fixed length since some
|
||||
// codecs have different code paths for this:
|
||||
numDocs = TestUtil.nextInt(random(), 10, 20);
|
||||
fixedLength = LARGE_BINARY_FIELD_LENGTH;
|
||||
} else {
|
||||
numDocs = TestUtil.nextInt(random(), 100, 200);
|
||||
}
|
||||
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
|
||||
List<byte[]> docBytes = new ArrayList<>();
|
||||
long totalBytes = 0;
|
||||
for(int docID=0;docID<numDocs;docID++) {
|
||||
// we don't use RandomIndexWriter because it might add
|
||||
// more docvalues than we expect !!!!
|
||||
|
||||
// Must be > 64KB in size to ensure more than 2 pages in
|
||||
// PagedBytes would be needed:
|
||||
int numBytes;
|
||||
if (doFixed) {
|
||||
numBytes = fixedLength;
|
||||
} else if (docID == 0 || random().nextInt(5) == 3) {
|
||||
numBytes = LARGE_BINARY_FIELD_LENGTH;
|
||||
} else {
|
||||
numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
|
||||
}
|
||||
totalBytes += numBytes;
|
||||
if (totalBytes > 5 * 1024*1024) {
|
||||
break;
|
||||
}
|
||||
byte[] bytes = new byte[numBytes];
|
||||
random().nextBytes(bytes);
|
||||
docBytes.add(bytes);
|
||||
Document doc = new Document();
|
||||
BytesRef b = new BytesRef(bytes);
|
||||
b.length = bytes.length;
|
||||
doc.add(new BinaryDocValuesField("field", b));
|
||||
doc.add(new StringField("id", ""+docID, Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
DirectoryReader r = DirectoryReader.open(w);
|
||||
w.close();
|
||||
|
||||
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(ar);
|
||||
|
||||
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||
for(int docID=0;docID<docBytes.size();docID++) {
|
||||
Document doc = ar.document(docID);
|
||||
BytesRef bytes = s.get(docID);
|
||||
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
|
||||
assertEquals(expected.length, bytes.length);
|
||||
assertEquals(new BytesRef(expected), bytes);
|
||||
}
|
||||
|
||||
ar.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||
Document doc = new Document();
|
||||
Field idField = new StringField("id", "", Field.Store.NO);
|
||||
Field indexedField = new StringField("indexed", "", Field.Store.NO);
|
||||
Field dvField = new SortedDocValuesField("dv", new BytesRef());
|
||||
doc.add(idField);
|
||||
doc.add(indexedField);
|
||||
doc.add(dvField);
|
||||
|
||||
// index some docs
|
||||
int numDocs = atLeast(300);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
idField.setStringValue(Integer.toString(i));
|
||||
final int length;
|
||||
if (minLength == maxLength) {
|
||||
length = minLength; // fixed length
|
||||
} else {
|
||||
length = TestUtil.nextInt(random(), minLength, maxLength);
|
||||
}
|
||||
String value = TestUtil.randomSimpleString(random(), length);
|
||||
indexedField.setStringValue(value);
|
||||
dvField.setBytesValue(new BytesRef(value));
|
||||
writer.addDocument(doc);
|
||||
if (random().nextInt(31) == 0) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
|
||||
// delete some docs
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
writer.close();
|
||||
|
||||
// compare
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
|
||||
SortedDocValues actual = r.getSortedDocValues("dv");
|
||||
assertEquals(r.maxDoc(), expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||
|
||||
// index some docs
|
||||
int numDocs = atLeast(300);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
|
||||
doc.add(idField);
|
||||
final int length = TestUtil.nextInt(random(), minLength, maxLength);
|
||||
int numValues = random().nextInt(17);
|
||||
// create a random list of strings
|
||||
List<String> values = new ArrayList<>();
|
||||
for (int v = 0; v < numValues; v++) {
|
||||
values.add(TestUtil.randomSimpleString(random(), minLength, length));
|
||||
}
|
||||
|
||||
// add in any order to the indexed field
|
||||
ArrayList<String> unordered = new ArrayList<>(values);
|
||||
Collections.shuffle(unordered, random());
|
||||
for (String v : values) {
|
||||
doc.add(newStringField("indexed", v, Field.Store.NO));
|
||||
}
|
||||
|
||||
// add in any order to the dv field
|
||||
ArrayList<String> unordered2 = new ArrayList<>(values);
|
||||
Collections.shuffle(unordered2, random());
|
||||
for (String v : unordered2) {
|
||||
doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
|
||||
}
|
||||
|
||||
writer.addDocument(doc);
|
||||
if (random().nextInt(31) == 0) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
|
||||
// delete some docs
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
|
||||
// compare per-segment
|
||||
DirectoryReader ir = writer.getReader();
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
|
||||
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
|
||||
assertEquals(r.maxDoc(), expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
|
||||
writer.forceMerge(1);
|
||||
|
||||
// now compare again after the merge
|
||||
ir = writer.getReader();
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
|
||||
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
|
||||
assertEquals(ir.maxDoc(), expected, actual);
|
||||
ir.close();
|
||||
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void doTestMissingVsFieldCache(LongProducer longs) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||
Field idField = new StringField("id", "", Field.Store.NO);
|
||||
Field indexedField = newStringField("indexed", "", Field.Store.NO);
|
||||
Field dvField = new NumericDocValuesField("dv", 0);
|
||||
|
||||
|
||||
// index some docs
|
||||
int numDocs = atLeast(300);
|
||||
// numDocs should always be > 256 so that in case of a codec that optimizes
|
||||
// for numbers of values <= 256, all storage layouts are tested
|
||||
assert numDocs > 256;
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
idField.setStringValue(Integer.toString(i));
|
||||
long value = longs.next();
|
||||
indexedField.setStringValue(Long.toString(value));
|
||||
dvField.setLongValue(value);
|
||||
Document doc = new Document();
|
||||
doc.add(idField);
|
||||
// 1/4 of the time we neglect to add the fields
|
||||
if (random().nextInt(4) > 0) {
|
||||
doc.add(indexedField);
|
||||
doc.add(dvField);
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
if (random().nextInt(31) == 0) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
|
||||
// delete some docs
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
|
||||
// merge some segments and ensure that at least one of them has more than
|
||||
// 256 values
|
||||
writer.forceMerge(numDocs / 256);
|
||||
|
||||
writer.close();
|
||||
|
||||
// compare
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null);
|
||||
Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void doTestMissingVsFieldCache(final long minValue, final long maxValue) throws Exception {
|
||||
doTestMissingVsFieldCache(new LongProducer() {
|
||||
@Override
|
||||
long next() {
|
||||
return TestUtil.nextLong(random(), minValue, maxValue);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static abstract class LongProducer {
|
||||
abstract long next();
|
||||
}
|
||||
|
||||
private void assertEquals(Bits expected, Bits actual) throws Exception {
|
||||
assertEquals(expected.length(), actual.length());
|
||||
for (int i = 0; i < expected.length(); i++) {
|
||||
assertEquals(expected.get(i), actual.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
|
||||
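// view each single-valued instance as a SortedSetDocValues so the multi-valued comparison below can be reused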
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
|
||||
}
|
||||
|
||||
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
|
||||
// can be null for the segment if no docs actually had any SortedDocValues
|
||||
// in this case FC.getDocTermOrds returns EMPTY
|
||||
if (actual == null) {
|
||||
assertEquals(expected.getValueCount(), 0);
|
||||
return;
|
||||
}
|
||||
assertEquals(expected.getValueCount(), actual.getValueCount());
|
||||
// compare ord lists
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
expected.setDocument(i);
|
||||
actual.setDocument(i);
|
||||
long expectedOrd;
|
||||
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
|
||||
assertEquals(expectedOrd, actual.nextOrd());
|
||||
}
|
||||
assertEquals(NO_MORE_ORDS, actual.nextOrd());
|
||||
}
|
||||
|
||||
// compare ord dictionary
|
||||
for (long i = 0; i < expected.getValueCount(); i++) {
|
||||
final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
|
||||
final BytesRef actualBytes = actual.lookupOrd(i);
|
||||
assertEquals(expectedBytes, actualBytes);
|
||||
}
|
||||
|
||||
// compare termsenum
|
||||
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
|
||||
}
|
||||
|
||||
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
|
||||
BytesRef ref;
|
||||
|
||||
// sequential next() through all terms
|
||||
while ((ref = expected.next()) != null) {
|
||||
assertEquals(ref, actual.next());
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
assertNull(actual.next());
|
||||
|
||||
// sequential seekExact(ord) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
expected.seekExact(i);
|
||||
actual.seekExact(i);
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// sequential seekExact(BytesRef) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
expected.seekExact(i);
|
||||
assertTrue(actual.seekExact(expected.term()));
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// sequential seekCeil(BytesRef) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
expected.seekExact(i);
|
||||
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekExact(ord)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||
expected.seekExact(randomOrd);
|
||||
actual.seekExact(randomOrd);
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekExact(BytesRef)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||
expected.seekExact(randomOrd);
|
||||
actual.seekExact(expected.term());
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekCeil(BytesRef)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
|
||||
SeekStatus expectedStatus = expected.seekCeil(target);
|
||||
assertEquals(expectedStatus, actual.seekCeil(target));
|
||||
if (expectedStatus != SeekStatus.END) {
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean codecAcceptsHugeBinaryValues(String field) {
|
||||
String name = TestUtil.getDocValuesFormat(field);
|
||||
return !(name.equals("Memory")); // Direct has a different type of limit
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
// TODO: what happened to this test... it's not actually uninverting?
|
||||
public class TestFieldCacheWithThreads extends LuceneTestCase {
|
||||
|
||||
public void test() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||
|
||||
final List<Long> numbers = new ArrayList<>();
|
||||
final List<BytesRef> binary = new ArrayList<>();
|
||||
final List<BytesRef> sorted = new ArrayList<>();
|
||||
final int numDocs = atLeast(100);
|
||||
for(int i=0;i<numDocs;i++) {
|
||||
Document d = new Document();
|
||||
long number = random().nextLong();
|
||||
d.add(new NumericDocValuesField("number", number));
|
||||
BytesRef bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
|
||||
d.add(new BinaryDocValuesField("bytes", bytes));
|
||||
binary.add(bytes);
|
||||
bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
|
||||
d.add(new SortedDocValuesField("sorted", bytes));
|
||||
sorted.add(bytes);
|
||||
w.addDocument(d);
|
||||
numbers.add(number);
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
final IndexReader r = DirectoryReader.open(w);
|
||||
w.close();
|
||||
|
||||
assertEquals(1, r.leaves().size());
|
||||
final LeafReader ar = r.leaves().get(0).reader();
|
||||
|
||||
int numThreads = TestUtil.nextInt(random(), 2, 5);
|
||||
List<Thread> threads = new ArrayList<>();
|
||||
final CountDownLatch startingGun = new CountDownLatch(1);
|
||||
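// every worker thread blocks on startingGun before its lookups, so all FieldCache accesses race once the latch is released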
for(int t=0;t<numThreads;t++) {
|
||||
final Random threadRandom = new Random(random().nextLong());
|
||||
Thread thread = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
//NumericDocValues ndv = ar.getNumericDocValues("number");
|
||||
NumericDocValues ndv = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false);
|
||||
//BinaryDocValues bdv = ar.getBinaryDocValues("bytes");
|
||||
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false);
|
||||
SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
|
||||
startingGun.await();
|
||||
int iters = atLeast(1000);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
int docID = threadRandom.nextInt(numDocs);
|
||||
switch(threadRandom.nextInt(4)) {
|
||||
case 0:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.INT_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
case 1:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
case 2:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.FLOAT_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
case 3:
|
||||
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.DOUBLE_POINT_PARSER, false).get(docID));
|
||||
break;
|
||||
}
|
||||
BytesRef term = bdv.get(docID);
|
||||
assertEquals(binary.get(docID), term);
|
||||
term = sdv.get(docID);
|
||||
assertEquals(sorted.get(docID), term);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
thread.start();
|
||||
threads.add(thread);
|
||||
}
|
||||
|
||||
startingGun.countDown();
|
||||
|
||||
for(Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void test2() throws Exception {
|
||||
Random random = random();
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
final Directory dir = newDirectory();
|
||||
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||
final boolean allowDups = random.nextBoolean();
|
||||
final Set<String> seen = new HashSet<>();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
|
||||
}
|
||||
int numDocs = 0;
|
||||
final List<BytesRef> docValues = new ArrayList<>();
|
||||
|
||||
// TODO: deletions
|
||||
while (numDocs < NUM_DOCS) {
|
||||
final String s;
|
||||
if (random.nextBoolean()) {
|
||||
s = TestUtil.randomSimpleString(random);
|
||||
} else {
|
||||
s = TestUtil.randomUnicodeString(random);
|
||||
}
|
||||
final BytesRef br = new BytesRef(s);
|
||||
|
||||
if (!allowDups) {
|
||||
if (seen.contains(s)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(s);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": s=" + s);
|
||||
}
|
||||
|
||||
final Document doc = new Document();
|
||||
doc.add(new SortedDocValuesField("stringdv", br));
|
||||
doc.add(new NumericDocValuesField("id", numDocs));
|
||||
docValues.add(br);
|
||||
writer.addDocument(doc);
|
||||
numDocs++;
|
||||
|
||||
if (random.nextInt(40) == 17) {
|
||||
// force flush
|
||||
writer.getReader().close();
|
||||
}
|
||||
}
|
||||
|
||||
writer.forceMerge(1);
|
||||
final DirectoryReader r = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
final LeafReader sr = getOnlyLeafReader(r);
|
||||
|
||||
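// run the lookup threads until this deadline: roughly 30 seconds in nightly runs, 1 second otherwise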
final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);
|
||||
|
||||
final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
|
||||
Thread[] threads = new Thread[NUM_THREADS];
|
||||
for(int thread=0;thread<NUM_THREADS;thread++) {
|
||||
threads[thread] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
Random random = random();
|
||||
final SortedDocValues stringDVDirect;
|
||||
final NumericDocValues docIDToID;
|
||||
try {
|
||||
stringDVDirect = sr.getSortedDocValues("stringdv");
|
||||
docIDToID = sr.getNumericDocValues("id");
|
||||
assertNotNull(stringDVDirect);
|
||||
} catch (IOException ioe) {
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
while(System.nanoTime() < END_TIME) {
|
||||
final SortedDocValues source;
|
||||
source = stringDVDirect;
|
||||
|
||||
for(int iter=0;iter<100;iter++) {
|
||||
final int docID = random.nextInt(sr.maxDoc());
|
||||
BytesRef term = source.get(docID);
|
||||
assertEquals(docValues.get((int) docIDToID.get(docID)), term);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[thread].start();
|
||||
}
|
||||
|
||||
for(Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
|
@@ -0,0 +1,497 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.concurrent.CyclicBarrier;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LegacyDoubleField;
|
||||
import org.apache.lucene.document.LegacyFloatField;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LegacyNumericUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/** random assortment of tests against legacy numerics */
|
||||
public class TestLegacyFieldCache extends LuceneTestCase {
|
||||
private static LeafReader reader;
|
||||
private static int NUM_DOCS;
|
||||
private static Directory directory;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
NUM_DOCS = atLeast(500);
|
||||
directory = newDirectory();
|
||||
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||
long theLong = Long.MAX_VALUE;
|
||||
double theDouble = Double.MAX_VALUE;
|
||||
int theInt = Integer.MAX_VALUE;
|
||||
float theFloat = Float.MAX_VALUE;
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: setUp");
|
||||
}
|
||||
for (int i = 0; i < NUM_DOCS; i++){
|
||||
Document doc = new Document();
|
||||
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
|
||||
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
|
||||
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
|
||||
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
|
||||
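// only even-numbered docs get the "sparse" and "numInt" fields, so getDocsWithField must report a genuinely sparse set for them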
if (i%2 == 0) {
|
||||
doc.add(new LegacyIntField("sparse", i, Field.Store.NO));
|
||||
}
|
||||
|
||||
if (i%2 == 0) {
|
||||
doc.add(new LegacyIntField("numInt", i, Field.Store.NO));
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
IndexReader r = writer.getReader();
|
||||
reader = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
}
|
||||
|
||||
public void testInfoStream() throws Exception {
|
||||
try {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
|
||||
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
|
||||
cache.getNumerics(reader, "theDouble", new FieldCache.Parser() {
|
||||
@Override
|
||||
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||
}
|
||||
@Override
|
||||
public long parseValue(BytesRef term) {
|
||||
int val = (int) LegacyNumericUtils.prefixCodedToLong(term);
|
||||
if (val<0) val ^= 0x7fffffff;
|
||||
return val;
|
||||
}
|
||||
}, false);
|
||||
assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1);
|
||||
} finally {
|
||||
FieldCache.DEFAULT.setInfoStream(null);
|
||||
FieldCache.DEFAULT.purgeAllCaches();
|
||||
}
|
||||
}
|
||||
|
||||
public void test() throws IOException {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Long.MAX_VALUE - i, longs.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
|
||||
}
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean());
|
||||
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean()));
|
||||
for (int i = 0; i < NUM_DOCS; i++) {
|
||||
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
|
||||
}
|
||||
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "theLong", null);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", null));
|
||||
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
}
|
||||
|
||||
docsWithField = cache.getDocsWithField(reader, "sparse", null);
|
||||
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", null));
|
||||
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
|
||||
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
}
|
||||
|
||||
public void testEmptyIndex() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
|
||||
writer.close();
|
||||
IndexReader r = DirectoryReader.open(dir);
|
||||
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
|
||||
TestUtil.checkReader(reader);
|
||||
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
|
||||
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
|
||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testDocsWithField() throws Exception {
|
||||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
|
||||
// The double[] takes one slot, and docsWithField should also
|
||||
// have been populated:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER);
|
||||
|
||||
// No new entries should appear:
|
||||
assertEquals(2, cache.getCacheEntries().length);
|
||||
assertTrue(bits instanceof Bits.MatchAllBits);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.LEGACY_INT_PARSER);
|
||||
assertEquals(4, cache.getCacheEntries().length);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
|
||||
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.LEGACY_INT_PARSER);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, numInts.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetDocsWithFieldThreadSafety() throws Exception {
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
int NUM_THREADS = 3;
|
||||
Thread[] threads = new Thread[NUM_THREADS];
|
||||
final AtomicBoolean failed = new AtomicBoolean();
|
||||
final AtomicInteger iters = new AtomicInteger();
|
||||
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
|
||||
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
|
||||
new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
cache.purgeAllCaches();
|
||||
iters.incrementAndGet();
|
||||
}
|
||||
});
|
||||
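// whenever all threads reach the barrier, its action purges the cache and advances the iteration counter before they resume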
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
try {
|
||||
while(!failed.get()) {
|
||||
final int op = random().nextInt(3);
|
||||
if (op == 0) {
|
||||
// Purge all caches & resume, once all
|
||||
// threads get here:
|
||||
restart.await();
|
||||
if (iters.get() >= NUM_ITER) {
|
||||
break;
|
||||
}
|
||||
} else if (op == 1) {
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
assertEquals(i%2 == 0, docsWithField.get(i));
|
||||
}
|
||||
} else {
|
||||
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
|
||||
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
|
||||
for (int i = 0; i < docsWithField.length(); i++) {
|
||||
if (i%2 == 0) {
|
||||
assertTrue(docsWithField.get(i));
|
||||
assertEquals(i, ints.get(i));
|
||||
} else {
|
||||
assertFalse(docsWithField.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
failed.set(true);
|
||||
restart.reset();
|
||||
throw new RuntimeException(t);
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[threadIDX].start();
|
||||
}
|
||||
|
||||
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
|
||||
threads[threadIDX].join();
|
||||
}
|
||||
assertFalse(failed.get());
|
||||
}
|
||||
|
||||
public void testDocValuesIntegration() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(null);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
|
||||
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
|
||||
doc.add(new NumericDocValuesField("numeric", 42));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
|
||||
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
// Binary type: can be retrieved via getTerms()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.LEGACY_INT_PARSER, false);
|
||||
});
|
||||
|
||||
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.LEGACY_INT_PARSER, false);
|
||||
});
|
||||
|
||||
// Numeric type: can be retrieved via getInts() and so on
|
||||
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.LEGACY_INT_PARSER, false);
|
||||
assertEquals(42, numeric.get(0));
|
||||
|
||||
// SortedSet type: can be retrieved via getDocTermOrds()
|
||||
expectThrows(IllegalStateException.class, () -> {
|
||||
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.LEGACY_INT_PARSER, false);
|
||||
});
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonexistantFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonIndexedFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("bogusbytes", "bogus"));
|
||||
doc.add(new StoredField("bogusshorts", "bogus"));
|
||||
doc.add(new StoredField("bogusints", "bogus"));
|
||||
doc.add(new StoredField("boguslongs", "bogus"));
|
||||
doc.add(new StoredField("bogusfloats", "bogus"));
|
||||
doc.add(new StoredField("bogusdoubles", "bogus"));
|
||||
doc.add(new StoredField("bogusbits", "bogus"));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
LeafReader ar = getOnlyLeafReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
|
||||
assertEquals(0, floats.get(0));
|
||||
|
||||
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||
assertEquals(0, doubles.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent from using the full long range
|
||||
public void testLongFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
|
||||
doc.add(field);
|
||||
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final long v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Long.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Long.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextLong(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setLongValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], longs.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure that the use of GrowableWriter doesn't prevent from using the full int range
|
||||
public void testIntFieldCache() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
cfg.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
|
||||
Document doc = new Document();
|
||||
LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
|
||||
doc.add(field);
|
||||
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final int v;
|
||||
switch (random().nextInt(10)) {
|
||||
case 0:
|
||||
v = Integer.MIN_VALUE;
|
||||
break;
|
||||
case 1:
|
||||
v = 0;
|
||||
break;
|
||||
case 2:
|
||||
v = Integer.MAX_VALUE;
|
||||
break;
|
||||
default:
|
||||
v = TestUtil.nextInt(random(), -10, 10);
|
||||
break;
|
||||
}
|
||||
values[i] = v;
|
||||
if (v == 0 && random().nextBoolean()) {
|
||||
// missing
|
||||
iw.addDocument(new Document());
|
||||
} else {
|
||||
field.setIntValue(v);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
}
|
||||
iw.forceMerge(1);
|
||||
final DirectoryReader reader = iw.getReader();
|
||||
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER, false);
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
assertEquals(values[i], ints.get(i));
|
||||
}
|
||||
reader.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
|
@@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyIntField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestNumericTerms32 extends LuceneTestCase {
|
||||
// distance of entries
|
||||
private static int distance;
|
||||
// shift the starting of the values to the left, to also have negative values:
|
||||
private static final int startOffset = - 1 << 15;
|
||||
// number of docs to generate for testing
|
||||
private static int noDocs;
|
||||
|
||||
private static Directory directory = null;
|
||||
private static IndexReader reader = null;
|
||||
private static IndexSearcher searcher = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
noDocs = atLeast(4096);
|
||||
distance = (1 << 30) / noDocs;
|
||||
directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
|
||||
.setMergePolicy(newLogMergePolicy()));
|
||||
|
||||
final FieldType storedInt = new FieldType(LegacyIntField.TYPE_NOT_STORED);
|
||||
storedInt.setStored(true);
|
||||
storedInt.freeze();
|
||||
|
||||
final FieldType storedInt8 = new FieldType(storedInt);
|
||||
storedInt8.setNumericPrecisionStep(8);
|
||||
|
||||
final FieldType storedInt4 = new FieldType(storedInt);
|
||||
storedInt4.setNumericPrecisionStep(4);
|
||||
|
||||
final FieldType storedInt2 = new FieldType(storedInt);
|
||||
storedInt2.setNumericPrecisionStep(2);
|
||||
|
||||
LegacyIntField
|
||||
field8 = new LegacyIntField("field8", 0, storedInt8),
|
||||
field4 = new LegacyIntField("field4", 0, storedInt4),
|
||||
field2 = new LegacyIntField("field2", 0, storedInt2);
|
||||
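// the same value is written to all three fields so each precision step (8, 4, 2) of the legacy trie encoding gets exercised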
|
||||
Document doc = new Document();
|
||||
// add fields that have a distance, to test general functionality
|
||||
doc.add(field8); doc.add(field4); doc.add(field2);
|
||||
|
||||
// Add a series of noDocs docs with increasing int values
|
||||
for (int l=0; l<noDocs; l++) {
|
||||
int val=distance*l+startOffset;
|
||||
field8.setIntValue(val);
|
||||
field4.setIntValue(val);
|
||||
field2.setIntValue(val);
|
||||
|
||||
val=l-(noDocs/2);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
Map<String,Type> map = new HashMap<>();
|
||||
map.put("field2", Type.LEGACY_INTEGER);
|
||||
map.put("field4", Type.LEGACY_INTEGER);
|
||||
map.put("field8", Type.LEGACY_INTEGER);
|
||||
reader = UninvertingReader.wrap(writer.getReader(), map);
|
||||
searcher=newSearcher(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
searcher = null;
|
||||
TestUtil.checkReader(reader);
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
}
|
||||
|
||||
private void testSorting(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should return descending documents
|
||||
int num = TestUtil.nextInt(random(), 10, 20);
|
||||
for (int i = 0; i < num; i++) {
|
||||
int lower=(int)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
int upper=(int)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
int a=lower; lower=upper; upper=a;
|
||||
}
|
||||
Query tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
|
||||
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.INT, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
int last = searcher.doc(sd[0].doc).getField(field).numericValue().intValue();
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
int act = searcher.doc(sd[j].doc).getField(field).numericValue().intValue();
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(8);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(2);
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.uninverting;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.LegacyLongField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LegacyNumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestNumericTerms64 extends LuceneTestCase {
|
||||
// distance of entries
|
||||
private static long distance;
|
||||
// shift the starting of the values to the left, to also have negative values:
|
||||
private static final long startOffset = - 1L << 31;
|
||||
// number of docs to generate for testing
|
||||
private static int noDocs;
|
||||
|
||||
private static Directory directory = null;
|
||||
private static IndexReader reader = null;
|
||||
private static IndexSearcher searcher = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
noDocs = atLeast(4096);
|
||||
distance = (1L << 60) / noDocs;
|
||||
directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
|
||||
.setMergePolicy(newLogMergePolicy()));
|
||||
|
||||
final FieldType storedLong = new FieldType(LegacyLongField.TYPE_NOT_STORED);
|
||||
storedLong.setStored(true);
|
||||
storedLong.freeze();
|
||||
|
||||
final FieldType storedLong8 = new FieldType(storedLong);
|
||||
storedLong8.setNumericPrecisionStep(8);
|
||||
|
||||
final FieldType storedLong4 = new FieldType(storedLong);
|
||||
storedLong4.setNumericPrecisionStep(4);
|
||||
|
||||
final FieldType storedLong6 = new FieldType(storedLong);
|
||||
storedLong6.setNumericPrecisionStep(6);
|
||||
|
||||
final FieldType storedLong2 = new FieldType(storedLong);
|
||||
storedLong2.setNumericPrecisionStep(2);
|
||||
|
||||
LegacyLongField
|
||||
field8 = new LegacyLongField("field8", 0L, storedLong8),
|
||||
field6 = new LegacyLongField("field6", 0L, storedLong6),
|
||||
field4 = new LegacyLongField("field4", 0L, storedLong4),
|
||||
field2 = new LegacyLongField("field2", 0L, storedLong2);
|
||||
|
||||
Document doc = new Document();
|
||||
// add fields that have a distance, to test general functionality
|
||||
doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2);
|
||||
|
||||
// Add a series of noDocs docs with increasing long values, by updating the fields
|
||||
for (int l=0; l<noDocs; l++) {
|
||||
long val=distance*l+startOffset;
|
||||
field8.setLongValue(val);
|
||||
field6.setLongValue(val);
|
||||
field4.setLongValue(val);
|
||||
field2.setLongValue(val);
|
||||
|
||||
val=l-(noDocs/2);
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
Map<String,Type> map = new HashMap<>();
|
||||
map.put("field2", Type.LEGACY_LONG);
|
||||
map.put("field4", Type.LEGACY_LONG);
|
||||
map.put("field6", Type.LEGACY_LONG);
|
||||
map.put("field8", Type.LEGACY_LONG);
|
||||
reader = UninvertingReader.wrap(writer.getReader(), map);
|
||||
searcher=newSearcher(reader);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
searcher = null;
|
||||
TestUtil.checkReader(reader);
|
||||
reader.close();
|
||||
reader = null;
|
||||
directory.close();
|
||||
directory = null;
|
||||
}
|
||||
|
||||
private void testSorting(int precisionStep) throws Exception {
|
||||
String field="field"+precisionStep;
|
||||
// 10 random tests, the index order is ascending,
|
||||
// so using a reverse sort field should return descending documents
|
||||
int num = TestUtil.nextInt(random(), 10, 20);
|
||||
for (int i = 0; i < num; i++) {
|
||||
long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
|
||||
if (lower>upper) {
|
||||
long a=lower; lower=upper; upper=a;
|
||||
}
|
||||
Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
|
||||
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
|
||||
if (topDocs.totalHits==0) continue;
|
||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||
assertNotNull(sd);
|
||||
long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
|
||||
for (int j=1; j<sd.length; j++) {
|
||||
long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
|
||||
assertTrue("Docs should be sorted backwards", last>act );
|
||||
last=act;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_8bit() throws Exception {
|
||||
testSorting(8);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_6bit() throws Exception {
|
||||
testSorting(6);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_4bit() throws Exception {
|
||||
testSorting(4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSorting_2bit() throws Exception {
|
||||
testSorting(2);
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,395 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.UninvertingReader.Type;

public class TestUninvertingReader extends LuceneTestCase {

  public void testSortedSetInteger() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                         Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    v.setDocument(0);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(1);
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));

    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
  }

  public void testSortedSetFloat() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
    doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                         Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
    LeafReader ar = ir.leaves().get(0).reader();

    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    v.setDocument(0);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(1);
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value));

    value = v.lookupOrd(1);
    assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
  }

  public void testSortedSetLong() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                         Collections.singletonMap("foo", Type.SORTED_SET_LONG));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    v.setDocument(0);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(1);
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));

    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
  }

  public void testSortedSetDouble() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                         Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    v.setDocument(0);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(1);
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));

    value = v.lookupOrd(1);
    assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
  }
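
  // The four tests above uninvert legacy numeric fields into SORTED_SET doc values. Ords are ordered
  // by the prefix-coded term bytes, which for LegacyIntField/LegacyLongField matches signed numeric
  // order -- hence ord 0 resolves to -3 and ord 1 to 5. The float/double variants index the raw bit
  // patterns and decode them back with LegacyNumericUtils.prefixCodedToInt/prefixCodedToLong.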

  /** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
  public void testSortedSetIntegerManyValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    final FieldType NO_TRIE_TYPE = new FieldType(LegacyIntField.TYPE_NOT_STORED);
    NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);

    final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
    UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
    final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
    MULTI_VALUES.add("trie_multi");
    MULTI_VALUES.add("notrie_multi");

    final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
    final int MIN = TestUtil.nextInt(random(), 10, 100);
    final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
    final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;

    { // (at least) one doc should have every value, so that at least one segment has every value
      final Document doc = new Document();
      for (int i = MIN; i <= MAX; i++) {
        doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
        doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
      }
      iw.addDocument(doc);
    }

    // now add some more random docs (note: starting at i=1 because of previously added doc)
    for (int i = 1; i < NUM_DOCS; i++) {
      final Document doc = new Document();
      if (0 != TestUtil.nextInt(random(), 0, 9)) {
        int val = TestUtil.nextInt(random(), MIN, MAX);
        doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
        doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
      }
      if (0 != TestUtil.nextInt(random(), 0, 9)) {
        int numMulti = atLeast(1);
        while (0 < numMulti--) {
          int val = TestUtil.nextInt(random(), MIN, MAX);
          doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
          doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
        }
      }
      iw.addDocument(doc);
    }

    iw.close();

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final int NUM_LEAVES = ir.leaves().size();

    // check the leaves: no more than total set size
    for (LeafReaderContext rc : ir.leaves()) {
      final LeafReader ar = rc.reader();
      for (String f : UNINVERT_MAP.keySet()) {
        final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
        final long valSetSize = v.getValueCount();
        assertTrue(f + ": Expected no more than " + EXPECTED_VALSET_SIZE + " values per segment, got " +
                   valSetSize + " from: " + ar.toString(),
                   valSetSize <= EXPECTED_VALSET_SIZE);

        if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
          // tighter check on multi fields in single segment index since we know one doc has all of them
          assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size",
                       EXPECTED_VALSET_SIZE, valSetSize);
        }
      }
    }

    // check the composite of all leaves: exact expectation of set size
    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : MULTI_VALUES) {
      final SortedSetDocValues v = composite.getSortedSetDocValues(f);
      final long valSetSize = v.getValueCount();
      assertEquals(f + ": Composite reader value set should have had exactly expected size",
                   EXPECTED_VALSET_SIZE, valSetSize);
    }

    ir.close();
    dir.close();
  }

  public void testSortedSetEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    iw.close();

    final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
    for (Type t : EnumSet.allOf(Type.class)) {
      UNINVERT_MAP.put(t.name(), t);
    }

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : UNINVERT_MAP.keySet()) {
      // check the leaves
      // (normally there are none for an empty index, so this is really just future
      // proofing in case that changes for some reason)
      for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader ar = rc.reader();
        assertNull(f + ": Expected no doc values from empty index (leaf)",
                   ar.getSortedSetDocValues(f));
      }

      // check the composite
      assertNull(f + ": Expected no doc values from empty index (composite)",
                 composite.getSortedSetDocValues(f));

    }

    ir.close();
    dir.close();
  }

  public void testFieldInfos() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    BytesRef idBytes = new BytesRef("id");
    doc.add(new StringField("id", idBytes, Store.YES));
    doc.add(new LegacyIntField("int", 5, Store.YES));
    doc.add(new NumericDocValuesField("dv", 5));
    doc.add(new IntPoint("dint", 5));
    doc.add(new StoredField("stored", 5)); // not indexed
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    Map<String, Type> uninvertingMap = new HashMap<>();
    uninvertingMap.put("int", Type.LEGACY_INTEGER);
    uninvertingMap.put("dv", Type.LEGACY_INTEGER);
    uninvertingMap.put("dint", Type.INTEGER_POINT);

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                         uninvertingMap);
    LeafReader leafReader = ir.leaves().get(0).reader();

    FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int");
    assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
    assertEquals(0, intFInfo.getPointDimensionCount());
    assertEquals(0, intFInfo.getPointNumBytes());

    FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint");
    assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
    assertEquals(1, dintFInfo.getPointDimensionCount());
    assertEquals(4, dintFInfo.getPointNumBytes());

    FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv");
    assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());

    FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored");
    assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());

    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
  }

}
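Taken together, the tests above exercise the Solr copy of UninvertingReader: given a mapping from field name to UninvertingReader.Type, it wraps a DirectoryReader and synthesizes doc values at read time for fields indexed without them. A minimal usage sketch built only from the API shown in this test (the directory variable, the "foo" field, and the chosen Type are illustrative, and imports are omitted); it is a sketch, not code from the patch:

// Sketch: expose SORTED_SET doc values for a multi-valued legacy int field "foo".
Map<String,UninvertingReader.Type> mapping =
    Collections.singletonMap("foo", UninvertingReader.Type.SORTED_SET_INTEGER);
DirectoryReader reader = UninvertingReader.wrap(DirectoryReader.open(dir), mapping);
try {
  for (LeafReaderContext ctx : reader.leaves()) {
    SortedSetDocValues dv = ctx.reader().getSortedSetDocValues("foo");
    if (dv == null) continue;           // field absent in this segment
    dv.setDocument(0);                  // walk the first document's ords
    for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
      int value = LegacyNumericUtils.prefixCodedToInt(dv.lookupOrd(ord));
      // ... use value ...
    }
  }
} finally {
  reader.close();                       // closing only the wrapper suffices, as in the tests above
}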
@ -22,25 +22,24 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.ResultContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocList;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;

import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.junit.BeforeClass;
import org.junit.Test;
